synapse-orch-ai 1.5.5 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/backend/core/cache/__init__.py +13 -0
  2. package/backend/core/cache/prompt_cache.py +166 -0
  3. package/backend/core/cache/response_cache.py +204 -0
  4. package/backend/core/cache/store.py +147 -0
  5. package/backend/core/cache/tool_cache.py +71 -0
  6. package/backend/core/config.py +7 -2
  7. package/backend/core/llm_providers.py +275 -113
  8. package/backend/core/models_orchestration.py +16 -0
  9. package/backend/core/orchestration/context.py +23 -10
  10. package/backend/core/orchestration/logger.py +9 -1
  11. package/backend/core/orchestration/steps.py +10 -0
  12. package/backend/core/react_engine.py +209 -109
  13. package/backend/core/routes/chat.py +9 -1
  14. package/backend/core/routes/orchestrations.py +46 -0
  15. package/backend/core/routes/usage.py +14 -0
  16. package/backend/core/tools.py +109 -53
  17. package/backend/core/usage_tracker.py +214 -14
  18. package/frontend-build/.next/BUILD_ID +1 -1
  19. package/frontend-build/.next/build-manifest.json +3 -3
  20. package/frontend-build/.next/prerender-manifest.json +3 -3
  21. package/frontend-build/.next/server/app/_global-error.html +1 -1
  22. package/frontend-build/.next/server/app/_global-error.rsc +1 -1
  23. package/frontend-build/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +1 -1
  24. package/frontend-build/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  25. package/frontend-build/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  26. package/frontend-build/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  27. package/frontend-build/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  28. package/frontend-build/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  29. package/frontend-build/.next/server/app/_not-found.html +1 -1
  30. package/frontend-build/.next/server/app/_not-found.rsc +2 -2
  31. package/frontend-build/.next/server/app/_not-found.segments/_full.segment.rsc +2 -2
  32. package/frontend-build/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  33. package/frontend-build/.next/server/app/_not-found.segments/_index.segment.rsc +2 -2
  34. package/frontend-build/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  35. package/frontend-build/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  36. package/frontend-build/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
  37. package/frontend-build/.next/server/app/index.html +1 -1
  38. package/frontend-build/.next/server/app/index.rsc +3 -3
  39. package/frontend-build/.next/server/app/index.segments/__PAGE__.segment.rsc +2 -2
  40. package/frontend-build/.next/server/app/index.segments/_full.segment.rsc +3 -3
  41. package/frontend-build/.next/server/app/index.segments/_head.segment.rsc +1 -1
  42. package/frontend-build/.next/server/app/index.segments/_index.segment.rsc +2 -2
  43. package/frontend-build/.next/server/app/index.segments/_tree.segment.rsc +2 -2
  44. package/frontend-build/.next/server/app/login/page_client-reference-manifest.js +1 -1
  45. package/frontend-build/.next/server/app/login.html +1 -1
  46. package/frontend-build/.next/server/app/login.rsc +2 -2
  47. package/frontend-build/.next/server/app/login.segments/_full.segment.rsc +2 -2
  48. package/frontend-build/.next/server/app/login.segments/_head.segment.rsc +1 -1
  49. package/frontend-build/.next/server/app/login.segments/_index.segment.rsc +2 -2
  50. package/frontend-build/.next/server/app/login.segments/_tree.segment.rsc +2 -2
  51. package/frontend-build/.next/server/app/login.segments/login/__PAGE__.segment.rsc +1 -1
  52. package/frontend-build/.next/server/app/login.segments/login.segment.rsc +1 -1
  53. package/frontend-build/.next/server/app/page_client-reference-manifest.js +1 -1
  54. package/frontend-build/.next/server/app/settings/[tab]/page_client-reference-manifest.js +1 -1
  55. package/frontend-build/.next/server/chunks/ssr/_0b~n.nn._.js +15 -15
  56. package/frontend-build/.next/server/chunks/ssr/src_app_page_tsx_0ss2.w7._.js +1 -1
  57. package/frontend-build/.next/server/middleware-build-manifest.js +3 -3
  58. package/frontend-build/.next/server/middleware-manifest.json +5 -5
  59. package/frontend-build/.next/server/pages/404.html +1 -1
  60. package/frontend-build/.next/server/pages/500.html +1 -1
  61. package/frontend-build/.next/server/server-reference-manifest.js +1 -1
  62. package/frontend-build/.next/server/server-reference-manifest.json +1 -1
  63. package/frontend-build/.next/static/chunks/0htooj7jtj_tj.js +1 -0
  64. package/frontend-build/.next/static/chunks/0zsell.3txhrd.css +1 -0
  65. package/frontend-build/.next/static/chunks/{0p_5pvs8njhrd.js → 139v3bz1_4_sh.js} +15 -15
  66. package/package.json +1 -1
  67. package/frontend-build/.next/static/chunks/0m6b8x86zmjhr.js +0 -1
  68. package/frontend-build/.next/static/chunks/0sifx3jp~fn_h.css +0 -1
  69. /package/frontend-build/.next/static/{wMcHp1vhEa3V0OBQS57Hg → cs3gFMScfwOahyeFSLcqL}/_buildManifest.js +0 -0
  70. /package/frontend-build/.next/static/{wMcHp1vhEa3V0OBQS57Hg → cs3gFMScfwOahyeFSLcqL}/_clientMiddlewareManifest.js +0 -0
  71. /package/frontend-build/.next/static/{wMcHp1vhEa3V0OBQS57Hg → cs3gFMScfwOahyeFSLcqL}/_ssgManifest.js +0 -0
@@ -0,0 +1,13 @@
1
+ """
2
+ Caching layer for the orchestration and agent system.
3
+
4
+ Three sub-modules:
5
+ - prompt_cache: provider-payload decorators (Anthropic cache_control, etc.)
6
+ - tool_cache: memoization for deterministic MCP/builder/custom tool results
7
+ - response_cache: exact + semantic cache for LLM responses (skips AGENT steps)
8
+
9
+ All caches are opt-in per step via StepConfig.cache_* and globally via settings.
10
+ """
11
+ from core.cache import prompt_cache, tool_cache, response_cache, store
12
+
13
+ __all__ = ["prompt_cache", "tool_cache", "response_cache", "store"]
@@ -0,0 +1,166 @@
1
+ """
2
+ Provider-payload decorators that turn on prompt caching.
3
+
4
+ Caching pricing is asymmetric and provider-specific:
5
+ - Anthropic: cache writes cost ~1.25x base input; cache reads cost ~0.1x base input.
6
+ - OpenAI: automatic for >=1024-token stable prefixes; reads are ~0.5x base input.
7
+ - DeepSeek: server-side automatic; reads ~0.1x base input. Reported via
8
+ `prompt_cache_hit_tokens` / `prompt_cache_miss_tokens`.
9
+ - Gemini: requires an explicit `cached_content` handle via client.caches.create().
10
+ Reads ~0.25x base input. Minimum TTL: 5 min.
11
+ - Bedrock: Anthropic-on-Bedrock supports `cachePoint` content blocks in Converse.
12
+
13
+ Caching is gated by:
14
+ 1. global settings.prompt_cache_enabled
15
+ 2. system prompt length (Anthropic minimum ~1024 tokens for Sonnet/Opus,
16
+ ~2048 for Haiku; we use a conservative 4000-char floor).
17
+
18
+ When unsure or when a provider isn't supported, these helpers no-op so the
19
+ caller's payload is unchanged.
20
+ """
21
+ from typing import Any
22
+
23
+ # Anthropic charges for cache writes; only worth it when the prefix is meaningful.
24
+ # 4000 chars ≈ 1000 tokens. Below Anthropic's minimum prefix length the
25
+ # cache_control marker is silently ignored, so this floor avoids marking prompts that are too short to be cached.
26
+ MIN_CACHEABLE_CHARS = 4000
27
+
28
+ # Separator emitted by core.tools.build_system_prompt between the stable section
29
+ # (cacheable) and the volatile section (turn budget, current time, RAG context).
30
+ # Splitting here keeps the cache prefix byte-stable across turns.
31
+ VOLATILE_SEPARATOR = "\n---\n"
32
+
33
+
34
+ def is_cacheable_system(system: str | None) -> bool:
35
+ return bool(system) and len(system) >= MIN_CACHEABLE_CHARS
36
+
37
+
38
+ def split_stable_volatile(system: str | None) -> tuple[str, str]:
39
+ """Return (stable_prefix, volatile_suffix). Empty suffix when no separator."""
40
+ if not system:
41
+ return "", ""
42
+ idx = system.find(VOLATILE_SEPARATOR)
43
+ if idx < 0:
44
+ return system, ""
45
+ return system[:idx], system[idx:]
46
+
47
+
48
+ # ── Anthropic ────────────────────────────────────────────────────────────────
49
+
50
+ def decorate_anthropic_kwargs(kwargs: dict, system: str | None) -> dict:
51
+ """Mutate `kwargs` so the system prompt + tool block become cache breakpoints.
52
+
53
+ Anthropic supports up to 4 cache_control markers per request; we use 2:
54
+ - end of stable section of system prompt (1 marker)
55
+ - end of tools array (1 marker)
56
+
57
+ The system prompt is split on the VOLATILE_SEPARATOR ("\\n---\\n"). The
58
+ stable prefix is marked as cacheable; the volatile suffix (turn budget,
59
+ current time, RAG context) goes into a second uncached text block so
60
+ cache reads stay valid across turns even when those values change.
61
+ """
62
+ if not is_cacheable_system(system):
63
+ return kwargs
64
+
65
+ stable, volatile = split_stable_volatile(str(system))
66
+
67
+ blocks: list[dict] = [{
68
+ "type": "text",
69
+ "text": stable,
70
+ "cache_control": {"type": "ephemeral"},
71
+ }]
72
+ if volatile:
73
+ blocks.append({"type": "text", "text": volatile})
74
+ kwargs["system"] = blocks
75
+
76
+ # Mark the last tool definition so the whole tools array is part of the prefix.
77
+ tools = kwargs.get("tools")
78
+ if isinstance(tools, list) and tools:
79
+ last = dict(tools[-1]) # shallow copy — don't mutate caller's list
80
+ last["cache_control"] = {"type": "ephemeral"}
81
+ kwargs["tools"] = tools[:-1] + [last]
82
+
83
+ return kwargs
84
+
85
+
86
+ def extract_anthropic_cache_tokens(response) -> tuple[int, int]:
87
+ """Return (cache_read_tokens, cache_write_tokens) from an Anthropic SDK response."""
88
+ usage = getattr(response, "usage", None)
89
+ if not usage:
90
+ return 0, 0
91
+ read = getattr(usage, "cache_read_input_tokens", 0) or 0
92
+ write = getattr(usage, "cache_creation_input_tokens", 0) or 0
93
+ return int(read), int(write)
94
+
95
+
96
+ # ── OpenAI / Grok / v1-compatible ────────────────────────────────────────────
97
+
98
+ def extract_openai_cache_tokens(usage: dict) -> tuple[int, int]:
99
+ """Return (cache_read_tokens, cache_write_tokens) from an OpenAI-style usage dict.
100
+
101
+ OpenAI's auto-caching only reports reads (`prompt_tokens_details.cached_tokens`).
102
+ There is no separate write cost — the first call just pays the normal input rate.
103
+ """
104
+ if not isinstance(usage, dict):
105
+ return 0, 0
106
+ details = usage.get("prompt_tokens_details") or {}
107
+ read = int(details.get("cached_tokens") or 0)
108
+ return read, 0
109
+
110
+
111
+ # ── DeepSeek ─────────────────────────────────────────────────────────────────
112
+
113
+ def extract_deepseek_cache_tokens(usage: dict) -> tuple[int, int]:
114
+ """DeepSeek surfaces hit/miss separately."""
115
+ if not isinstance(usage, dict):
116
+ return 0, 0
117
+ hit = int(usage.get("prompt_cache_hit_tokens") or 0)
118
+ # DeepSeek has no explicit write tier — misses are billed at the normal rate.
119
+ return hit, 0
120
+
121
+
122
+ # ── Gemini ───────────────────────────────────────────────────────────────────
123
+
124
+ def extract_gemini_cache_tokens(response) -> tuple[int, int]:
125
+ """Gemini reports cached tokens in usage_metadata.cached_content_token_count."""
126
+ um = getattr(response, "usage_metadata", None)
127
+ if not um:
128
+ return 0, 0
129
+ read = int(getattr(um, "cached_content_token_count", 0) or 0)
130
+ return read, 0
131
+
132
+
133
+ # ── Bedrock ──────────────────────────────────────────────────────────────────
134
+
135
+ def decorate_bedrock_system_blocks(system_blocks: list[dict], system: str | None) -> list[dict]:
136
+ """Append a cachePoint marker after the system text block.
137
+
138
+ Bedrock's Converse API uses `{"cachePoint": {"type": "default"}}` instead
139
+ of inline cache_control. Only supported on a subset of models (Anthropic
140
+ Claude on Bedrock, Nova). Unsupported models silently ignore the marker.
141
+ """
142
+ if not is_cacheable_system(system):
143
+ return system_blocks
144
+ if not system_blocks:
145
+ return system_blocks
146
+ # Append a cachePoint after the existing text blocks.
147
+ return list(system_blocks) + [{"cachePoint": {"type": "default"}}]
148
+
149
+
150
+ def extract_bedrock_cache_tokens(resp: dict) -> tuple[int, int]:
151
+ """Bedrock returns cache metrics under response['usage']."""
152
+ if not isinstance(resp, dict):
153
+ return 0, 0
154
+ usage = resp.get("usage") or {}
155
+ read = int(usage.get("cacheReadInputTokens") or 0)
156
+ write = int(usage.get("cacheWriteInputTokens") or usage.get("cacheCreationInputTokens") or 0)
157
+ return read, write
158
+
159
+
160
+ # ── Helper for callers ───────────────────────────────────────────────────────
161
+
162
+ def cache_enabled(settings: dict | None) -> bool:
163
+ """Honor the global toggle. Defaults to True when the key is missing."""
164
+ if not settings:
165
+ return True
166
+ return bool(settings.get("prompt_cache_enabled", True))
@@ -0,0 +1,204 @@
1
+ """
2
+ LLM response cache — exact-match + optional semantic-match.
3
+
4
+ Exact match: SHA256 of (model, system_prompt, messages, tools_json). O(1) lookup.
5
+ Semantic match: embed the system prompt + last user message, compare against prior
6
+ cached entries for the same step_id (a hit must also match the model).
7
+
8
+ By design, this cache is OFF unless a caller explicitly opts in. AGENT steps in
9
+ orchestration must NEVER consult it (their behaviour is state-dependent and the
10
+ shared_state mutations from skipping the LLM call would diverge silently).
11
+ LLM / EVALUATOR / EXTRACT_JSON steps can opt in safely.
12
+ """
13
+ import json
14
+ from typing import Any, Optional
15
+
16
+ from core.cache import store
17
+
18
+ NAMESPACE_EXACT = "responses_exact"
19
+ # Semantic cache is opt-in per step; entries are scoped by step_id to keep
20
+ # behaviour comparable to exact match (similar prompts on the same step only).
21
+ NAMESPACE_SEMANTIC_PREFIX = "responses_semantic_"
22
+
23
+
24
+ def _build_exact_key(
25
+ model: str,
26
+ system: str | None,
27
+ messages: list[dict] | None,
28
+ tools: list[dict] | None,
29
+ ) -> str:
30
+ # Tools are normalised to a stable string — list of function names + their schemas.
31
+ tools_norm: list[dict] = []
32
+ for t in tools or []:
33
+ fn = t.get("function", {}) if isinstance(t, dict) else {}
34
+ tools_norm.append({
35
+ "name": fn.get("name", ""),
36
+ "params": fn.get("parameters", {}),
37
+ })
38
+ return store.make_key(
39
+ "resp",
40
+ model or "",
41
+ system or "",
42
+ messages or [],
43
+ tools_norm,
44
+ )
45
+
46
+
47
+ def get_exact(
48
+ model: str,
49
+ system: str | None,
50
+ messages: list[dict] | None,
51
+ tools: list[dict] | None,
52
+ ) -> Optional[dict]:
53
+ """Return the cached response entry {"text", "input_tokens", "output_tokens"} or None."""
54
+ key = _build_exact_key(model, system, messages, tools)
55
+ entry = store.get(NAMESPACE_EXACT, key)
56
+ if entry is None:
57
+ return None
58
+ return entry.get("value")
59
+
60
+
61
+ def set_exact(
62
+ model: str,
63
+ system: str | None,
64
+ messages: list[dict] | None,
65
+ tools: list[dict] | None,
66
+ *,
67
+ text: str,
68
+ input_tokens: int,
69
+ output_tokens: int,
70
+ ttl_seconds: int = 3600,
71
+ step_id: str | None = None,
72
+ ) -> None:
73
+ key = _build_exact_key(model, system, messages, tools)
74
+ store.set(
75
+ NAMESPACE_EXACT,
76
+ key,
77
+ {
78
+ "text": text,
79
+ "input_tokens": input_tokens,
80
+ "output_tokens": output_tokens,
81
+ },
82
+ ttl_seconds=ttl_seconds,
83
+ meta={"model": model, "step_id": step_id},
84
+ )
85
+
86
+
87
+ # ── Semantic cache (optional, ChromaDB-backed via memory.MemoryStore) ─────────
88
+ #
89
+ # Implementation is intentionally light. We reuse the embedding pipeline the
90
+ # chat memory layer uses: the (system + user) text is embedded into a per-step
91
+ # Chroma collection (the stored document is the truncated user message), and
92
+ # the response is persisted in our flat-file store keyed by the document's ID.
93
+ # A high similarity threshold (0.95 by default) limits hits to nearly-identical prompts.
94
+
95
+ _semantic_collections: dict[str, Any] = {}
96
+
97
+
98
+ def _get_memory_store():
99
+ """Resolve the live MemoryStore from server module (initialised at startup)."""
100
+ try:
101
+ from core import server as _server
102
+ return getattr(_server, "memory_store", None)
103
+ except Exception:
104
+ return None
105
+
106
+
107
+ def _get_semantic_collection(step_id: str):
108
+ """Lazy ChromaDB collection per step. Returns None on failure (cache disabled)."""
109
+ if step_id in _semantic_collections:
110
+ return _semantic_collections[step_id]
111
+ mem = _get_memory_store()
112
+ if mem is None or not getattr(mem, "client", None):
113
+ _semantic_collections[step_id] = None
114
+ return None
115
+ try:
116
+ coll = mem.client.get_or_create_collection(name=f"{NAMESPACE_SEMANTIC_PREFIX}{step_id}")
117
+ _semantic_collections[step_id] = coll
118
+ return coll
119
+ except Exception as e:
120
+ print(f"DEBUG cache: semantic cache unavailable ({e}); falling back to exact only")
121
+ _semantic_collections[step_id] = None
122
+ return None
123
+
124
+
125
+ def _embed(text: str) -> Optional[list[float]]:
126
+ mem = _get_memory_store()
127
+ if mem is None:
128
+ return None
129
+ try:
130
+ return mem.get_embedding(text)
131
+ except Exception:
132
+ return None
133
+
134
+
135
+ def get_semantic(
136
+ step_id: str,
137
+ model: str,
138
+ system: str | None,
139
+ user_message: str,
140
+ threshold: float = 0.95,
141
+ ) -> Optional[dict]:
142
+ """Return the response from the closest semantic neighbour, if any beat threshold."""
143
+ coll = _get_semantic_collection(step_id)
144
+ if coll is None:
145
+ return None
146
+ emb = _embed((system or "") + "\n\n" + user_message)
147
+ if emb is None:
148
+ return None
149
+ try:
150
+ res = coll.query(query_embeddings=[emb], n_results=1)
151
+ except Exception:
152
+ return None
153
+ ids = (res.get("ids") or [[]])[0]
154
+ distances = (res.get("distances") or [[]])[0]
155
+ metas = (res.get("metadatas") or [[]])[0]
156
+ if not ids:
157
+ return None
158
+ # Chroma returns cosine distance; similarity = 1 - distance.
159
+ similarity = 1.0 - float(distances[0])
160
+ if similarity < threshold:
161
+ return None
162
+ if metas[0].get("model") != model:
163
+ return None
164
+ entry = store.get(NAMESPACE_EXACT, ids[0])
165
+ if entry is None:
166
+ return None
167
+ return entry.get("value")
168
+
169
+
170
+ def set_semantic(
171
+ step_id: str,
172
+ model: str,
173
+ system: str | None,
174
+ user_message: str,
175
+ *,
176
+ text: str,
177
+ input_tokens: int,
178
+ output_tokens: int,
179
+ ttl_seconds: int = 3600,
180
+ ) -> None:
181
+ coll = _get_semantic_collection(step_id)
182
+ if coll is None:
183
+ return
184
+ emb = _embed((system or "") + "\n\n" + user_message)
185
+ if emb is None:
186
+ return
187
+ # Dedicated semantic key: used both as the store key (in the exact-match namespace) and as the Chroma document ID.
188
+ key = store.make_key("resp_semantic", model, step_id, user_message)
189
+ store.set(
190
+ NAMESPACE_EXACT,
191
+ key,
192
+ {"text": text, "input_tokens": input_tokens, "output_tokens": output_tokens},
193
+ ttl_seconds=ttl_seconds,
194
+ meta={"model": model, "step_id": step_id, "semantic": True},
195
+ )
196
+ try:
197
+ coll.upsert(
198
+ ids=[key],
199
+ embeddings=[emb],
200
+ documents=[(user_message or "")[:2000]],
201
+ metadatas=[{"model": model, "step_id": step_id}],
202
+ )
203
+ except Exception as e:
204
+ print(f"DEBUG cache: semantic upsert failed ({e})")
@@ -0,0 +1,147 @@
1
+ """
2
+ Shared disk-backed key/value store for the cache layer.
3
+
4
+ Each cached value lives in its own JSON file under data/cache/<namespace>/<aa>/<full_hash>.json
5
+ where <aa> is the first two hex chars of the hash (avoids cramming thousands of
6
+ files into a single directory).
7
+
8
+ Format on disk:
9
+ {
10
+ "value": <jsonable>,
11
+ "created_at": <unix ts>,
12
+ "ttl_seconds": <int|None>,
13
+ "meta": {...} // arbitrary caller metadata (tool_name, model, etc.)
14
+ }
15
+
16
+ The store is intentionally simple — no LRU, no compression, no Redis. The
17
+ hot path is one open()+json.load() per lookup; for the dataset sizes we care
18
+ about (tens of MB per namespace) this is well under a millisecond.
19
+ """
20
+ import hashlib
21
+ import json
22
+ import os
23
+ import threading
24
+ import time
25
+ from pathlib import Path
26
+ from typing import Any, Optional
27
+
28
+ from core.config import DATA_DIR
29
+
30
+ CACHE_ROOT = Path(DATA_DIR) / "cache"
31
+
32
+ _lock = threading.Lock()
33
+
34
+
35
+ def _hash_key(key: str) -> str:
36
+ return hashlib.sha256(key.encode("utf-8")).hexdigest()
37
+
38
+
39
+ def _path_for(namespace: str, key_hash: str) -> Path:
40
+ return CACHE_ROOT / namespace / key_hash[:2] / f"{key_hash}.json"
41
+
42
+
43
+ def make_key(*parts: Any) -> str:
44
+ """Build a deterministic cache key from arbitrary parts.
45
+
46
+ Dicts/lists are serialised with sort_keys so attribute order doesn't break
47
+ the hash. None becomes a NUL sentinel; all other parts are coerced via str().
48
+ """
49
+ norm: list[str] = []
50
+ for p in parts:
51
+ if p is None:
52
+ norm.append("\x00")
53
+ elif isinstance(p, (dict, list)):
54
+ norm.append(json.dumps(p, sort_keys=True, default=str, separators=(",", ":")))
55
+ else:
56
+ norm.append(str(p))
57
+ return _hash_key("\x1f".join(norm))
58
+
59
+
60
+ def get(namespace: str, key: str) -> Optional[dict]:
61
+ """Return the cached entry dict, or None if missing/expired."""
62
+ key_hash = key if len(key) == 64 and all(c in "0123456789abcdef" for c in key) else _hash_key(key)
63
+ path = _path_for(namespace, key_hash)
64
+ if not path.exists():
65
+ return None
66
+ try:
67
+ with open(path, "r", encoding="utf-8") as f:
68
+ entry = json.load(f)
69
+ except Exception:
70
+ return None
71
+ ttl = entry.get("ttl_seconds")
72
+ if ttl is not None and ttl > 0:
73
+ age = time.time() - entry.get("created_at", 0)
74
+ if age > ttl:
75
+ try:
76
+ path.unlink()
77
+ except Exception:
78
+ pass
79
+ return None
80
+ return entry
81
+
82
+
83
+ def set(namespace: str, key: str, value: Any, ttl_seconds: Optional[int] = None, meta: Optional[dict] = None) -> str:
84
+ """Persist `value` under `key` in `namespace`. Returns the key hash."""
85
+ key_hash = key if len(key) == 64 and all(c in "0123456789abcdef" for c in key) else _hash_key(key)
86
+ path = _path_for(namespace, key_hash)
87
+ entry = {
88
+ "value": value,
89
+ "created_at": time.time(),
90
+ "ttl_seconds": ttl_seconds,
91
+ "meta": meta or {},
92
+ }
93
+ with _lock:
94
+ path.parent.mkdir(parents=True, exist_ok=True)
95
+ tmp = path.with_suffix(".tmp")
96
+ with open(tmp, "w", encoding="utf-8") as f:
97
+ json.dump(entry, f, ensure_ascii=False, default=str)
98
+ os.replace(tmp, path)
99
+ return key_hash
100
+
101
+
102
+ def delete(namespace: str, key: str) -> bool:
103
+ key_hash = key if len(key) == 64 and all(c in "0123456789abcdef" for c in key) else _hash_key(key)
104
+ path = _path_for(namespace, key_hash)
105
+ if path.exists():
106
+ try:
107
+ path.unlink()
108
+ return True
109
+ except Exception:
110
+ return False
111
+ return False
112
+
113
+
114
+ def clear_namespace(namespace: str) -> int:
115
+ """Delete every entry under a namespace. Returns the count removed."""
116
+ base = CACHE_ROOT / namespace
117
+ if not base.exists():
118
+ return 0
119
+ removed = 0
120
+ with _lock:
121
+ for p in base.rglob("*.json"):
122
+ try:
123
+ p.unlink()
124
+ removed += 1
125
+ except Exception:
126
+ pass
127
+ return removed
128
+
129
+
130
+ def stats() -> dict:
131
+ """Return per-namespace entry count and total bytes on disk."""
132
+ out: dict[str, dict] = {}
133
+ if not CACHE_ROOT.exists():
134
+ return out
135
+ for ns_dir in CACHE_ROOT.iterdir():
136
+ if not ns_dir.is_dir():
137
+ continue
138
+ count = 0
139
+ size = 0
140
+ for p in ns_dir.rglob("*.json"):
141
+ try:
142
+ count += 1
143
+ size += p.stat().st_size
144
+ except Exception:
145
+ pass
146
+ out[ns_dir.name] = {"entries": count, "bytes": size}
147
+ return out
@@ -0,0 +1,71 @@
1
+ """
2
+ Deterministic tool-result memoization.
3
+
4
+ Only tools in DETERMINISTIC_TOOLS are eligible — anything that reads live state
5
+ (bash, sql_agent, web_scraper, sandbox) is bypassed because cached results
6
+ would silently mask reality.
7
+
8
+ Scope rules:
9
+ - "session": key includes the session_id (e.g. personal_details, user-bound configs)
10
+ - "global": key includes only tool_name + args (e.g. code_search, pdf_parser)
11
+ """
12
+ from typing import Any, Optional
13
+
14
+ from core.cache import store
15
+
16
+ NAMESPACE = "tool_results"
17
+
18
+ # Maps tool name → scope. Listed conservatively: only tools whose output is a
19
+ # pure function of their args (and optionally the per-user session).
20
+ DETERMINISTIC_TOOLS: dict[str, str] = {
21
+ "code_search": "global",
22
+ "pdf_parser": "global",
23
+ "xlsx_parser": "global",
24
+ "time": "global",
25
+ "code_indexer": "global",
26
+ "collect_data": "global",
27
+ "personal_details": "session",
28
+ }
29
+
30
+
31
+ def is_cacheable(tool_name: str) -> bool:
32
+ return tool_name in DETERMINISTIC_TOOLS
33
+
34
+
35
+ def _key(tool_name: str, tool_args: dict, session_id: Optional[str]) -> str:
36
+ scope = DETERMINISTIC_TOOLS.get(tool_name, "global")
37
+ sid = session_id or "_global_" if scope == "session" else "_global_"
38
+ return store.make_key("tool", tool_name, sid, tool_args or {})
39
+
40
+
41
+ def get(tool_name: str, tool_args: dict, session_id: Optional[str] = None) -> Optional[Any]:
42
+ """Return the cached tool result, or None if there's no live entry."""
43
+ if not is_cacheable(tool_name):
44
+ return None
45
+ entry = store.get(NAMESPACE, _key(tool_name, tool_args, session_id))
46
+ if entry is None:
47
+ return None
48
+ return entry.get("value")
49
+
50
+
51
+ def set(
52
+ tool_name: str,
53
+ tool_args: dict,
54
+ result: Any,
55
+ ttl_seconds: int = 3600,
56
+ session_id: Optional[str] = None,
57
+ ) -> None:
58
+ if not is_cacheable(tool_name):
59
+ return
60
+ store.set(
61
+ NAMESPACE,
62
+ _key(tool_name, tool_args, session_id),
63
+ result,
64
+ ttl_seconds=ttl_seconds,
65
+ meta={"tool_name": tool_name, "scope": DETERMINISTIC_TOOLS.get(tool_name)},
66
+ )
67
+
68
+
69
+ def clear_tool(tool_name: str) -> int:
70
+ """Helper for manual invalidation (e.g. after the user re-indexes their codebase)."""
71
+ return store.clear_namespace(f"{NAMESPACE}/{tool_name}")
@@ -50,8 +50,13 @@ def load_settings():
50
50
  "global_config": {},
51
51
  "vault_enabled": True,
52
52
  "vault_threshold": 100000,
53
- "auto_compact_enabled": False,
54
- "auto_compact_threshold": 100000,
53
+ "auto_compact_enabled": True,
54
+ "auto_compact_threshold": 80000,
55
+ # Prompt caching: decorate provider payloads with cache_control markers
56
+ # so subsequent ReAct turns reuse the cached system + tools prefix.
57
+ # ~50–80% cost reduction on multi-turn agents at the cost of a 25% write
58
+ # surcharge on the first turn. Disable only if a provider misbehaves.
59
+ "prompt_cache_enabled": True,
55
60
  "allow_db_write": False,
56
61
  "coding_agent_enabled": True,
57
62
  "report_agent_enabled": True,