clementine-agent 1.0.85 → 1.0.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -23,6 +23,8 @@ export interface RouteDecision {
|
|
|
23
23
|
confidence: number;
|
|
24
24
|
reasoning: string;
|
|
25
25
|
}
|
|
26
|
+
/**
 * Empties the route-decision cache.
 *
 * Intended for tests only, so that one run's cached decisions cannot
 * leak into the next.
 */
export declare function _resetRouteCache(): void;
|
|
26
28
|
export declare function isDirectImperative(userMessage: string): {
|
|
27
29
|
match: boolean;
|
|
28
30
|
pattern?: string;
|
|
@@ -18,6 +18,49 @@
|
|
|
18
18
|
*/
|
|
19
19
|
import pino from 'pino';
|
|
20
20
|
const logger = pino({ name: 'clementine.route-classifier' });
|
|
21
|
+
// ── Route-decision cache (LRU + TTL) ─────────────────────────────────
// A repeated message against the same available-agents set yields the
// same decision, so a cache hit lets the caller skip the Haiku LLM call
// entirely (saves 1-2 seconds per repeat).
// Entries are limited both in number and in age so that a stale roster
// cannot keep producing wrong routes.

// Maximum number of entries kept before the oldest ones are evicted.
const ROUTE_CACHE_MAX = 100;
// Lifetime of one entry: five minutes, expressed in milliseconds.
const ROUTE_CACHE_TTL_MS = 1000 * 60 * 5;
// A Map iterates in insertion order; deleting and re-inserting an entry
// on every hit therefore keeps the least-recently-used key first.
const routeCache = new Map();
|
|
29
|
+
/**
 * Builds the cache key for a (message, roster) pair.
 *
 * @param text   Raw user message. Every run of whitespace is collapsed to a
 *               single space and the ends are trimmed, so trailing-newline
 *               and spacing variations of one message share a cache entry.
 * @param agents Available agents; only each element's `slug` is read.
 * @returns A string that is equal for two calls exactly when the normalized
 *          text and the set of slugs (in any order) are equal.
 */
function cacheKey(text, agents) {
    const normText = text.replace(/\s+/g, ' ').trim();
    // map() yields a fresh array, so sorting it makes roster order
    // irrelevant without reordering the caller's `agents`. Hiring or
    // firing an agent changes the slug list and therefore the key.
    const slugs = agents.map(a => a.slug).sort();
    // Encode the parts as JSON rather than joining with ',' and '::'.
    // With plain joins, a slug or message that itself contains a delimiter
    // could collide with a different roster/message pair (e.g. slugs
    // ['a,b'] vs ['a', 'b']); JSON quoting keeps each part unambiguous.
    return JSON.stringify([slugs, normText]);
}
|
|
38
|
+
/**
 * Looks up a cache entry, enforcing its TTL and refreshing its recency.
 *
 * @param key Cache key to look up.
 * @param now Current time in milliseconds, compared against the entry's
 *            `expiresAt`.
 * @returns The stored entry object (not just its decision) when present and
 *          unexpired; otherwise `null`. An expired entry is removed.
 */
function cacheGet(key, now) {
    const cached = routeCache.get(key);
    if (!cached) {
        return null;
    }
    const expired = cached.expiresAt <= now;
    // Both outcomes start by removing the key: an expired entry stays
    // removed, a live one is re-inserted so it moves to the end of the
    // Map's insertion order (most recently used).
    routeCache.delete(key);
    if (expired) {
        return null;
    }
    routeCache.set(key, cached);
    return cached;
}
|
|
51
|
+
/**
 * Stores a decision under `key` and evicts the oldest entries while the
 * cache holds more than ROUTE_CACHE_MAX items.
 *
 * @param key      Cache key to store under.
 * @param decision Value to cache; stored as given (no copy is made).
 * @param now      Current time in milliseconds; the entry expires at
 *                 `now + ROUTE_CACHE_TTL_MS`.
 */
function cachePut(key, decision, now) {
    // Map.set() on an existing key updates the value but keeps the key's
    // original insertion position. Remove any previous entry first so a
    // re-put counts as most-recently-used instead of staying next in line
    // for eviction.
    routeCache.delete(key);
    routeCache.set(key, { decision, expiresAt: now + ROUTE_CACHE_TTL_MS });
    while (routeCache.size > ROUTE_CACHE_MAX) {
        // The first key in iteration order is the least recently used.
        const oldest = routeCache.keys().next().value;
        if (oldest === undefined)
            break;
        routeCache.delete(oldest);
    }
}
|
|
60
|
+
/**
 * Test-only helper: drops every cached route decision so that separate
 * test runs do not observe each other's entries.
 */
export function _resetRouteCache() {
    routeCache.clear();
}
|
|
21
64
|
/**
|
|
22
65
|
* Direct-imperative guardrail.
|
|
23
66
|
*
|
|
@@ -229,6 +272,15 @@ export async function classifyRoute(userMessage, agents, gateway) {
|
|
|
229
272
|
logger.debug({ trigger: 'question-opener' }, 'Routing skipped — question-opener');
|
|
230
273
|
return null;
|
|
231
274
|
}
|
|
275
|
+
// Cache hit short-circuit — same message + same roster as a recent
|
|
276
|
+
// call gets the same decision without firing the Haiku classifier.
|
|
277
|
+
const now = Date.now();
|
|
278
|
+
const key = cacheKey(userMessage, agents);
|
|
279
|
+
const hit = cacheGet(key, now);
|
|
280
|
+
if (hit) {
|
|
281
|
+
logger.debug({ trigger: 'cache-hit', cachedAgent: hit.decision?.targetAgent ?? 'clementine' }, 'Route classifier cache hit');
|
|
282
|
+
return hit.decision;
|
|
283
|
+
}
|
|
232
284
|
// LLM classifier for everything else.
|
|
233
285
|
const prompt = buildPrompt(userMessage, agents);
|
|
234
286
|
let raw;
|
|
@@ -239,6 +291,7 @@ export async function classifyRoute(userMessage, agents, gateway) {
|
|
|
239
291
|
}
|
|
240
292
|
catch (err) {
|
|
241
293
|
logger.warn({ err }, 'Route classifier call failed');
|
|
294
|
+
// Don't cache failures — next call should retry the LLM.
|
|
242
295
|
return null;
|
|
243
296
|
}
|
|
244
297
|
const decision = parseResponse(raw);
|
|
@@ -254,6 +307,7 @@ export async function classifyRoute(userMessage, agents, gateway) {
|
|
|
254
307
|
decision.targetAgent = 'clementine';
|
|
255
308
|
decision.confidence = Math.min(decision.confidence, 0.3);
|
|
256
309
|
}
|
|
310
|
+
cachePut(key, decision, now);
|
|
257
311
|
return decision;
|
|
258
312
|
}
|
|
259
313
|
//# sourceMappingURL=route-classifier.js.map
|