@framers/agentos 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/memory-router/MemoryRouter.d.ts +195 -0
- package/dist/memory-router/MemoryRouter.d.ts.map +1 -0
- package/dist/memory-router/MemoryRouter.js +155 -0
- package/dist/memory-router/MemoryRouter.js.map +1 -0
- package/dist/memory-router/backend-costs.d.ts +67 -0
- package/dist/memory-router/backend-costs.d.ts.map +1 -0
- package/dist/memory-router/backend-costs.js +136 -0
- package/dist/memory-router/backend-costs.js.map +1 -0
- package/dist/memory-router/classifier.d.ts +169 -0
- package/dist/memory-router/classifier.d.ts.map +1 -0
- package/dist/memory-router/classifier.js +193 -0
- package/dist/memory-router/classifier.js.map +1 -0
- package/dist/memory-router/dispatcher.d.ts +115 -0
- package/dist/memory-router/dispatcher.d.ts.map +1 -0
- package/dist/memory-router/dispatcher.js +84 -0
- package/dist/memory-router/dispatcher.js.map +1 -0
- package/dist/memory-router/index.d.ts +124 -0
- package/dist/memory-router/index.d.ts.map +1 -0
- package/dist/memory-router/index.js +121 -0
- package/dist/memory-router/index.js.map +1 -0
- package/dist/memory-router/routing-tables.d.ts +125 -0
- package/dist/memory-router/routing-tables.d.ts.map +1 -0
- package/dist/memory-router/routing-tables.js +137 -0
- package/dist/memory-router/routing-tables.js.map +1 -0
- package/dist/memory-router/select-backend.d.ts +136 -0
- package/dist/memory-router/select-backend.d.ts.map +1 -0
- package/dist/memory-router/select-backend.js +210 -0
- package/dist/memory-router/select-backend.js.map +1 -0
- package/dist/sandbox/executor/CodeSandbox.d.ts.map +1 -1
- package/dist/sandbox/executor/CodeSandbox.js +24 -0
- package/dist/sandbox/executor/CodeSandbox.js.map +1 -1
- package/dist/sandbox/executor/ICodeSandbox.d.ts +7 -3
- package/dist/sandbox/executor/ICodeSandbox.d.ts.map +1 -1
- package/dist/sandbox/executor/ICodeSandbox.js.map +1 -1
- package/package.json +6 -1
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentOS MemoryRouter Module
|
|
3
|
+
*
|
|
4
|
+
* LLM-as-judge orchestrator that picks the best memory-recall architecture
|
|
5
|
+
* per query, with budget-aware dispatch across {canonical-hybrid,
|
|
6
|
+
* observational-memory-v10, observational-memory-v11} backends.
|
|
7
|
+
*
|
|
8
|
+
* **Architecture Overview:**
|
|
9
|
+
* ```
|
|
10
|
+
* ┌────────────────────────────────────────────────────────────────────┐
|
|
11
|
+
* │ MemoryRouter │
|
|
12
|
+
* │ Orchestrates classification + routing-table dispatch + optional │
|
|
13
|
+
* │ backend execution (via IMemoryDispatcher) │
|
|
14
|
+
* └────────────────────────────────────────────────────────────────────┘
|
|
15
|
+
* │
|
|
16
|
+
* ┌──────────────────────┼────────────────────────┐
|
|
17
|
+
* ▼ ▼ ▼
|
|
18
|
+
* ┌───────────────┐ ┌─────────────────┐ ┌───────────────────┐
|
|
19
|
+
* │ IMemoryClassi-│ │ selectBackend │ │ IMemoryDispatcher │
|
|
20
|
+
* │ fier │ │ (pure, budget- │ │ (optional exec) │
|
|
21
|
+
* │ (LLM judge) │ │ aware) │ │ │
|
|
22
|
+
* └───────────────┘ └─────────────────┘ └───────────────────┘
|
|
23
|
+
* │
|
|
24
|
+
* ┌─────────────────────┼─────────────────────┐
|
|
25
|
+
* ▼ ▼ ▼
|
|
26
|
+
* ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────────┐
|
|
27
|
+
* │ canonical- │ │ observational- │ │ observational- │
|
|
28
|
+
* │ hybrid │ │ memory-v10 │ │ memory-v11 │
|
|
29
|
+
* │ (BM25 + dense │ │ (synth obs log │ │ (v10 + verbatim │
|
|
30
|
+
* │ + Cohere │ │ + dyn router) │ │ citation for │
|
|
31
|
+
* │ rerank) │ │ │ │ KU/SSU) │
|
|
32
|
+
* └─────────────────┘ └─────────────────┘ └─────────────────────┘
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* **Design principles:**
|
|
36
|
+
*
|
|
37
|
+
* 1. **Pure where possible.** `selectBackend` is a pure function: given a
|
|
38
|
+
* category + routing table + cost data, it produces a deterministic
|
|
39
|
+
* decision with no I/O. Suitable for use inside cache-key construction
|
|
40
|
+
* and hot dispatch loops.
|
|
41
|
+
*
|
|
42
|
+
* 2. **LLM-provider-agnostic.** The classifier talks to an adapter interface
|
|
43
|
+
* ({@link IMemoryClassifierLLM}) — there is NO SDK import inside this
|
|
44
|
+
* module. Wire any provider (OpenAI, Anthropic, local, mock) via the
|
|
45
|
+
* adapter.
|
|
46
|
+
*
|
|
47
|
+
* 3. **Dispatch is injected.** Backend execution depends on how the caller's
|
|
48
|
+
* memory state is wired (OM backends need ingest-time setup, canonical
|
|
49
|
+
* does not). The router decides; {@link IMemoryDispatcher} executes.
|
|
50
|
+
* Callers who only need canonical-hybrid can register one executor and
|
|
51
|
+
* ignore the others.
|
|
52
|
+
*
|
|
53
|
+
* 4. **Shipping presets.** Three routing tables (minimize-cost, balanced,
|
|
54
|
+
* maximize-accuracy) ship with costs calibrated from LongMemEval-S
|
|
55
|
+
* Phase B N=500. Consumers can override routing tables, cost-points, or
|
|
56
|
+
* per-category mappings for custom workloads.
|
|
57
|
+
*
|
|
58
|
+
* 5. **Budget-aware.** Optional per-query USD budget with three modes
|
|
59
|
+
* (hard / soft / cheapest-fallback) so production cost ceilings are
|
|
60
|
+
* enforceable without bespoke retry logic.
|
|
61
|
+
*
|
|
62
|
+
* @module @framers/agentos/memory-router
|
|
63
|
+
*
|
|
64
|
+
* @example Minimal usage: just decide, execute yourself.
|
|
65
|
+
* ```ts
|
|
66
|
+
* import {
|
|
67
|
+
* LLMMemoryClassifier,
|
|
68
|
+
* MemoryRouter,
|
|
69
|
+
* } from '@framers/agentos/memory-router';
|
|
70
|
+
*
|
|
71
|
+
* const router = new MemoryRouter({
|
|
72
|
+
* classifier: new LLMMemoryClassifier({ llm: openaiAdapter }),
|
|
73
|
+
* preset: 'minimize-cost',
|
|
74
|
+
* });
|
|
75
|
+
*
|
|
76
|
+
* const { classifier, routing } = await router.decide(query);
|
|
77
|
+
* if (routing.chosenBackend === 'canonical-hybrid') {
|
|
78
|
+
* const traces = await mem.recall(query, { limit: 10 });
|
|
79
|
+
* // ...
|
|
80
|
+
* }
|
|
81
|
+
* ```
|
|
82
|
+
*
|
|
83
|
+
* @example Full pipeline: decide + dispatch.
|
|
84
|
+
* ```ts
|
|
85
|
+
* import {
|
|
86
|
+
* LLMMemoryClassifier,
|
|
87
|
+
* MemoryRouter,
|
|
88
|
+
* FunctionMemoryDispatcher,
|
|
89
|
+
* } from '@framers/agentos/memory-router';
|
|
90
|
+
*
|
|
91
|
+
* const router = new MemoryRouter({
|
|
92
|
+
* classifier: new LLMMemoryClassifier({ llm: openaiAdapter }),
|
|
93
|
+
* preset: 'minimize-cost',
|
|
94
|
+
* budget: { perQueryUsd: 0.05, mode: 'cheapest-fallback' },
|
|
95
|
+
* dispatcher: new FunctionMemoryDispatcher<ScoredTrace, { topK: number }>({
|
|
96
|
+
* 'canonical-hybrid': async (q, { topK }) =>
|
|
97
|
+
* mem.recall(q, { limit: topK }),
|
|
98
|
+
* 'observational-memory-v10': async (q, p) =>
|
|
99
|
+
* await omPipelineV10.recall(q, p),
|
|
100
|
+
* 'observational-memory-v11': async (q, p) =>
|
|
101
|
+
* await omPipelineV11.recall(q, p),
|
|
102
|
+
* }),
|
|
103
|
+
* });
|
|
104
|
+
*
|
|
105
|
+
* const { decision, traces, backend } = await router.decideAndDispatch(
|
|
106
|
+
* query,
|
|
107
|
+
* { topK: 10 },
|
|
108
|
+
* );
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
export type { MemoryQueryCategory, MemoryBackendId, MemoryRouterPreset, RoutingTable, } from './routing-tables.js';
|
|
112
|
+
export { MEMORY_QUERY_CATEGORIES } from './routing-tables.js';
|
|
113
|
+
export type { MemoryBackendCostPoint } from './backend-costs.js';
|
|
114
|
+
export type { MemoryBudgetMode, MemoryRouterConfig, MemoryRoutingDecision, } from './select-backend.js';
|
|
115
|
+
export type { IMemoryClassifier, IMemoryClassifierLLM, MemoryClassifierLLMRequest, MemoryClassifierLLMResponse, MemoryClassifierClassifyOptions, MemoryClassifierResult, LLMMemoryClassifierOptions, } from './classifier.js';
|
|
116
|
+
export type { IMemoryDispatcher, MemoryDispatchArgs, MemoryDispatchResult, MemoryBackendExecutor, MemoryBackendRegistry, } from './dispatcher.js';
|
|
117
|
+
export type { MemoryBudgetPolicy, MemoryRouterOptions, MemoryRouterDecideOptions, MemoryRouterDecision, MemoryRouterDispatchedDecision, } from './MemoryRouter.js';
|
|
118
|
+
export { MINIMIZE_COST_TABLE, BALANCED_TABLE, MAXIMIZE_ACCURACY_TABLE, PRESET_TABLES, } from './routing-tables.js';
|
|
119
|
+
export { TIER_1_CANONICAL_COSTS, TIER_2A_V10_COSTS, TIER_2B_V11_COSTS, DEFAULT_MEMORY_BACKEND_COSTS, } from './backend-costs.js';
|
|
120
|
+
export { selectBackend, MemoryRouterUnknownCategoryError, MemoryRouterBudgetExceededError, } from './select-backend.js';
|
|
121
|
+
export { CLASSIFIER_SYSTEM_PROMPT, CLASSIFIER_SYSTEM_PROMPT_FEWSHOT, SAFE_FALLBACK_CATEGORY, LLMMemoryClassifier, normalizeClassifierOutput, parseClassifierOutput, } from './classifier.js';
|
|
122
|
+
export { FunctionMemoryDispatcher, UnsupportedMemoryBackendError, } from './dispatcher.js';
|
|
123
|
+
export { MemoryRouter, MemoryRouterDispatcherMissingError, } from './MemoryRouter.js';
|
|
124
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/memory-router/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6GG;AAMH,YAAY,EACV,mBAAmB,EACnB,eAAe,EACf,kBAAkB,EAClB,YAAY,GACb,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,YAAY,EAAE,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAEjE,YAAY,EACV,gBAAgB,EAChB,kBAAkB,EAClB,qBAAqB,GACtB,MAAM,qBAAqB,CAAC;AAE7B,YAAY,EACV,iBAAiB,EACjB,oBAAoB,EACpB,0BAA0B,EAC1B,2BAA2B,EAC3B,+BAA+B,EAC/B,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,iBAAiB,CAAC;AAEzB,YAAY,EACV,iBAAiB,EACjB,kBAAkB,EAClB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,iBAAiB,CAAC;AAEzB,YAAY,EACV,kBAAkB,EAClB,mBAAmB,EACnB,yBAAyB,EACzB,oBAAoB,EACpB,8BAA8B,GAC/B,MAAM,mBAAmB,CAAC;AAM3B,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,uBAAuB,EACvB,aAAa,GACd,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,sBAAsB,EACtB,iBAAiB,EACjB,iBAAiB,EACjB,4BAA4B,GAC7B,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EACL,aAAa,EACb,gCAAgC,EAChC,+BAA+B,GAChC,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,wBAAwB,EACxB,gCAAgC,EAChC,sBAAsB,EACtB,mBAAmB,EACnB,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,wBAAwB,EACxB,6BAA6B,GAC9B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,YAAY,EACZ,kCAAkC,GACnC,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentOS MemoryRouter Module
|
|
3
|
+
*
|
|
4
|
+
* LLM-as-judge orchestrator that picks the best memory-recall architecture
|
|
5
|
+
* per query, with budget-aware dispatch across {canonical-hybrid,
|
|
6
|
+
* observational-memory-v10, observational-memory-v11} backends.
|
|
7
|
+
*
|
|
8
|
+
* **Architecture Overview:**
|
|
9
|
+
* ```
|
|
10
|
+
* ┌────────────────────────────────────────────────────────────────────┐
|
|
11
|
+
* │ MemoryRouter │
|
|
12
|
+
* │ Orchestrates classification + routing-table dispatch + optional │
|
|
13
|
+
* │ backend execution (via IMemoryDispatcher) │
|
|
14
|
+
* └────────────────────────────────────────────────────────────────────┘
|
|
15
|
+
* │
|
|
16
|
+
* ┌──────────────────────┼────────────────────────┐
|
|
17
|
+
* ▼ ▼ ▼
|
|
18
|
+
* ┌───────────────┐ ┌─────────────────┐ ┌───────────────────┐
|
|
19
|
+
* │ IMemoryClassi-│ │ selectBackend │ │ IMemoryDispatcher │
|
|
20
|
+
* │ fier │ │ (pure, budget- │ │ (optional exec) │
|
|
21
|
+
* │ (LLM judge) │ │ aware) │ │ │
|
|
22
|
+
* └───────────────┘ └─────────────────┘ └───────────────────┘
|
|
23
|
+
* │
|
|
24
|
+
* ┌─────────────────────┼─────────────────────┐
|
|
25
|
+
* ▼ ▼ ▼
|
|
26
|
+
* ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────────┐
|
|
27
|
+
* │ canonical- │ │ observational- │ │ observational- │
|
|
28
|
+
* │ hybrid │ │ memory-v10 │ │ memory-v11 │
|
|
29
|
+
* │ (BM25 + dense │ │ (synth obs log │ │ (v10 + verbatim │
|
|
30
|
+
* │ + Cohere │ │ + dyn router) │ │ citation for │
|
|
31
|
+
* │ rerank) │ │ │ │ KU/SSU) │
|
|
32
|
+
* └─────────────────┘ └─────────────────┘ └─────────────────────┘
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* **Design principles:**
|
|
36
|
+
*
|
|
37
|
+
* 1. **Pure where possible.** `selectBackend` is a pure function: given a
|
|
38
|
+
* category + routing table + cost data, it produces a deterministic
|
|
39
|
+
* decision with no I/O. Suitable for use inside cache-key construction
|
|
40
|
+
* and hot dispatch loops.
|
|
41
|
+
*
|
|
42
|
+
* 2. **LLM-provider-agnostic.** The classifier talks to an adapter interface
|
|
43
|
+
* ({@link IMemoryClassifierLLM}) — there is NO SDK import inside this
|
|
44
|
+
* module. Wire any provider (OpenAI, Anthropic, local, mock) via the
|
|
45
|
+
* adapter.
|
|
46
|
+
*
|
|
47
|
+
* 3. **Dispatch is injected.** Backend execution depends on how the caller's
|
|
48
|
+
* memory state is wired (OM backends need ingest-time setup, canonical
|
|
49
|
+
* does not). The router decides; {@link IMemoryDispatcher} executes.
|
|
50
|
+
* Callers who only need canonical-hybrid can register one executor and
|
|
51
|
+
* ignore the others.
|
|
52
|
+
*
|
|
53
|
+
* 4. **Shipping presets.** Three routing tables (minimize-cost, balanced,
|
|
54
|
+
* maximize-accuracy) ship with costs calibrated from LongMemEval-S
|
|
55
|
+
* Phase B N=500. Consumers can override routing tables, cost-points, or
|
|
56
|
+
* per-category mappings for custom workloads.
|
|
57
|
+
*
|
|
58
|
+
* 5. **Budget-aware.** Optional per-query USD budget with three modes
|
|
59
|
+
* (hard / soft / cheapest-fallback) so production cost ceilings are
|
|
60
|
+
* enforceable without bespoke retry logic.
|
|
61
|
+
*
|
|
62
|
+
* @module @framers/agentos/memory-router
|
|
63
|
+
*
|
|
64
|
+
* @example Minimal usage: just decide, execute yourself.
|
|
65
|
+
* ```ts
|
|
66
|
+
* import {
|
|
67
|
+
* LLMMemoryClassifier,
|
|
68
|
+
* MemoryRouter,
|
|
69
|
+
* } from '@framers/agentos/memory-router';
|
|
70
|
+
*
|
|
71
|
+
* const router = new MemoryRouter({
|
|
72
|
+
* classifier: new LLMMemoryClassifier({ llm: openaiAdapter }),
|
|
73
|
+
* preset: 'minimize-cost',
|
|
74
|
+
* });
|
|
75
|
+
*
|
|
76
|
+
* const { classifier, routing } = await router.decide(query);
|
|
77
|
+
* if (routing.chosenBackend === 'canonical-hybrid') {
|
|
78
|
+
* const traces = await mem.recall(query, { limit: 10 });
|
|
79
|
+
* // ...
|
|
80
|
+
* }
|
|
81
|
+
* ```
|
|
82
|
+
*
|
|
83
|
+
* @example Full pipeline: decide + dispatch.
|
|
84
|
+
* ```ts
|
|
85
|
+
* import {
|
|
86
|
+
* LLMMemoryClassifier,
|
|
87
|
+
* MemoryRouter,
|
|
88
|
+
* FunctionMemoryDispatcher,
|
|
89
|
+
* } from '@framers/agentos/memory-router';
|
|
90
|
+
*
|
|
91
|
+
* const router = new MemoryRouter({
|
|
92
|
+
* classifier: new LLMMemoryClassifier({ llm: openaiAdapter }),
|
|
93
|
+
* preset: 'minimize-cost',
|
|
94
|
+
* budget: { perQueryUsd: 0.05, mode: 'cheapest-fallback' },
|
|
95
|
+
* dispatcher: new FunctionMemoryDispatcher<ScoredTrace, { topK: number }>({
|
|
96
|
+
* 'canonical-hybrid': async (q, { topK }) =>
|
|
97
|
+
* mem.recall(q, { limit: topK }),
|
|
98
|
+
* 'observational-memory-v10': async (q, p) =>
|
|
99
|
+
* await omPipelineV10.recall(q, p),
|
|
100
|
+
* 'observational-memory-v11': async (q, p) =>
|
|
101
|
+
* await omPipelineV11.recall(q, p),
|
|
102
|
+
* }),
|
|
103
|
+
* });
|
|
104
|
+
*
|
|
105
|
+
* const { decision, traces, backend } = await router.decideAndDispatch(
|
|
106
|
+
* query,
|
|
107
|
+
* { topK: 10 },
|
|
108
|
+
* );
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
export { MEMORY_QUERY_CATEGORIES } from './routing-tables.js';
|
|
112
|
+
// ============================================================================
|
|
113
|
+
// Values
|
|
114
|
+
// ============================================================================
|
|
115
|
+
export { MINIMIZE_COST_TABLE, BALANCED_TABLE, MAXIMIZE_ACCURACY_TABLE, PRESET_TABLES, } from './routing-tables.js';
|
|
116
|
+
export { TIER_1_CANONICAL_COSTS, TIER_2A_V10_COSTS, TIER_2B_V11_COSTS, DEFAULT_MEMORY_BACKEND_COSTS, } from './backend-costs.js';
|
|
117
|
+
export { selectBackend, MemoryRouterUnknownCategoryError, MemoryRouterBudgetExceededError, } from './select-backend.js';
|
|
118
|
+
export { CLASSIFIER_SYSTEM_PROMPT, CLASSIFIER_SYSTEM_PROMPT_FEWSHOT, SAFE_FALLBACK_CATEGORY, LLMMemoryClassifier, normalizeClassifierOutput, parseClassifierOutput, } from './classifier.js';
|
|
119
|
+
export { FunctionMemoryDispatcher, UnsupportedMemoryBackendError, } from './dispatcher.js';
|
|
120
|
+
export { MemoryRouter, MemoryRouterDispatcherMissingError, } from './MemoryRouter.js';
|
|
121
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/memory-router/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6GG;AAYH,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAoC9D,+EAA+E;AAC/E,SAAS;AACT,+EAA+E;AAE/E,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,uBAAuB,EACvB,aAAa,GACd,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,sBAAsB,EACtB,iBAAiB,EACjB,iBAAiB,EACjB,4BAA4B,GAC7B,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EACL,aAAa,EACb,gCAAgC,EAChC,+BAA+B,GAChC,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,wBAAwB,EACxB,gCAAgC,EAChC,sBAAsB,EACtB,mBAAmB,EACnB,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,wBAAwB,EACxB,6BAA6B,GAC9B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,YAAY,EACZ,kCAAkC,GACnC,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file routing-tables.ts
|
|
3
|
+
* @description Preset routing tables for {@link MemoryRouter}.
|
|
4
|
+
*
|
|
5
|
+
* The MemoryRouter dispatches each query to one of the available
|
|
6
|
+
* {@link MemoryBackendId} backends based on the classifier-predicted
|
|
7
|
+
* {@link MemoryQueryCategory}. The mapping from category to backend is a
|
|
8
|
+
* "routing table" — a frozen object that callers can pass through unchanged
|
|
9
|
+
* for the shipping defaults, or override per-category for custom workloads.
|
|
10
|
+
*
|
|
11
|
+
* Three preset tables ship out of the box, each calibrated from Phase B
|
|
12
|
+
* N=500 LongMemEval-S measurements:
|
|
13
|
+
*
|
|
14
|
+
* - {@link MINIMIZE_COST_TABLE}: cheapest Pareto-dominant backend per
|
|
15
|
+
* category. Pays the OM premium only on multi-session and
|
|
16
|
+
* single-session-preference (the categories where the architectural lift
|
|
17
|
+
* exceeds the cost premium).
|
|
18
|
+
* - {@link BALANCED_TABLE}: trades modest cost for large latency wins on
|
|
19
|
+
* knowledge-update and temporal-reasoning.
|
|
20
|
+
* - {@link MAXIMIZE_ACCURACY_TABLE}: highest-accuracy backend per category;
|
|
21
|
+
* ties broken by cost. v2 (post-Phase-B-2026-04-24) routes
|
|
22
|
+
* temporal-reasoning back to canonical-hybrid after Phase B revealed the
|
|
23
|
+
* v1 routing's accuracy gain was within CI noise but paid OM ingest cost.
|
|
24
|
+
*
|
|
25
|
+
* @module @framers/agentos/memory-router/routing-tables
|
|
26
|
+
*/
|
|
27
|
+
/**
|
|
28
|
+
* The six question categories the LLM-as-judge classifier produces.
|
|
29
|
+
* Calibrated from LongMemEval-S categories; mappings to other benchmark
|
|
30
|
+
* taxonomies (e.g. LOCOMO single-hop / multi-hop / temporal /
|
|
31
|
+
* open-domain / adversarial) are handled at adapter boundaries.
|
|
32
|
+
*/
|
|
33
|
+
export declare const MEMORY_QUERY_CATEGORIES: readonly ["single-session-user", "single-session-assistant", "single-session-preference", "knowledge-update", "multi-session", "temporal-reasoning"];
|
|
34
|
+
/**
|
|
35
|
+
* Union of the six category string literals, derived from the elements of {@link MEMORY_QUERY_CATEGORIES}.
|
|
36
|
+
*/
|
|
37
|
+
export type MemoryQueryCategory = (typeof MEMORY_QUERY_CATEGORIES)[number];
|
|
38
|
+
/**
|
|
39
|
+
* The retrieval architecture identifiers the router can dispatch to.
|
|
40
|
+
*
|
|
41
|
+
* - `canonical-hybrid`: BM25 + dense + RRF fusion + Cohere rerank-v3.5
|
|
42
|
+
* over the raw memory traces. The default cheapest-and-fastest path.
|
|
43
|
+
* - `observational-memory-v10`: synthesized observation log fed to the
|
|
44
|
+
* reader, with classifier-driven routing inside the OM pipeline. No
|
|
45
|
+
* verbatim citation rule.
|
|
46
|
+
* - `observational-memory-v11`: same as v10 but with conditional
|
|
47
|
+
* verbatim citation appended for knowledge-update and
|
|
48
|
+
* single-session-user categories. Wins on multi-session and
|
|
49
|
+
* single-session-preference.
|
|
50
|
+
*
|
|
51
|
+
* Backend execution itself lives in {@link IMemoryDispatcher}; this type
|
|
52
|
+
* is the contract between the routing decision and the dispatcher.
|
|
53
|
+
*/
|
|
54
|
+
export type MemoryBackendId = 'canonical-hybrid' | 'observational-memory-v10' | 'observational-memory-v11';
|
|
55
|
+
/**
|
|
56
|
+
* The three shipping presets. Each preset corresponds to a distinct point
|
|
57
|
+
* on the Phase B-measured cost-accuracy Pareto frontier.
|
|
58
|
+
*/
|
|
59
|
+
export type MemoryRouterPreset = 'minimize-cost' | 'balanced' | 'maximize-accuracy';
|
|
60
|
+
/**
|
|
61
|
+
* A routing table maps every {@link MemoryQueryCategory} to its preferred
|
|
62
|
+
* {@link MemoryBackendId} for the given preset. Tables ship frozen so
|
|
63
|
+
* consumers cannot mutate the routing surface from outside the module.
|
|
64
|
+
*/
|
|
65
|
+
export interface RoutingTable { // shape shared by the three preset table constants declared in this file
|
|
66
|
+
readonly preset: MemoryRouterPreset; // name of the preset this table represents
|
|
67
|
+
readonly defaultMapping: Readonly<Record<MemoryQueryCategory, MemoryBackendId>>; // total mapping: Record requires a backend for every category
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Preset: minimize-cost.
|
|
71
|
+
*
|
|
72
|
+
* Pareto-dominant cheapest backend per category. Pays the OM premium only
|
|
73
|
+
* on the two categories where the architectural lift earns it
|
|
74
|
+
* (multi-session +6.8pp, single-session-preference +3.3pp). Every other
|
|
75
|
+
* category routes to canonical-hybrid where Phase B measurements show the
|
|
76
|
+
* cheaper backend either dominates or matches within CI noise.
|
|
77
|
+
*
|
|
78
|
+
* Phase B simulation: 73.9% accuracy at $0.092/correct; oracle ceiling
|
|
79
|
+
* 76.0% at $0.157/correct. **Pareto-dominates the all-Tier-2b flat
|
|
80
|
+
* baseline by 4.77x cost reduction at +0.5pp accuracy** on the
|
|
81
|
+
* LongMemEval-S Phase B distribution.
|
|
82
|
+
*
|
|
83
|
+
* Recommended default for cost-sensitive workloads.
|
|
84
|
+
*/
|
|
85
|
+
export declare const MINIMIZE_COST_TABLE: RoutingTable;
|
|
86
|
+
/**
|
|
87
|
+
* Preset: balanced.
|
|
88
|
+
*
|
|
89
|
+
* Trades 1.6x cost for >10x latency reductions on knowledge-update and
|
|
90
|
+
* temporal-reasoning. Phase B measurements show Tier 2a v10 ties Tier 1
|
|
91
|
+
* canonical on accuracy for these two categories at much lower latency
|
|
92
|
+
* (4-19s vs 80-100s) — the latency win comes from skipping per-turn
|
|
93
|
+
* cognitive replay in favor of synthesized observations.
|
|
94
|
+
*
|
|
95
|
+
* Phase B simulation: 74.5% accuracy at $0.205/correct; 2.12x cheaper
|
|
96
|
+
* than Tier 2b flat with comparable accuracy.
|
|
97
|
+
*
|
|
98
|
+
* Recommended for interactive workloads where latency matters and the
|
|
99
|
+
* cost premium over minimize-cost is acceptable.
|
|
100
|
+
*/
|
|
101
|
+
export declare const BALANCED_TABLE: RoutingTable;
|
|
102
|
+
/**
|
|
103
|
+
* Preset: maximize-accuracy (v2).
|
|
104
|
+
*
|
|
105
|
+
* Highest-accuracy backend per category, ties broken by cost. v2
|
|
106
|
+
* (2026-04-24, post-Phase-B) routes temporal-reasoning back to
|
|
107
|
+
* canonical-hybrid after Phase B revealed:
|
|
108
|
+
* - v1 routing (TR -> Tier 2a) paid OM ingest cost for a within-CI
|
|
109
|
+
* accuracy gain (71.0% Tier 2a vs 70.2% Tier 1) on a hold-out slice;
|
|
110
|
+
* - combined with classifier misroutes, the aggregate fell to 73.8%,
|
|
111
|
+
* below the 74% acceptance floor.
|
|
112
|
+
* v2 keeps TR on canonical-hybrid where it's cheapest and
|
|
113
|
+
* accuracy-equivalent.
|
|
114
|
+
*
|
|
115
|
+
* Phase B measured: 75.6% [71.8, 79.2] at $0.2434/correct, 65.6s avg
|
|
116
|
+
* latency.
|
|
117
|
+
*/
|
|
118
|
+
export declare const MAXIMIZE_ACCURACY_TABLE: RoutingTable;
|
|
119
|
+
/**
|
|
120
|
+
* Convenience registry of all three preset tables, keyed by preset name.
|
|
121
|
+
* Useful when surfacing presets through a CLI flag or config field where
|
|
122
|
+
* the preset name is a string and the consumer needs the table object.
|
|
123
|
+
*/
|
|
124
|
+
export declare const PRESET_TABLES: Readonly<Record<MemoryRouterPreset, RoutingTable>>;
|
|
125
|
+
//# sourceMappingURL=routing-tables.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"routing-tables.d.ts","sourceRoot":"","sources":["../../src/memory-router/routing-tables.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAMH;;;;;GAKG;AACH,eAAO,MAAM,uBAAuB,sJAO1B,CAAC;AAEX;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,CAAC,OAAO,uBAAuB,CAAC,CAAC,MAAM,CAAC,CAAC;AAE3E;;;;;;;;;;;;;;;GAeG;AACH,MAAM,MAAM,eAAe,GACvB,kBAAkB,GAClB,0BAA0B,GAC1B,0BAA0B,CAAC;AAE/B;;;GAGG;AACH,MAAM,MAAM,kBAAkB,GAC1B,eAAe,GACf,UAAU,GACV,mBAAmB,CAAC;AAExB;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,MAAM,EAAE,kBAAkB,CAAC;IACpC,QAAQ,CAAC,cAAc,EAAE,QAAQ,CAAC,MAAM,CAAC,mBAAmB,EAAE,eAAe,CAAC,CAAC,CAAC;CACjF;AAMD;;;;;;;;;;;;;;;GAeG;AACH,eAAO,MAAM,mBAAmB,EAAE,YAUhB,CAAC;AAEnB;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,cAAc,EAAE,YAUX,CAAC;AAEnB;;;;;;;;;;;;;;;GAeG;AACH,eAAO,MAAM,uBAAuB,EAAE,YAUpB,CAAC;AAEnB;;;;GAIG;AACH,eAAO,MAAM,aAAa,EAAE,QAAQ,CAAC,MAAM,CAAC,kBAAkB,EAAE,YAAY,CAAC,CAKzE,CAAC"}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file routing-tables.ts
|
|
3
|
+
* @description Preset routing tables for {@link MemoryRouter}.
|
|
4
|
+
*
|
|
5
|
+
* The MemoryRouter dispatches each query to one of the available
|
|
6
|
+
* {@link MemoryBackendId} backends based on the classifier-predicted
|
|
7
|
+
* {@link MemoryQueryCategory}. The mapping from category to backend is a
|
|
8
|
+
* "routing table" — a frozen object that callers can pass through unchanged
|
|
9
|
+
* for the shipping defaults, or override per-category for custom workloads.
|
|
10
|
+
*
|
|
11
|
+
* Three preset tables ship out of the box, each calibrated from Phase B
|
|
12
|
+
* N=500 LongMemEval-S measurements:
|
|
13
|
+
*
|
|
14
|
+
* - {@link MINIMIZE_COST_TABLE}: cheapest Pareto-dominant backend per
|
|
15
|
+
* category. Pays the OM premium only on multi-session and
|
|
16
|
+
* single-session-preference (the categories where the architectural lift
|
|
17
|
+
* exceeds the cost premium).
|
|
18
|
+
* - {@link BALANCED_TABLE}: trades modest cost for large latency wins on
|
|
19
|
+
* knowledge-update and temporal-reasoning.
|
|
20
|
+
* - {@link MAXIMIZE_ACCURACY_TABLE}: highest-accuracy backend per category;
|
|
21
|
+
* ties broken by cost. v2 (post-Phase-B-2026-04-24) routes
|
|
22
|
+
* temporal-reasoning back to canonical-hybrid after Phase B revealed the
|
|
23
|
+
* v1 routing's accuracy gain was within CI noise but paid OM ingest cost.
|
|
24
|
+
*
|
|
25
|
+
* @module @framers/agentos/memory-router/routing-tables
|
|
26
|
+
*/
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// Public types
|
|
29
|
+
// ============================================================================
|
|
30
|
+
/**
|
|
31
|
+
* The six question categories the LLM-as-judge classifier produces.
|
|
32
|
+
* Calibrated from LongMemEval-S categories; mappings to other benchmark
|
|
33
|
+
* taxonomies (e.g. LOCOMO single-hop / multi-hop / temporal /
|
|
34
|
+
* open-domain / adversarial) are handled at adapter boundaries.
|
|
35
|
+
*/
|
|
36
|
+
export const MEMORY_QUERY_CATEGORIES = [ // plain array literal. NOTE(review): not Object.freeze()d at runtime, unlike the preset tables in this file — confirm whether that is intended
|
|
37
|
+
'single-session-user',
|
|
38
|
+
'single-session-assistant',
|
|
39
|
+
'single-session-preference',
|
|
40
|
+
'knowledge-update',
|
|
41
|
+
'multi-session',
|
|
42
|
+
'temporal-reasoning',
|
|
43
|
+
]; // six entries; each one appears as a key in every preset table's defaultMapping
|
|
44
|
+
// ============================================================================
|
|
45
|
+
// Preset tables
|
|
46
|
+
// ============================================================================
|
|
47
|
+
/**
|
|
48
|
+
* Preset: minimize-cost.
|
|
49
|
+
*
|
|
50
|
+
* Pareto-dominant cheapest backend per category. Pays the OM premium only
|
|
51
|
+
* on the two categories where the architectural lift earns it
|
|
52
|
+
* (multi-session +6.8pp, single-session-preference +3.3pp). Every other
|
|
53
|
+
* category routes to canonical-hybrid where Phase B measurements show the
|
|
54
|
+
* cheaper backend either dominates or matches within CI noise.
|
|
55
|
+
*
|
|
56
|
+
* Phase B simulation: 73.9% accuracy at $0.092/correct; oracle ceiling
|
|
57
|
+
* 76.0% at $0.157/correct. **Pareto-dominates the all-Tier-2b flat
|
|
58
|
+
* baseline by 4.77x cost reduction at +0.5pp accuracy** on the
|
|
59
|
+
* LongMemEval-S Phase B distribution.
|
|
60
|
+
*
|
|
61
|
+
* Recommended default for cost-sensitive workloads.
|
|
62
|
+
*/
|
|
63
|
+
export const MINIMIZE_COST_TABLE = Object.freeze({ // outer freeze: `preset` and `defaultMapping` cannot be reassigned
|
|
64
|
+
preset: 'minimize-cost', // same string used as this table's key in PRESET_TABLES
|
|
65
|
+
defaultMapping: Object.freeze({ // frozen separately because Object.freeze is shallow
|
|
66
|
+
'single-session-assistant': 'canonical-hybrid',
|
|
67
|
+
'single-session-user': 'canonical-hybrid',
|
|
68
|
+
'temporal-reasoning': 'canonical-hybrid',
|
|
69
|
+
'knowledge-update': 'canonical-hybrid',
|
|
70
|
+
'multi-session': 'observational-memory-v11', // one of the only two categories this preset routes to an OM backend
|
|
71
|
+
'single-session-preference': 'observational-memory-v11', // the other OM-routed category in this preset
|
|
72
|
+
}),
|
|
73
|
+
});
|
|
74
|
+
/**
|
|
75
|
+
* Preset: balanced.
|
|
76
|
+
*
|
|
77
|
+
* Trades 1.6x cost for >10x latency reductions on knowledge-update and
|
|
78
|
+
* temporal-reasoning. Phase B measurements show Tier 2a v10 ties Tier 1
|
|
79
|
+
* canonical on accuracy for these two categories at much lower latency
|
|
80
|
+
* (4-19s vs 80-100s) — the latency win comes from skipping per-turn
|
|
81
|
+
* cognitive replay in favor of synthesized observations.
|
|
82
|
+
*
|
|
83
|
+
* Phase B simulation: 74.5% accuracy at $0.205/correct; 2.12x cheaper
|
|
84
|
+
* than Tier 2b flat with comparable accuracy.
|
|
85
|
+
*
|
|
86
|
+
* Recommended for interactive workloads where latency matters and the
|
|
87
|
+
* cost premium over minimize-cost is acceptable.
|
|
88
|
+
*/
|
|
89
|
+
export const BALANCED_TABLE = Object.freeze({ // outer freeze: `preset` and `defaultMapping` cannot be reassigned
|
|
90
|
+
preset: 'balanced', // same string used as this table's key in PRESET_TABLES
|
|
91
|
+
defaultMapping: Object.freeze({ // frozen separately because Object.freeze is shallow
|
|
92
|
+
'single-session-assistant': 'canonical-hybrid',
|
|
93
|
+
'single-session-user': 'canonical-hybrid',
|
|
94
|
+
'temporal-reasoning': 'observational-memory-v10', // differs from minimize-cost, which keeps this category on canonical-hybrid
|
|
95
|
+
'knowledge-update': 'observational-memory-v10', // differs from minimize-cost, which keeps this category on canonical-hybrid
|
|
96
|
+
'multi-session': 'observational-memory-v11',
|
|
97
|
+
'single-session-preference': 'observational-memory-v11',
|
|
98
|
+
}),
|
|
99
|
+
});
|
|
100
|
+
/**
|
|
101
|
+
* Preset: maximize-accuracy (v2).
|
|
102
|
+
*
|
|
103
|
+
* Highest-accuracy backend per category, ties broken by cost. v2
|
|
104
|
+
* (2026-04-24, post-Phase-B) routes temporal-reasoning back to
|
|
105
|
+
* canonical-hybrid after Phase B revealed:
|
|
106
|
+
* - v1 routing (TR -> Tier 2a) paid OM ingest cost for a within-CI
|
|
107
|
+
* accuracy gain (71.0% Tier 2a vs 70.2% Tier 1) on a hold-out slice;
|
|
108
|
+
* - combined with classifier misroutes, the aggregate fell to 73.8%,
|
|
109
|
+
* below the 74% acceptance floor.
|
|
110
|
+
* v2 keeps TR on canonical-hybrid where it's cheapest and
|
|
111
|
+
* accuracy-equivalent.
|
|
112
|
+
*
|
|
113
|
+
* Phase B measured: 75.6% [71.8, 79.2] at $0.2434/correct, 65.6s avg
|
|
114
|
+
* latency.
|
|
115
|
+
*/
|
|
116
|
+
export const MAXIMIZE_ACCURACY_TABLE = Object.freeze({
|
|
117
|
+
preset: 'maximize-accuracy',
|
|
118
|
+
defaultMapping: Object.freeze({
|
|
119
|
+
'single-session-assistant': 'canonical-hybrid',
|
|
120
|
+
'single-session-user': 'observational-memory-v11',
|
|
121
|
+
'temporal-reasoning': 'canonical-hybrid',
|
|
122
|
+
'knowledge-update': 'observational-memory-v11',
|
|
123
|
+
'multi-session': 'observational-memory-v11',
|
|
124
|
+
'single-session-preference': 'observational-memory-v11',
|
|
125
|
+
}),
|
|
126
|
+
});
|
|
127
|
+
/**
|
|
128
|
+
* Convenience registry of all three preset tables, keyed by preset name.
|
|
129
|
+
* Useful when surfacing presets through a CLI flag or config field where
|
|
130
|
+
* the preset name is a string and the consumer needs the table object.
|
|
131
|
+
*/
|
|
132
|
+
export const PRESET_TABLES = Object.freeze({
|
|
133
|
+
'minimize-cost': MINIMIZE_COST_TABLE,
|
|
134
|
+
'balanced': BALANCED_TABLE,
|
|
135
|
+
'maximize-accuracy': MAXIMIZE_ACCURACY_TABLE,
|
|
136
|
+
});
|
|
137
|
+
//# sourceMappingURL=routing-tables.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"routing-tables.js","sourceRoot":"","sources":["../../src/memory-router/routing-tables.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,+EAA+E;AAC/E,eAAe;AACf,+EAA+E;AAE/E;;;;;GAKG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,qBAAqB;IACrB,0BAA0B;IAC1B,2BAA2B;IAC3B,kBAAkB;IAClB,eAAe;IACf,oBAAoB;CACZ,CAAC;AA+CX,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAiB,MAAM,CAAC,MAAM,CAAC;IAC7D,MAAM,EAAE,eAAwB;IAChC,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC;QAC5B,0BAA0B,EAAE,kBAAkB;QAC9C,qBAAqB,EAAE,kBAAkB;QACzC,oBAAoB,EAAE,kBAAkB;QACxC,kBAAkB,EAAE,kBAAkB;QACtC,eAAe,EAAE,0BAA0B;QAC3C,2BAA2B,EAAE,0BAA0B;KACxD,CAAC;CACH,CAAiB,CAAC;AAEnB;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,MAAM,cAAc,GAAiB,MAAM,CAAC,MAAM,CAAC;IACxD,MAAM,EAAE,UAAmB;IAC3B,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC;QAC5B,0BAA0B,EAAE,kBAAkB;QAC9C,qBAAqB,EAAE,kBAAkB;QACzC,oBAAoB,EAAE,0BAA0B;QAChD,kBAAkB,EAAE,0BAA0B;QAC9C,eAAe,EAAE,0BAA0B;QAC3C,2BAA2B,EAAE,0BAA0B;KACxD,CAAC;CACH,CAAiB,CAAC;AAEnB;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAiB,MAAM,CAAC,MAAM,CAAC;IACjE,MAAM,EAAE,mBAA4B;IACpC,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC;QAC5B,0BAA0B,EAAE,kBAAkB;QAC9C,qBAAqB,EAAE,0BAA0B;QACjD,oBAAoB,EAAE,kBAAkB;QACxC,kBAAkB,EAAE,0BAA0B;QAC9C,eAAe,EAAE,0BAA0B;QAC3C,2BAA2B,EAAE,0BAA0B;KACxD,CAAC;CACH,CAAiB,CAAC;AAEnB;;;;GAIG;AACH,MAAM,CAAC,MAAM,aAAa,GACxB,MAAM,CAAC,MAAM,CAAC;IACZ,eAAe,EAAE,mBAAmB;IACpC,UAAU,EAAE,cAAc;IAC1B,mBAAmB,EAAE,uBAAuB;CAC7C,CAAC,CAAC"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file select-backend.ts
|
|
3
|
+
* @description Pure function that turns a classifier-predicted category
|
|
4
|
+
* + a {@link MemoryRouterConfig} into a {@link MemoryRoutingDecision}.
|
|
5
|
+
*
|
|
6
|
+
* Stateless. Deterministic. No I/O. Suitable for use inside hot dispatch
|
|
7
|
+
* loops and inside cache-key construction (the function's output is a
|
|
8
|
+
* pure function of its inputs).
|
|
9
|
+
*
|
|
10
|
+
* The decision carries:
|
|
11
|
+
* - the chosen {@link MemoryBackendId},
|
|
12
|
+
* - the predicted category (and optional ground-truth for telemetry),
|
|
13
|
+
* - the estimated USD cost of the routing pick,
|
|
14
|
+
* - the budget ceiling (if any) and whether the pick exceeded it,
|
|
15
|
+
* - a human-readable reason explaining the routing path taken.
|
|
16
|
+
*
|
|
17
|
+
* @module @framers/agentos/memory-router/select-backend
|
|
18
|
+
*/
|
|
19
|
+
import type { MemoryBackendCostPoint } from './backend-costs.js';
|
|
20
|
+
import type { MemoryBackendId, MemoryQueryCategory, MemoryRouterPreset, RoutingTable } from './routing-tables.js';
|
|
21
|
+
/**
|
|
22
|
+
* Budget enforcement modes:
|
|
23
|
+
* - `hard`: throw {@link MemoryRouterBudgetExceededError} if the
|
|
24
|
+
* routing-table pick exceeds the per-query USD budget. Lets callers
|
|
25
|
+
* escalate at the application layer (e.g. fall back to a reduced
|
|
26
|
+
* pipeline or surface a 402-style error).
|
|
27
|
+
* - `soft`: exceed the budget only when the picked backend has a
|
|
28
|
+
* better USD-per-correct ratio than the cheapest backend that fits.
|
|
29
|
+
* Prefers accuracy-economical overflows.
|
|
30
|
+
* - `cheapest-fallback`: silently downgrade to the cheapest backend
|
|
31
|
+
* that fits the budget. Suitable for cost-strict workloads where
|
|
32
|
+
* correctness gracefully degrades.
|
|
33
|
+
*/
|
|
34
|
+
export type MemoryBudgetMode = 'hard' | 'soft' | 'cheapest-fallback';
|
|
35
|
+
/**
|
|
36
|
+
* Configuration object for {@link selectBackend}. Bundles the routing
|
|
37
|
+
* table, cost data, and budget policy into a single value the function
|
|
38
|
+
* can reason about deterministically.
|
|
39
|
+
*/
|
|
40
|
+
export interface MemoryRouterConfig {
|
|
41
|
+
readonly table: RoutingTable;
|
|
42
|
+
readonly budgetPerQuery: number | null;
|
|
43
|
+
readonly budgetMode: MemoryBudgetMode;
|
|
44
|
+
readonly backendCosts: Readonly<Record<MemoryBackendId, MemoryBackendCostPoint>>;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Output of {@link selectBackend}. The chosen backend plus full telemetry
|
|
48
|
+
* about how the routing decision was made.
|
|
49
|
+
*/
|
|
50
|
+
export interface MemoryRoutingDecision {
|
|
51
|
+
readonly predictedCategory: MemoryQueryCategory;
|
|
52
|
+
/**
|
|
53
|
+
* Optional ground-truth category, for telemetry only. When the caller
|
|
54
|
+
* has access to gold labels (e.g. during benchmarking), passing them
|
|
55
|
+
* through here lets downstream analysis distinguish classifier
|
|
56
|
+
* misroutes from architectural misses without needing a second pass.
|
|
57
|
+
*/
|
|
58
|
+
readonly groundTruthCategory: MemoryQueryCategory | null;
|
|
59
|
+
readonly chosenBackend: MemoryBackendId;
|
|
60
|
+
readonly chosenBackendReason: string;
|
|
61
|
+
readonly estimatedCostUsd: number;
|
|
62
|
+
readonly budgetCeiling: number | null;
|
|
63
|
+
readonly budgetExceeded: boolean;
|
|
64
|
+
readonly preset: MemoryRouterPreset;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Thrown when the predicted category is not in the routing table. Should
|
|
68
|
+
* never fire with the three shipping presets (each covers all six
|
|
69
|
+
* categories) but guards custom-table misuse.
|
|
70
|
+
*/
|
|
71
|
+
export declare class MemoryRouterUnknownCategoryError extends Error {
|
|
72
|
+
readonly category: string;
|
|
73
|
+
constructor(category: string);
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Thrown by `hard` budget mode when the routing-table pick exceeds the
|
|
77
|
+
* per-query USD ceiling. Carries the picked backend + cost + budget so
|
|
78
|
+
* application-layer fallbacks can decide what to do (fall back to a
|
|
79
|
+
* different memory architecture, return a typed 402 to the user, etc.).
|
|
80
|
+
*/
|
|
81
|
+
export declare class MemoryRouterBudgetExceededError extends Error {
|
|
82
|
+
readonly backend: MemoryBackendId;
|
|
83
|
+
readonly cost: number;
|
|
84
|
+
readonly budget: number;
|
|
85
|
+
constructor(backend: MemoryBackendId, cost: number, budget: number);
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Pure routing decision: maps a predicted category to a backend choice
|
|
89
|
+
* given a routing table + budget policy + cost-points data.
|
|
90
|
+
*
|
|
91
|
+
* Algorithm:
|
|
92
|
+
* 1. Look up the table's preferred backend for the predicted category.
|
|
93
|
+
* Throw if missing (custom-table misuse).
|
|
94
|
+
* 2. If no budget is set, return the table's pick.
|
|
95
|
+
* 3. If the pick fits the budget, return it.
|
|
96
|
+
* 4. If the pick exceeds:
|
|
97
|
+
* - `hard`: throw {@link MemoryRouterBudgetExceededError}.
|
|
98
|
+
* - `cheapest-fallback`: pick the cheapest backend that fits;
|
|
99
|
+
* if none fits, pick the absolute cheapest and flag exceeded.
|
|
100
|
+
* - `soft`: keep the pick if its $/correct beats that of the cheapest backend that fits;
|
|
101
|
+
* otherwise downgrade to the cheapest backend that fits. The no-backend-fits case
|
|
102
|
+
* falls through to absolute-cheapest with budgetExceeded=true.
|
|
103
|
+
*
|
|
104
|
+
* @param args
|
|
105
|
+
* @param args.predictedCategory - Category predicted by the LLM-as-judge classifier.
|
|
106
|
+
* @param args.groundTruthCategory - Gold-label category for telemetry, or null in production.
|
|
107
|
+
* @param args.config - Routing table + budget policy + cost-points map.
|
|
108
|
+
*
|
|
109
|
+
* @returns A {@link MemoryRoutingDecision} describing the chosen backend.
|
|
110
|
+
*
|
|
111
|
+
* @throws {@link MemoryRouterUnknownCategoryError} when the table does not
|
|
112
|
+
* cover `predictedCategory`.
|
|
113
|
+
* @throws {@link MemoryRouterBudgetExceededError} when `budgetMode === 'hard'`
|
|
114
|
+
* and the routing-table pick exceeds the budget.
|
|
115
|
+
*
|
|
116
|
+
* @example
|
|
117
|
+
* ```ts
|
|
118
|
+
* const decision = selectBackend({
|
|
119
|
+
* predictedCategory: 'multi-session',
|
|
120
|
+
* groundTruthCategory: null,
|
|
121
|
+
* config: {
|
|
122
|
+
* table: MINIMIZE_COST_TABLE,
|
|
123
|
+
* budgetPerQuery: 0.05,
|
|
124
|
+
* budgetMode: 'cheapest-fallback',
|
|
125
|
+
* backendCosts: DEFAULT_MEMORY_BACKEND_COSTS,
|
|
126
|
+
* },
|
|
127
|
+
* });
|
|
128
|
+
* console.log(decision.chosenBackend); // 'observational-memory-v11' (fits)
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
export declare function selectBackend(args: {
|
|
132
|
+
predictedCategory: MemoryQueryCategory;
|
|
133
|
+
groundTruthCategory: MemoryQueryCategory | null;
|
|
134
|
+
config: MemoryRouterConfig;
|
|
135
|
+
}): MemoryRoutingDecision;
|
|
136
|
+
//# sourceMappingURL=select-backend.d.ts.map
|