@framers/agentos 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ingest-router/IngestRouter.d.ts +72 -0
- package/dist/ingest-router/IngestRouter.d.ts.map +1 -0
- package/dist/ingest-router/IngestRouter.js +98 -0
- package/dist/ingest-router/IngestRouter.js.map +1 -0
- package/dist/ingest-router/classifier.d.ts +63 -0
- package/dist/ingest-router/classifier.d.ts.map +1 -0
- package/dist/ingest-router/classifier.js +111 -0
- package/dist/ingest-router/classifier.js.map +1 -0
- package/dist/ingest-router/costs.d.ts +48 -0
- package/dist/ingest-router/costs.d.ts.map +1 -0
- package/dist/ingest-router/costs.js +63 -0
- package/dist/ingest-router/costs.js.map +1 -0
- package/dist/ingest-router/dispatcher.d.ts +35 -0
- package/dist/ingest-router/dispatcher.d.ts.map +1 -0
- package/dist/ingest-router/dispatcher.js +32 -0
- package/dist/ingest-router/dispatcher.js.map +1 -0
- package/dist/ingest-router/index.d.ts +43 -0
- package/dist/ingest-router/index.d.ts.map +1 -0
- package/dist/ingest-router/index.js +37 -0
- package/dist/ingest-router/index.js.map +1 -0
- package/dist/ingest-router/routing-tables.d.ts +122 -0
- package/dist/ingest-router/routing-tables.d.ts.map +1 -0
- package/dist/ingest-router/routing-tables.js +145 -0
- package/dist/ingest-router/routing-tables.js.map +1 -0
- package/dist/ingest-router/select-strategy.d.ts +67 -0
- package/dist/ingest-router/select-strategy.d.ts.map +1 -0
- package/dist/ingest-router/select-strategy.js +100 -0
- package/dist/ingest-router/select-strategy.js.map +1 -0
- package/dist/memory-router/MemoryRouter.d.ts +195 -0
- package/dist/memory-router/MemoryRouter.d.ts.map +1 -0
- package/dist/memory-router/MemoryRouter.js +155 -0
- package/dist/memory-router/MemoryRouter.js.map +1 -0
- package/dist/memory-router/adaptive.d.ts +142 -0
- package/dist/memory-router/adaptive.d.ts.map +1 -0
- package/dist/memory-router/adaptive.js +202 -0
- package/dist/memory-router/adaptive.js.map +1 -0
- package/dist/memory-router/backend-costs.d.ts +67 -0
- package/dist/memory-router/backend-costs.d.ts.map +1 -0
- package/dist/memory-router/backend-costs.js +136 -0
- package/dist/memory-router/backend-costs.js.map +1 -0
- package/dist/memory-router/classifier.d.ts +169 -0
- package/dist/memory-router/classifier.d.ts.map +1 -0
- package/dist/memory-router/classifier.js +193 -0
- package/dist/memory-router/classifier.js.map +1 -0
- package/dist/memory-router/dispatcher.d.ts +115 -0
- package/dist/memory-router/dispatcher.d.ts.map +1 -0
- package/dist/memory-router/dispatcher.js +84 -0
- package/dist/memory-router/dispatcher.js.map +1 -0
- package/dist/memory-router/index.d.ts +126 -0
- package/dist/memory-router/index.d.ts.map +1 -0
- package/dist/memory-router/index.js +122 -0
- package/dist/memory-router/index.js.map +1 -0
- package/dist/memory-router/routing-tables.d.ts +125 -0
- package/dist/memory-router/routing-tables.d.ts.map +1 -0
- package/dist/memory-router/routing-tables.js +137 -0
- package/dist/memory-router/routing-tables.js.map +1 -0
- package/dist/memory-router/select-backend.d.ts +136 -0
- package/dist/memory-router/select-backend.d.ts.map +1 -0
- package/dist/memory-router/select-backend.js +210 -0
- package/dist/memory-router/select-backend.js.map +1 -0
- package/dist/multi-stage-guardrails/index.d.ts +190 -0
- package/dist/multi-stage-guardrails/index.d.ts.map +1 -0
- package/dist/multi-stage-guardrails/index.js +186 -0
- package/dist/multi-stage-guardrails/index.js.map +1 -0
- package/dist/read-router/ReadRouter.d.ts +58 -0
- package/dist/read-router/ReadRouter.d.ts.map +1 -0
- package/dist/read-router/ReadRouter.js +91 -0
- package/dist/read-router/ReadRouter.js.map +1 -0
- package/dist/read-router/classifier.d.ts +54 -0
- package/dist/read-router/classifier.d.ts.map +1 -0
- package/dist/read-router/classifier.js +104 -0
- package/dist/read-router/classifier.js.map +1 -0
- package/dist/read-router/costs.d.ts +23 -0
- package/dist/read-router/costs.d.ts.map +1 -0
- package/dist/read-router/costs.js +51 -0
- package/dist/read-router/costs.js.map +1 -0
- package/dist/read-router/dispatcher.d.ts +33 -0
- package/dist/read-router/dispatcher.d.ts.map +1 -0
- package/dist/read-router/dispatcher.js +29 -0
- package/dist/read-router/dispatcher.js.map +1 -0
- package/dist/read-router/index.d.ts +23 -0
- package/dist/read-router/index.d.ts.map +1 -0
- package/dist/read-router/index.js +17 -0
- package/dist/read-router/index.js.map +1 -0
- package/dist/read-router/routing-tables.d.ts +85 -0
- package/dist/read-router/routing-tables.d.ts.map +1 -0
- package/dist/read-router/routing-tables.js +79 -0
- package/dist/read-router/routing-tables.js.map +1 -0
- package/dist/read-router/select-strategy.d.ts +42 -0
- package/dist/read-router/select-strategy.d.ts.map +1 -0
- package/dist/read-router/select-strategy.js +92 -0
- package/dist/read-router/select-strategy.js.map +1 -0
- package/package.json +21 -1
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file adaptive.ts
|
|
3
|
+
* @description Self-calibrating routing-table generator.
|
|
4
|
+
*
|
|
5
|
+
* The shipping {@link MINIMIZE_COST_TABLE} / {@link BALANCED_TABLE} /
|
|
6
|
+
* {@link MAXIMIZE_ACCURACY_TABLE} are calibrated from LongMemEval-S
|
|
7
|
+
* Phase B N=500 measurements. For workloads whose cost-accuracy profile
|
|
8
|
+
* diverges from that distribution, those tables are not optimal.
|
|
9
|
+
*
|
|
10
|
+
* AdaptiveMemoryRouter takes a workload-specific calibration dataset
|
|
11
|
+
* (a list of {category, backend, costUsd, correct} samples) and derives
|
|
12
|
+
* a routing table from it. Same MemoryRouter API; different table
|
|
13
|
+
* source.
|
|
14
|
+
*
|
|
15
|
+
* Calibration workflow:
|
|
16
|
+
* 1. Run a Phase A sweep on your workload (a few hundred queries
|
|
17
|
+
* across a small subset of expected categories, dispatched to all
|
|
18
|
+
* candidate backends).
|
|
19
|
+
* 2. Each sample contributes one (category, backend, costUsd, correct)
|
|
20
|
+
* data point.
|
|
21
|
+
* 3. AdaptiveMemoryRouter aggregates these into per-(category, backend)
|
|
22
|
+
* mean cost + mean accuracy.
|
|
23
|
+
* 4. Apply a preset selection rule:
|
|
24
|
+
* - 'minimize-cost': cheapest backend within 2pp of best accuracy;
|
|
25
|
+
 *      if none within tolerance, fall back to the static preset table.
|
|
26
|
+
* - 'maximize-accuracy': highest accuracy; ties broken by cost.
|
|
27
|
+
* - 'balanced': best $/correct (mean cost divided by mean
|
|
28
|
+
* accuracy).
|
|
29
|
+
* 5. Categories with insufficient samples fall back to the static
|
|
30
|
+
* preset table.
|
|
31
|
+
*
|
|
32
|
+
* The router is otherwise identical to {@link MemoryRouter} — same
|
|
33
|
+
* decide() / decideAndDispatch() / budget-aware dispatch.
|
|
34
|
+
*
|
|
35
|
+
* @module @framers/agentos/memory-router/adaptive
|
|
36
|
+
*/
|
|
37
|
+
import { MemoryRouter } from './MemoryRouter.js';
|
|
38
|
+
import { PRESET_TABLES, } from './routing-tables.js';
|
|
39
|
+
// ============================================================================
|
|
40
|
+
// Aggregation
|
|
41
|
+
// ============================================================================
|
|
42
|
+
/**
 * Roll up raw calibration samples into per-(category, backend) cells.
 *
 * Each returned cell carries:
 *   - `n`            number of samples seen for that (category, backend),
 *   - `meanCost`     sum of `costUsd` divided by `n`,
 *   - `meanAccuracy` sum of `correct` divided by `n`.
 *
 * Accumulation goes through `Map`s rather than plain objects so that a
 * category or backend id that collides with an `Object.prototype` member
 * (`constructor`, `toString`, `__proto__`, ...) is treated as ordinary
 * data instead of reading — and then mutating — the inherited property.
 *
 * @param samples Iterable of `{ category, backend, costUsd, correct }`
 *   samples. `correct` is coerced with `Number()`, so `true`/`false` and
 *   `1`/`0` are both accepted.
 * @returns Plain object keyed by category, then by backend. Empty object
 *   when `samples` is empty.
 */
export function aggregateCalibration(samples) {
    // category -> (backend -> running totals)
    const totals = new Map();
    for (const sample of samples) {
        let perBackend = totals.get(sample.category);
        if (perBackend === undefined) {
            perBackend = new Map();
            totals.set(sample.category, perBackend);
        }
        let cell = perBackend.get(sample.backend);
        if (cell === undefined) {
            cell = { n: 0, sumCost: 0, sumCorrect: 0 };
            perBackend.set(sample.backend, cell);
        }
        cell.n += 1;
        cell.sumCost += sample.costUsd;
        // Explicit numeric coercion: a boolean counts as 0/1 and a stray
        // string can never turn the running sum into a concatenation.
        cell.sumCorrect += Number(sample.correct);
    }
    // Object.fromEntries defines own data properties, so even a key such
    // as "__proto__" ends up as a regular entry of the result.
    return Object.fromEntries(Array.from(totals, ([category, perBackend]) => [
        category,
        Object.fromEntries(Array.from(perBackend, ([backend, cell]) => [
            backend,
            {
                n: cell.n,
                meanCost: cell.sumCost / cell.n,
                meanAccuracy: cell.sumCorrect / cell.n,
            },
        ])),
    ]));
}
|
|
74
|
+
// ============================================================================
|
|
75
|
+
// Per-category selection
|
|
76
|
+
// ============================================================================
|
|
77
|
+
/**
 * Select a backend for one category from aggregated calibration data
 * using the named preset rule.
 *
 * Rules:
 *   - `'maximize-accuracy'`: highest `meanAccuracy`; exact ties are broken
 *     by lower `meanCost`.
 *   - `'balanced'`: lowest `meanCost / meanAccuracy`; cells with zero
 *     accuracy are skipped to avoid dividing by zero.
 *   - anything else (i.e. `'minimize-cost'`): cheapest cell whose accuracy
 *     is within `accuracyTolerance` of the best eligible accuracy.
 *
 * The static `PRESET_TABLES[preset].defaultMapping[category]` entry is
 * returned whenever calibration cannot decide: no cells for the category,
 * no cell with at least `minSamplesPerCell` samples, no positive-accuracy
 * cell under `'balanced'`, or an empty tolerance window.
 *
 * @param args.category          Category to pick a backend for.
 * @param args.agg               Output of `aggregateCalibration`.
 * @param args.preset            Preset rule name (also selects the fallback table).
 * @param args.minSamplesPerCell Minimum `n` for a cell to be considered (default 1).
 * @param args.accuracyTolerance Accuracy window for minimize-cost (default 0.02).
 * @returns The chosen backend id.
 */
export function selectByPreset(args) {
    const { category, agg, preset, minSamplesPerCell = 1, accuracyTolerance = 0.02, } = args;
    // Slack for the tolerance comparison: 0.92 - 0.90 evaluates to
    // 0.020000000000000018 in IEEE-754 doubles, which would otherwise
    // exclude a backend sitting exactly on the documented 2pp boundary.
    const ACCURACY_EPSILON = 1e-9;
    const fallback = PRESET_TABLES[preset].defaultMapping[category];
    const cells = agg[category];
    if (!cells)
        return fallback;
    // Keep only cells that meet the min-sample threshold.
    const eligible = Object.entries(cells)
        .filter(([, cell]) => cell.n >= minSamplesPerCell);
    if (eligible.length === 0)
        return fallback;
    // Left-to-right scan: an entry replaces the current best only when it
    // is strictly better, so the earliest entry wins ties.
    const pickBest = (entries, isBetter) => entries.reduce((best, entry) => (isBetter(entry[1], best[1]) ? entry : best), entries[0])[0];
    if (preset === 'maximize-accuracy') {
        return pickBest(eligible, (cell, best) => cell.meanAccuracy > best.meanAccuracy ||
            (cell.meanAccuracy === best.meanAccuracy && cell.meanCost < best.meanCost));
    }
    if (preset === 'balanced') {
        // Best $/correct; zero-accuracy cells would divide by zero.
        const valid = eligible.filter(([, cell]) => cell.meanAccuracy > 0);
        if (valid.length === 0)
            return fallback;
        return pickBest(valid, (cell, best) => cell.meanCost / cell.meanAccuracy < best.meanCost / best.meanAccuracy);
    }
    // minimize-cost: cheapest within accuracyTolerance of best accuracy.
    const bestAccuracy = Math.max(...eligible.map(([, cell]) => cell.meanAccuracy));
    const withinTolerance = eligible.filter(([, cell]) => bestAccuracy - cell.meanAccuracy <= accuracyTolerance + ACCURACY_EPSILON);
    if (withinTolerance.length === 0) {
        // Normally the best-accuracy cell itself qualifies; this is only
        // reached for degenerate input (NaN accuracy, negative tolerance).
        return fallback;
    }
    return pickBest(withinTolerance, (cell, best) => cell.meanCost < best.meanCost);
}
|
|
130
|
+
// ============================================================================
|
|
131
|
+
// Table construction
|
|
132
|
+
// ============================================================================
|
|
133
|
+
/** Every category a routing table must provide a backend for. */
const ALL_CATEGORIES = [
    'single-session-user',
    'single-session-assistant',
    'single-session-preference',
    'knowledge-update',
    'multi-session',
    'temporal-reasoning',
];
/**
 * Report whether the aggregated cells of one category are sufficient for
 * `selectByPreset` to choose from calibration data rather than from its
 * static fallback. Mirrors that function's eligibility rules: at least
 * one cell with `n >= minSamples`, and under `'balanced'` at least one
 * such cell with positive accuracy.
 */
function hasUsableCalibration(cells, preset, minSamples) {
    if (!cells)
        return false;
    const eligible = Object.values(cells).filter((cell) => cell.n >= minSamples);
    if (preset === 'balanced') {
        return eligible.some((cell) => cell.meanAccuracy > 0);
    }
    return eligible.length > 0;
}
/**
 * Build a complete frozen routing table from calibration samples + a
 * preset rule.
 *
 * Categories whose calibration is insufficient (no samples, no cell with
 * at least `minSamplesPerCell` samples, or — for `'balanced'` — no cell
 * with positive accuracy) take their backend from `fallbackTable` when
 * one is supplied, otherwise from the preset's static table.
 *
 * @param args.samples           Raw calibration samples.
 * @param args.preset            Preset rule name.
 * @param args.minSamplesPerCell Forwarded to `selectByPreset` (its default is 1).
 * @param args.accuracyTolerance Forwarded to `selectByPreset`.
 * @param args.fallbackTable     Optional table to use instead of
 *   `PRESET_TABLES[preset]` for categories without usable calibration.
 * @returns Frozen `{ preset, defaultMapping }` with a frozen mapping that
 *   covers every entry of `ALL_CATEGORIES`.
 */
export function buildAdaptiveRoutingTable(args) {
    const { samples, preset, minSamplesPerCell, accuracyTolerance, fallbackTable, } = args;
    const agg = aggregateCalibration(samples);
    const staticTable = PRESET_TABLES[preset];
    const fb = fallbackTable ?? staticTable;
    const minSamples = minSamplesPerCell ?? 1;
    const mapping = {};
    for (const cat of ALL_CATEGORIES) {
        // selectByPreset only knows the preset's STATIC table as its
        // fallback. When the caller supplied a different fallback table,
        // decide here whether calibration is usable and, if it is not,
        // take the caller's mapping for every fallback case — not just
        // for categories that have no samples at all.
        if (fb !== staticTable && !hasUsableCalibration(agg[cat], preset, minSamples)) {
            mapping[cat] = fb.defaultMapping[cat];
            continue;
        }
        mapping[cat] = selectByPreset({
            category: cat,
            agg,
            preset,
            minSamplesPerCell,
            accuracyTolerance,
        });
    }
    return Object.freeze({
        preset: preset,
        defaultMapping: Object.freeze(mapping),
    });
}
|
|
172
|
+
// ============================================================================
|
|
173
|
+
// AdaptiveMemoryRouter class
|
|
174
|
+
// ============================================================================
|
|
175
|
+
/**
 * {@link MemoryRouter} variant that computes its routing table from a
 * calibration dataset at construction time instead of using a static
 * preset table. Everything else is inherited from the base class.
 */
export class AdaptiveMemoryRouter extends MemoryRouter {
    /**
     * @param options Base router options plus `calibrationSamples`,
     *   `preset`, and the optional `minSamplesPerCell` /
     *   `accuracyTolerance` tuning knobs. The full options object is
     *   forwarded to the base constructor with `routingTable` set to the
     *   derived table.
     */
    constructor(options) {
        const { calibrationSamples, preset, minSamplesPerCell, accuracyTolerance } = options;
        const routingTable = buildAdaptiveRoutingTable({
            samples: calibrationSamples,
            preset,
            minSamplesPerCell,
            accuracyTolerance,
        });
        super({ ...options, preset, routingTable });
        // Retained so callers can inspect what calibration produced.
        this.derivedTable = routingTable;
    }
    /**
     * Return the routing table derived from the calibration samples
     * (useful for debugging / telemetry).
     */
    getRoutingTable() {
        return this.derivedTable;
    }
}
|
|
202
|
+
//# sourceMappingURL=adaptive.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adaptive.js","sourceRoot":"","sources":["../../src/memory-router/adaptive.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;AAEH,OAAO,EAAE,YAAY,EAA4B,MAAM,mBAAmB,CAAC;AAC3E,OAAO,EACL,aAAa,GAKd,MAAM,qBAAqB,CAAC;AA4F7B,+EAA+E;AAC/E,cAAc;AACd,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAClC,OAAqC;IAErC,MAAM,GAAG,GAAuF,EAAE,CAAC;IAEnG,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;YAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC;QAC3C,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;QACpE,CAAC;QACD,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAE,CAAC,CAAC,CAAC,OAAO,CAAE,CAAC;QAC1C,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QACZ,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC;QAC1B,IAAI,CAAC,UAAU,IAAI,CAAC,CAAC,OAAO,CAAC;IAC/B,CAAC;IAED,MAAM,GAAG,GAA0B,EAAE,CAAC;IACtC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAA0B,EAAE,CAAC;QAC5D,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC;QACd,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CAAE,CAAC;QACxB,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAE,CAAsB,EAAE,CAAC;YAClE,MAAM,IAAI,GAAG,GAAG,CAAC,GAAG,CAAE,CAAC,OAAO,CAAE,CAAC;YACjC,KAAK,CAAC,OAAO,CAAC,GAAG;gBACf,CAAC,EAAE,IAAI,CAAC,CAAC;gBACT,QAAQ,EAAE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;gBAC/B,YAAY,EAAE,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC;aACvC,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,+EAA+E;AAC/E,yBAAyB;AACzB,+EAA+E;AAE/E;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,IAAwB;IACrD,MAAM,EACJ,QAAQ,EACR,GAAG,EACH,MAAM,EACN,iBAAiB,GAAG,CAAC,EACrB,iBAAiB,GAAG,IAAI,GACzB,GAAG,IAAI,CAAC;IAET,MAAM,aAAa,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,QAAQ,GAAG,aAAa,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAExD,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC5B,IAAI,CAAC,KAAK;QAAE,OAAO,QAAQ,CAAC;IAE5B,6CAA6C;IAC7C,MAAM,QAAQ,GAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAA0C;SAC7E,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,
CAAC,IAAI,CAAC,CAAC,IAAI,iBAAiB,CAAC,CAAC;IAErD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE3C,IAAI,MAAM,KAAK,mBAAmB,EAAE,CAAC;QACnC,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;YAC/C,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,GAAG,IAAI,CAAC;YACrC,IAAI,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,YAAY;gBAAE,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YACtE,IAAI,IAAI,CAAC,YAAY,KAAK,QAAQ,CAAC,YAAY,EAAE,CAAC;gBAChD,OAAO,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACpE,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,IAAI,MAAM,KAAK,UAAU,EAAE,CAAC;QAC1B,iEAAiE;QACjE,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC;QACnE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,QAAQ,CAAC;QACxC,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;YAC5C,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,GAAG,IAAI,CAAC;YACrC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC;YAClD,MAAM,OAAO,GAAG,QAAQ,CAAC,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC;YAC1D,OAAO,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACpD,CAAC,EAAE,KAAK,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;IAED,qEAAqE;IACrE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAC3B,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CACjD,CAAC;IACF,MAAM,eAAe,GAAG,QAAQ,CAAC,MAAM,CACrC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,iBAAiB,CACpE,CAAC;IAEF,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,kEAAkE;QAClE,+CAA+C;QAC/C,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,OAAO,eAAe,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;QACtD,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,GAAG,IAAI,CAAC;QACrC,OAAO,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACpE,CAAC,EAAE,eAAe,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7B,CAAC;AAED,+EAA+E;AAC/E,qBAAqB;AACrB,+EAA+E;AAE/
E,MAAM,cAAc,GAAmC;IACrD,qBAAqB;IACrB,0BAA0B;IAC1B,2BAA2B;IAC3B,kBAAkB;IAClB,eAAe;IACf,oBAAoB;CACrB,CAAC;AAEF;;;;GAIG;AACH,MAAM,UAAU,yBAAyB,CACvC,IAAmC;IAEnC,MAAM,EACJ,OAAO,EACP,MAAM,EACN,iBAAiB,EACjB,iBAAiB,EACjB,aAAa,GACd,GAAG,IAAI,CAAC;IAET,MAAM,GAAG,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,aAAa,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;IAElD,MAAM,OAAO,GAAiD,EAG7D,CAAC;IACF,KAAK,MAAM,GAAG,IAAI,cAAc,EAAE,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,GAAG,cAAc,CAAC;YAC5B,QAAQ,EAAE,GAAG;YACb,GAAG;YACH,MAAM;YACN,iBAAiB;YACjB,iBAAiB;SAClB,CAAC,CAAC;QACH,oEAAoE;QACpE,6DAA6D;QAC7D,mEAAmE;QACnE,wDAAwD;QACxD,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC;YAC9C,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,CAAC;QACnB,MAAM,EAAE,MAA4B;QACpC,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC;KACvC,CAAiB,CAAC;AACrB,CAAC;AAED,+EAA+E;AAC/E,6BAA6B;AAC7B,+EAA+E;AAE/E;;;;GAIG;AACH,MAAM,OAAO,oBAAqB,SAAQ,YAAY;IAGpD,YAAY,OAAoC;QAC9C,MAAM,YAAY,GAAG,yBAAyB,CAAC;YAC7C,OAAO,EAAE,OAAO,CAAC,kBAAkB;YACnC,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,iBAAiB,EAAE,OAAO,CAAC,iBAAiB;YAC5C,iBAAiB,EAAE,OAAO,CAAC,iBAAiB;SAC7C,CAAC,CAAC;QAEH,KAAK,CAAC;YACJ,GAAG,OAAO;YACV,MAAM,EAAE,OAAO,CAAC,MAA4B;YAC5C,YAAY,EAAE,YAAY;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;CACF"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file backend-costs.ts
|
|
3
|
+
* @description Per-backend per-category cost-accuracy-latency points
|
|
4
|
+
* measured on LongMemEval-S Phase B (N=500). The {@link MemoryRouter} uses
|
|
5
|
+
* these to:
|
|
6
|
+
* - estimate the per-query USD cost of a routing decision before executing,
|
|
7
|
+
* - apply budget constraints (`hard` / `soft` / `cheapest-fallback`),
|
|
8
|
+
* - pick the cheapest backend that fits a budget when downgrading.
|
|
9
|
+
*
|
|
10
|
+
* Numbers come from the canonical Phase B run JSONs:
|
|
11
|
+
* - canonical-hybrid: results/runs/2026-04-20T20-03-14-675 (Tier 1)
|
|
12
|
+
* - observational-memory-v10: results/runs/2026-04-23T04-14-40-609 (Tier 2a v10)
|
|
13
|
+
* - observational-memory-v11: results/runs/2026-04-23T17-27-28-793 (Tier 2b v11)
|
|
14
|
+
*
|
|
15
|
+
* The per-tier `avgCostPerQuery` is the totalUsd-divided-by-n_cases
|
|
16
|
+
* average; on routed configurations the actual per-call cost depends on
|
|
17
|
+
* which backend the dispatcher picked and which category the call hit, so
|
|
18
|
+
* the per-category breakdown below is what the router actually consumes.
|
|
19
|
+
*
|
|
20
|
+
* @module @framers/agentos/memory-router/backend-costs
|
|
21
|
+
*/
|
|
22
|
+
import type { MemoryBackendId, MemoryQueryCategory } from './routing-tables.js';
|
|
23
|
+
/**
 * Cost-accuracy-latency point for one backend across the six categories.
 * The router compares these to make budget-aware decisions.
 *
 * Every field is `readonly`, and each per-category record requires an
 * entry for every {@link MemoryQueryCategory}.
 */
export interface MemoryBackendCostPoint {
    /** Identifier of the backend these measurements belong to. */
    readonly backend: MemoryBackendId;
    /** Average USD per query across all categories (Phase B aggregate). */
    readonly avgCostPerQuery: number;
    /**
     * Per-category accuracy at this backend (Phase B N=500). The shipped
     * defaults express accuracy as a fraction (e.g. 0.971), not a percentage.
     */
    readonly perCategoryAccuracy: Readonly<Record<MemoryQueryCategory, number>>;
    /** Per-category USD per query at this backend (Phase B N=500). */
    readonly perCategoryCostPerQuery: Readonly<Record<MemoryQueryCategory, number>>;
    /** Per-category average latency in ms at this backend (Phase B N=500). */
    readonly perCategoryLatencyMs: Readonly<Record<MemoryQueryCategory, number>>;
}
|
|
38
|
+
/**
 * canonical-hybrid: BM25 + dense + RRF + Cohere rerank-v3.5 over raw
 * memory traces. Phase B measured 73.2% [69.2, 77.0] aggregate at
 * $0.0213/correct.
 *
 * Cost point for the `'canonical-hybrid'` backend (Tier 1).
 */
export declare const TIER_1_CANONICAL_COSTS: MemoryBackendCostPoint;
|
|
44
|
+
/**
 * observational-memory-v10: synthesized observation log + classifier-driven
 * dispatch inside the OM pipeline (no verbatim citation). Phase B measured
 * 74.6% [70.8, 78.4] aggregate at $0.3265/correct, 12s avg latency.
 *
 * Cost point for the `'observational-memory-v10'` backend (Tier 2a).
 */
export declare const TIER_2A_V10_COSTS: MemoryBackendCostPoint;
|
|
50
|
+
/**
 * observational-memory-v11: v10 + conditional verbatim citation rule for
 * knowledge-update and single-session-user categories. Phase B measured
 * 75.4% [71.6, 79.0] aggregate at $0.4362/correct, 14s avg latency.
 *
 * Cost point for the `'observational-memory-v11'` backend (Tier 2b).
 */
export declare const TIER_2B_V11_COSTS: MemoryBackendCostPoint;
|
|
56
|
+
/**
 * Default cost-points registry. Indexed by {@link MemoryBackendId} so the
 * router can look up the picked backend's cost on any category.
 *
 * Custom deployments can substitute their own cost-points by passing a
 * different `backendCosts` map into the {@link MemoryRouter} config —
 * useful when a workload diverges from the LongMemEval-S Phase B
 * distribution and the calibrator wants to plug in measurements from
 * their own benchmark.
 *
 * The type requires one {@link MemoryBackendCostPoint} for every
 * {@link MemoryBackendId}.
 */
export declare const DEFAULT_MEMORY_BACKEND_COSTS: Readonly<Record<MemoryBackendId, MemoryBackendCostPoint>>;
|
|
67
|
+
//# sourceMappingURL=backend-costs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"backend-costs.d.ts","sourceRoot":"","sources":["../../src/memory-router/backend-costs.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAEhF;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,OAAO,EAAE,eAAe,CAAC;IAClC,uEAAuE;IACvE,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,6DAA6D;IAC7D,QAAQ,CAAC,mBAAmB,EAAE,QAAQ,CAAC,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC,CAAC;IAC5E,kEAAkE;IAClE,QAAQ,CAAC,uBAAuB,EAAE,QAAQ,CAAC,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC,CAAC;IAChF,0EAA0E;IAC1E,QAAQ,CAAC,oBAAoB,EAAE,QAAQ,CAAC,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC,CAAC;CAC9E;AAED;;;;GAIG;AACH,eAAO,MAAM,sBAAsB,EAAE,sBA2BT,CAAC;AAE7B;;;;GAIG;AACH,eAAO,MAAM,iBAAiB,EAAE,sBA2BJ,CAAC;AAE7B;;;;GAIG;AACH,eAAO,MAAM,iBAAiB,EAAE,sBA2BJ,CAAC;AAE7B;;;;;;;;;GASG;AACH,eAAO,MAAM,4BAA4B,EAAE,QAAQ,CACjD,MAAM,CAAC,eAAe,EAAE,sBAAsB,CAAC,CAK/C,CAAC"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file backend-costs.ts
|
|
3
|
+
* @description Per-backend per-category cost-accuracy-latency points
|
|
4
|
+
* measured on LongMemEval-S Phase B (N=500). The {@link MemoryRouter} uses
|
|
5
|
+
* these to:
|
|
6
|
+
* - estimate the per-query USD cost of a routing decision before executing,
|
|
7
|
+
* - apply budget constraints (`hard` / `soft` / `cheapest-fallback`),
|
|
8
|
+
* - pick the cheapest backend that fits a budget when downgrading.
|
|
9
|
+
*
|
|
10
|
+
* Numbers come from the canonical Phase B run JSONs:
|
|
11
|
+
* - canonical-hybrid: results/runs/2026-04-20T20-03-14-675 (Tier 1)
|
|
12
|
+
* - observational-memory-v10: results/runs/2026-04-23T04-14-40-609 (Tier 2a v10)
|
|
13
|
+
* - observational-memory-v11: results/runs/2026-04-23T17-27-28-793 (Tier 2b v11)
|
|
14
|
+
*
|
|
15
|
+
* The per-tier `avgCostPerQuery` is the totalUsd-divided-by-n_cases
|
|
16
|
+
* average; on routed configurations the actual per-call cost depends on
|
|
17
|
+
* which backend the dispatcher picked and which category the call hit, so
|
|
18
|
+
* the per-category breakdown below is what the router actually consumes.
|
|
19
|
+
*
|
|
20
|
+
* @module @framers/agentos/memory-router/backend-costs
|
|
21
|
+
*/
|
|
22
|
+
/**
 * canonical-hybrid: BM25 + dense + RRF + Cohere rerank-v3.5 over raw
 * memory traces. Phase B measured 73.2% [69.2, 77.0] aggregate at
 * $0.0213/correct.
 *
 * The object and each nested per-category record are frozen.
 */
export const TIER_1_CANONICAL_COSTS = Object.freeze({
    backend: 'canonical-hybrid',
    // 0.0213 $/correct x 0.732 accuracy ~= 0.0156 $/query.
    // NOTE(review): every per-category cost below is >= 0.0175, i.e. above
    // this aggregate — presumably the two come from different accounting;
    // confirm against the run JSON.
    avgCostPerQuery: 0.0156,
    // Accuracy as a fraction of correct answers per category.
    perCategoryAccuracy: Object.freeze({
        'single-session-user': 0.971,
        'single-session-assistant': 0.893,
        'single-session-preference': 0.600,
        'knowledge-update': 0.868,
        'multi-session': 0.549,
        'temporal-reasoning': 0.702,
    }),
    // USD per query per category.
    perCategoryCostPerQuery: Object.freeze({
        'single-session-user': 0.0191,
        'single-session-assistant': 0.0175,
        'single-session-preference': 0.0206,
        'knowledge-update': 0.0189,
        'multi-session': 0.0196,
        'temporal-reasoning': 0.0202,
    }),
    // NOTE(review): these values (55-131 s if read as milliseconds) are an
    // order of magnitude above the tier-2 latencies in this file — confirm
    // the unit and the measurement conditions.
    perCategoryLatencyMs: Object.freeze({
        'single-session-user': 104837,
        'single-session-assistant': 55252,
        'single-session-preference': 58373,
        'knowledge-update': 82807,
        'multi-session': 131188,
        'temporal-reasoning': 100881,
    }),
});
|
|
55
|
+
/**
 * observational-memory-v10: synthesized observation log + classifier-driven
 * dispatch inside the OM pipeline (no verbatim citation). Phase B measured
 * 74.6% [70.8, 78.4] aggregate at $0.3265/correct, 12s avg latency.
 *
 * The object and each nested per-category record are frozen.
 */
export const TIER_2A_V10_COSTS = Object.freeze({
    backend: 'observational-memory-v10',
    // 0.3265 $/correct x 0.746 accuracy ~= 0.2436 $/query.
    // NOTE(review): the per-category costs below are all in the 0.02-0.03
    // range, roughly 10x lower than this aggregate — presumably the
    // aggregate includes cost the per-category figures leave out; confirm
    // against the run JSON before relying on either for budgeting.
    avgCostPerQuery: 0.2436,
    // Accuracy as a fraction of correct answers per category.
    perCategoryAccuracy: Object.freeze({
        'single-session-user': 0.971,
        'single-session-assistant': 0.839,
        'single-session-preference': 0.600,
        'knowledge-update': 0.859,
        'multi-session': 0.602,
        'temporal-reasoning': 0.710,
    }),
    // USD per query per category.
    perCategoryCostPerQuery: Object.freeze({
        'single-session-user': 0.0214,
        'single-session-assistant': 0.0195,
        'single-session-preference': 0.0206,
        'knowledge-update': 0.0306,
        'multi-session': 0.0308,
        'temporal-reasoning': 0.0206,
    }),
    // Average latency in milliseconds per category.
    perCategoryLatencyMs: Object.freeze({
        'single-session-user': 7649,
        'single-session-assistant': 5668,
        'single-session-preference': 4469,
        'knowledge-update': 19569,
        'multi-session': 21360,
        'temporal-reasoning': 4236,
    }),
});
|
|
88
|
+
/**
 * observational-memory-v11: v10 + conditional verbatim citation rule for
 * knowledge-update and single-session-user categories. Phase B measured
 * 75.4% [71.6, 79.0] aggregate at $0.4362/correct, 14s avg latency.
 *
 * The object and each nested per-category record are frozen.
 */
export const TIER_2B_V11_COSTS = Object.freeze({
    backend: 'observational-memory-v11',
    // 0.4362 $/correct x 0.754 accuracy ~= 0.3289 $/query.
    // NOTE(review): the per-category costs below are all in the 0.02-0.03
    // range, roughly 10x lower than this aggregate — presumably the
    // aggregate includes cost the per-category figures leave out; confirm
    // against the run JSON before relying on either for budgeting.
    avgCostPerQuery: 0.3289,
    // Accuracy as a fraction of correct answers per category.
    perCategoryAccuracy: Object.freeze({
        'single-session-user': 0.986,
        'single-session-assistant': 0.839,
        'single-session-preference': 0.633,
        'knowledge-update': 0.872,
        'multi-session': 0.617,
        'temporal-reasoning': 0.692,
    }),
    // USD per query per category.
    perCategoryCostPerQuery: Object.freeze({
        'single-session-user': 0.0212,
        'single-session-assistant': 0.0192,
        'single-session-preference': 0.0206,
        'knowledge-update': 0.0307,
        'multi-session': 0.0336,
        'temporal-reasoning': 0.0209,
    }),
    // Average latency in milliseconds per category.
    perCategoryLatencyMs: Object.freeze({
        'single-session-user': 6676,
        'single-session-assistant': 6879,
        'single-session-preference': 8822,
        'knowledge-update': 21085,
        'multi-session': 27423,
        'temporal-reasoning': 5025,
    }),
});
|
|
121
|
+
/**
 * Default cost-points registry. Indexed by {@link MemoryBackendId} so the
 * router can look up the picked backend's cost on any category.
 *
 * Custom deployments can substitute their own cost-points by passing a
 * different `backendCosts` map into the {@link MemoryRouter} config —
 * useful when a workload diverges from the LongMemEval-S Phase B
 * distribution and the calibrator wants to plug in measurements from
 * their own benchmark.
 *
 * The registry object is frozen; each key equals the `backend` field of
 * the cost point stored under it.
 */
export const DEFAULT_MEMORY_BACKEND_COSTS = Object.freeze({
    'canonical-hybrid': TIER_1_CANONICAL_COSTS,
    'observational-memory-v10': TIER_2A_V10_COSTS,
    'observational-memory-v11': TIER_2B_V11_COSTS,
});
|
|
136
|
+
//# sourceMappingURL=backend-costs.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"backend-costs.js","sourceRoot":"","sources":["../../src/memory-router/backend-costs.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAoBH;;;;GAIG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAA2B,MAAM,CAAC,MAAM,CAAC;IAC1E,OAAO,EAAE,kBAA2B;IACpC,eAAe,EAAE,MAAM;IACvB,mBAAmB,EAAE,MAAM,CAAC,MAAM,CAAC;QACjC,qBAAqB,EAAE,KAAK;QAC5B,0BAA0B,EAAE,KAAK;QACjC,2BAA2B,EAAE,KAAK;QAClC,kBAAkB,EAAE,KAAK;QACzB,eAAe,EAAE,KAAK;QACtB,oBAAoB,EAAE,KAAK;KAC5B,CAAC;IACF,uBAAuB,EAAE,MAAM,CAAC,MAAM,CAAC;QACrC,qBAAqB,EAAE,MAAM;QAC7B,0BAA0B,EAAE,MAAM;QAClC,2BAA2B,EAAE,MAAM;QACnC,kBAAkB,EAAE,MAAM;QAC1B,eAAe,EAAE,MAAM;QACvB,oBAAoB,EAAE,MAAM;KAC7B,CAAC;IACF,oBAAoB,EAAE,MAAM,CAAC,MAAM,CAAC;QAClC,qBAAqB,EAAE,MAAM;QAC7B,0BAA0B,EAAE,KAAK;QACjC,2BAA2B,EAAE,KAAK;QAClC,kBAAkB,EAAE,KAAK;QACzB,eAAe,EAAE,MAAM;QACvB,oBAAoB,EAAE,MAAM;KAC7B,CAAC;CACH,CAA2B,CAAC;AAE7B;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAA2B,MAAM,CAAC,MAAM,CAAC;IACrE,OAAO,EAAE,0BAAmC;IAC5C,eAAe,EAAE,MAAM;IACvB,mBAAmB,EAAE,MAAM,CAAC,MAAM,CAAC;QACjC,qBAAqB,EAAE,KAAK;QAC5B,0BAA0B,EAAE,KAAK;QACjC,2BAA2B,EAAE,KAAK;QAClC,kBAAkB,EAAE,KAAK;QACzB,eAAe,EAAE,KAAK;QACtB,oBAAoB,EAAE,KAAK;KAC5B,CAAC;IACF,uBAAuB,EAAE,MAAM,CAAC,MAAM,CAAC;QACrC,qBAAqB,EAAE,MAAM;QAC7B,0BAA0B,EAAE,MAAM;QAClC,2BAA2B,EAAE,MAAM;QACnC,kBAAkB,EAAE,MAAM;QAC1B,eAAe,EAAE,MAAM;QACvB,oBAAoB,EAAE,MAAM;KAC7B,CAAC;IACF,oBAAoB,EAAE,MAAM,CAAC,MAAM,CAAC;QAClC,qBAAqB,EAAE,IAAI;QAC3B,0BAA0B,EAAE,IAAI;QAChC,2BAA2B,EAAE,IAAI;QACjC,kBAAkB,EAAE,KAAK;QACzB,eAAe,EAAE,KAAK;QACtB,oBAAoB,EAAE,IAAI;KAC3B,CAAC;CACH,CAA2B,CAAC;AAE7B;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAA2B,MAAM,CAAC,MAAM,CAAC;IACrE,OAAO,EAAE,0BAAmC;IAC5C,eAAe,EAAE,MAAM;IACvB,mBAAmB,EAAE,MAAM,CAAC,MAAM,CAAC;QACjC,qBAAqB,EAAE,KAAK;QAC5B,0BAA0B,EAAE,KAAK;QACjC,2BAA2B,EAAE,KAAK;QAClC,kBAAkB,EAAE,KAAK;QACzB,eAAe,EAAE,KAAK;QACtB,oBAAoB,EAAE,KAAK;KAC5B,CAAC;IACF,uBAAuB,EAAE,MAAM,CAAC,MAAM,CAAC;QACrC,qBAAqB,EAAE,MAAM;QAC7B,0BAA0B,EAAE,MAAM;QAClC,2BAA2B,EAAE,MAAM;QACnC,kBAAkB,EAAE,MAAM;QAC1B,eAAe,EAAE,MAAM;QACvB,oBAAoB,EAAE,MAAM;KAC7B,CA
AC;IACF,oBAAoB,EAAE,MAAM,CAAC,MAAM,CAAC;QAClC,qBAAqB,EAAE,IAAI;QAC3B,0BAA0B,EAAE,IAAI;QAChC,2BAA2B,EAAE,IAAI;QACjC,kBAAkB,EAAE,KAAK;QACzB,eAAe,EAAE,KAAK;QACtB,oBAAoB,EAAE,IAAI;KAC3B,CAAC;CACH,CAA2B,CAAC;AAE7B;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAErC,MAAM,CAAC,MAAM,CAAC;IAChB,kBAAkB,EAAE,sBAAsB;IAC1C,0BAA0B,EAAE,iBAAiB;IAC7C,0BAA0B,EAAE,iBAAiB;CAC9C,CAAC,CAAC"}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file classifier.ts
|
|
3
|
+
* @description The LLM-as-judge classifier that the {@link MemoryRouter}
|
|
4
|
+
* uses to pick a {@link MemoryQueryCategory} for each incoming query.
|
|
5
|
+
*
|
|
6
|
+
* The classifier is deliberately abstracted behind {@link IMemoryClassifier}
|
|
7
|
+
* so callers can swap:
|
|
8
|
+
* - the LLM client (any provider — OpenAI, Anthropic, local, mock) via
|
|
9
|
+
* the {@link IMemoryClassifierLLM} adapter interface,
|
|
10
|
+
* - the prompt variant (base vs few-shot) per-call,
|
|
11
|
+
* - the classifier implementation entirely (e.g. a keyword-matcher or a
|
|
12
|
+
* small custom ML model) by implementing {@link IMemoryClassifier}.
|
|
13
|
+
*
|
|
14
|
+
* The reference implementation, {@link LLMMemoryClassifier}, runs the
|
|
15
|
+
* gpt-5-mini-style cheap single-shot discriminator prompt and robustly
|
|
16
|
+
* parses the output, falling back to `multi-session` on unparseable
|
|
17
|
+
* responses (the safest default — multi-session routes cover cross-session
|
|
18
|
+
* synthesis which handles most misidentified question types gracefully).
|
|
19
|
+
*
|
|
20
|
+
* @module @framers/agentos/memory-router/classifier
|
|
21
|
+
*/
|
|
22
|
+
import { type MemoryQueryCategory } from './routing-tables.js';
|
|
23
|
+
/**
|
|
24
|
+
* Minimal LLM-call interface the built-in classifier needs. Agentos
|
|
25
|
+
* consumers wire their preferred provider to this shape via an adapter
|
|
26
|
+
* — we intentionally do NOT import any provider SDK here so the
|
|
27
|
+
* memory-router module stays provider-agnostic.
|
|
28
|
+
*/
|
|
29
|
+
export interface MemoryClassifierLLMRequest {
|
|
30
|
+
/** System prompt. The classifier supplies this based on prompt variant. */
|
|
31
|
+
readonly system: string;
|
|
32
|
+
/** User prompt. The classifier renders `Question: {q}\n\nCategory:`. */
|
|
33
|
+
readonly user: string;
|
|
34
|
+
/** Max tokens to generate. The classifier passes its configured `maxTokens` (default 16 — enough for a bare category token). */
|
|
35
|
+
readonly maxTokens: number;
|
|
36
|
+
/** Temperature. Classifier passes 0 for determinism. */
|
|
37
|
+
readonly temperature: number;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Response shape the built-in classifier expects from the adapter.
|
|
41
|
+
*/
|
|
42
|
+
export interface MemoryClassifierLLMResponse {
|
|
43
|
+
/** The model's raw text. Whitespace is tolerated; the parser normalizes it. */
|
|
44
|
+
readonly text: string;
|
|
45
|
+
/** Input token count, for cost tracking. */
|
|
46
|
+
readonly tokensIn: number;
|
|
47
|
+
/** Output token count, for cost tracking. */
|
|
48
|
+
readonly tokensOut: number;
|
|
49
|
+
/** Model identifier the LLM reports. */
|
|
50
|
+
readonly model: string;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* The LLM-client adapter the built-in classifier expects. Adapt any
|
|
54
|
+
* provider SDK (OpenAI, Anthropic, a provider-router, a mock) to this
|
|
55
|
+
* shape before passing into {@link LLMMemoryClassifier}.
|
|
56
|
+
*/
|
|
57
|
+
export interface IMemoryClassifierLLM {
|
|
58
|
+
invoke(request: MemoryClassifierLLMRequest): Promise<MemoryClassifierLLMResponse>;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Options passed per-call to {@link IMemoryClassifier.classify}. The
|
|
62
|
+
* classifier reads these to pick a prompt variant; everything else is
|
|
63
|
+
* constructor-scoped.
|
|
64
|
+
*/
|
|
65
|
+
export interface MemoryClassifierClassifyOptions {
|
|
66
|
+
/**
|
|
67
|
+
* Use the few-shot prompt variant instead of the base prompt. The
|
|
68
|
+
* few-shot prompt includes explicit Question/Category pairs targeting
|
|
69
|
+
* known confusion patterns (SSU-vs-SSA, SSP-vs-SSA, MS-vs-KU). Default
|
|
70
|
+
* false — the base prompt is ~2.5x cheaper per-classification-token and
|
|
71
|
+
* matches the shipping Tier 3 v10 classifier configuration.
|
|
72
|
+
*/
|
|
73
|
+
readonly useFewShotPrompt?: boolean;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Result of a classification call. The returned category is always a
|
|
77
|
+
* valid {@link MemoryQueryCategory}; parse failures map to the safe
|
|
78
|
+
* fallback 'multi-session'.
|
|
79
|
+
*/
|
|
80
|
+
export interface MemoryClassifierResult {
|
|
81
|
+
readonly category: MemoryQueryCategory;
|
|
82
|
+
readonly tokensIn: number;
|
|
83
|
+
readonly tokensOut: number;
|
|
84
|
+
readonly model: string;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* The public classifier contract. Implementations decide how to turn a
|
|
88
|
+
* query into a category — via LLM, keyword heuristic, small ML model, or
|
|
89
|
+
* mock.
|
|
90
|
+
*/
|
|
91
|
+
export interface IMemoryClassifier {
|
|
92
|
+
classify(query: string, options?: MemoryClassifierClassifyOptions): Promise<MemoryClassifierResult>;
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Base classifier prompt. Lists the six category tokens with one-sentence
|
|
96
|
+
* definitions and a few examples per category, then instructs the model
|
|
97
|
+
* to emit ONLY the bare category token.
|
|
98
|
+
*/
|
|
99
|
+
export declare const CLASSIFIER_SYSTEM_PROMPT = "You are classifying a memory-system question into one of six categories.\n\nReturn ONLY the category token (no explanation, no quotes, no punctuation).\n\nCategories:\n- single-session-user: the question asks about something the USER said, did, or stated in a specific past session. Answer is in one session. Examples: \"What did I tell you about my favorite dessert?\", \"Where did I say I moved to last month?\"\n- single-session-assistant: the question asks about something the ASSISTANT said, generated, or recommended in a specific session. Answer is in one session. Examples: \"What recipe did you suggest for the birthday party?\", \"What books did you recommend to me?\"\n- single-session-preference: the question asks about a preference the user stated in passing. Answer is in one session. Examples: \"Do I prefer tea or coffee?\", \"What's my favorite type of movie?\"\n- knowledge-update: the question asks about current state where the answer EVOLVED across sessions (supersession). Examples: \"What's my current job title?\", \"Where do I live now?\", \"What's my latest project?\"\n- multi-session: the question requires combining information from 2+ separate sessions. Examples: \"How many different languages have I mentioned studying?\", \"Which authors did you recommend across our conversations?\"\n- temporal-reasoning: the question asks about the order, timing, or duration of events across time. Examples: \"In what order did I visit the three countries?\", \"How many months ago did I start the new job?\"";
|
|
100
|
+
/**
|
|
101
|
+
* Few-shot variant of the classifier prompt. Adds explicit
|
|
102
|
+
* Question/Category pairs targeting confusion patterns observed in the
|
|
103
|
+
* gpt-5-mini base-prompt classifier on LongMemEval Tier A:
|
|
104
|
+
* - SSA confused as SSU (YOU-said vs I-said distinction)
|
|
105
|
+
* - SSP confused as SSA (preferences phrased like recommendations)
|
|
106
|
+
* - MS confused as KU (cross-session vs current-state)
|
|
107
|
+
*
|
|
108
|
+
* Used when {@link MemoryClassifierClassifyOptions.useFewShotPrompt} is true.
|
|
109
|
+
*/
|
|
110
|
+
export declare const CLASSIFIER_SYSTEM_PROMPT_FEWSHOT = "You are classifying a memory-system question into one of six categories.\n\nReturn ONLY the category token (no explanation, no quotes, no punctuation).\n\nCategories:\n- single-session-user: the question asks about something the USER said, did, or stated in a specific past session. Answer is in one session.\n- single-session-assistant: the question asks about something the ASSISTANT said, generated, or recommended in a specific session. Answer is in one session.\n- single-session-preference: the question asks about a preference the user stated in passing. Answer is in one session.\n- knowledge-update: the question asks about current state where the answer EVOLVED across sessions (supersession). The user wants the LATEST value of an attribute that has changed over time.\n- multi-session: the question requires combining information from 2+ separate sessions. Counting, listing, or aggregating items the user mentioned across sessions.\n- temporal-reasoning: the question asks about the order, timing, or duration of events across time.\n\nExamples:\n\nQuestion: What did I tell you my favorite ice cream flavor was?\nCategory: single-session-user\n\nQuestion: Where did I say I moved to last month?\nCategory: single-session-user\n\nQuestion: What book did you recommend to me last week?\nCategory: single-session-assistant\n\nQuestion: What recipe did you suggest for the birthday party?\nCategory: single-session-assistant\n\nQuestion: Do I prefer working in the morning or evening?\nCategory: single-session-preference\n\nQuestion: What's my favorite type of movie?\nCategory: single-session-preference\n\nQuestion: What's my current job title?\nCategory: knowledge-update\n\nQuestion: Where do I live now?\nCategory: knowledge-update\n\nQuestion: How many different programming languages have I mentioned learning?\nCategory: multi-session\n\nQuestion: Which authors have you recommended to me across our 
conversations?\nCategory: multi-session\n\nQuestion: In what order did I visit the three European cities?\nCategory: temporal-reasoning\n\nQuestion: How many weeks ago did I start the new job?\nCategory: temporal-reasoning";
|
|
111
|
+
/**
|
|
112
|
+
* Default fallback category used when the classifier's LLM output cannot
|
|
113
|
+
* be parsed into a known category token. multi-session is chosen because
|
|
114
|
+
* its routing target (OM-based cross-session synthesis under max-accuracy,
|
|
115
|
+
* canonical-hybrid under min-cost) degrades gracefully on most other
|
|
116
|
+
* question types.
|
|
117
|
+
*/
|
|
118
|
+
export declare const SAFE_FALLBACK_CATEGORY: MemoryQueryCategory;
|
|
119
|
+
/**
|
|
120
|
+
* Strips common LLM-output decorations so the parser can match the bare
|
|
121
|
+
* category token:
|
|
122
|
+
* - keeps only the first non-empty line,
|
|
123
|
+
* - strips common label prefixes ("category:", "type:", "answer:"),
|
|
124
|
+
* - strips surrounding quotes / backticks,
|
|
125
|
+
* - strips trailing sentence punctuation,
|
|
126
|
+
* - lower-cases the result.
|
|
127
|
+
*/
|
|
128
|
+
export declare function normalizeClassifierOutput(raw: string): string;
|
|
129
|
+
/**
|
|
130
|
+
* Parse a normalized classifier output into a known category token, or
|
|
131
|
+
* return the safe fallback if no match is found.
|
|
132
|
+
*/
|
|
133
|
+
export declare function parseClassifierOutput(raw: string): MemoryQueryCategory;
|
|
134
|
+
/**
|
|
135
|
+
* Constructor options for {@link LLMMemoryClassifier}.
|
|
136
|
+
*/
|
|
137
|
+
export interface LLMMemoryClassifierOptions {
|
|
138
|
+
/** LLM adapter the classifier calls. */
|
|
139
|
+
readonly llm: IMemoryClassifierLLM;
|
|
140
|
+
/**
|
|
141
|
+
* Max output tokens. Default 16 — the classifier only needs to emit
|
|
142
|
+
* one bare category token. Callers rarely need to change this.
|
|
143
|
+
*/
|
|
144
|
+
readonly maxTokens?: number;
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* The built-in LLM-based classifier. Runs the category-discrimination
|
|
148
|
+
* prompt on the configured LLM adapter and parses the response robustly.
|
|
149
|
+
*
|
|
150
|
+
* @example
|
|
151
|
+
* ```ts
|
|
152
|
+
* import { LLMMemoryClassifier } from '@framers/agentos/memory-router';
|
|
153
|
+
*
|
|
154
|
+
* const classifier = new LLMMemoryClassifier({
|
|
155
|
+
* llm: createOpenAIClassifierAdapter('gpt-5-mini'),
|
|
156
|
+
* });
|
|
157
|
+
* const { category } = await classifier.classify(
|
|
158
|
+
* "What's my current job title?",
|
|
159
|
+
* );
|
|
160
|
+
* // classify() resolves to e.g. { category: 'knowledge-update', tokensIn: 412, tokensOut: 4, model: 'gpt-5-mini-2025-08-07' }
|
|
161
|
+
* ```
|
|
162
|
+
*/
|
|
163
|
+
export declare class LLMMemoryClassifier implements IMemoryClassifier {
|
|
164
|
+
private readonly llm;
|
|
165
|
+
private readonly maxTokens;
|
|
166
|
+
constructor(options: LLMMemoryClassifierOptions);
|
|
167
|
+
classify(query: string, options?: MemoryClassifierClassifyOptions): Promise<MemoryClassifierResult>;
|
|
168
|
+
}
|
|
169
|
+
//# sourceMappingURL=classifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../../src/memory-router/classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,qBAAqB,CAAC;AAM7B;;;;;GAKG;AACH,MAAM,WAAW,0BAA0B;IACzC,2EAA2E;IAC3E,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,wEAAwE;IACxE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,2EAA2E;IAC3E,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,wDAAwD;IACxD,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,2BAA2B;IAC1C,+EAA+E;IAC/E,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,4CAA4C;IAC5C,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,6CAA6C;IAC7C,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,wCAAwC;IACxC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED;;;;GAIG;AACH,MAAM,WAAW,oBAAoB;IACnC,MAAM,CACJ,OAAO,EAAE,0BAA0B,GAClC,OAAO,CAAC,2BAA2B,CAAC,CAAC;CACzC;AAMD;;;;GAIG;AACH,MAAM,WAAW,+BAA+B;IAC9C;;;;;;OAMG;IACH,QAAQ,CAAC,gBAAgB,CAAC,EAAE,OAAO,CAAC;CACrC;AAED;;;;GAIG;AACH,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,QAAQ,EAAE,mBAAmB,CAAC;IACvC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED;;;;GAIG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CACN,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,+BAA+B,GACxC,OAAO,CAAC,sBAAsB,CAAC,CAAC;CACpC;AAMD;;;;GAIG;AACH,eAAO,MAAM,wBAAwB,8/CAU0K,CAAC;AAEhN;;;;;;;;;GASG;AACH,eAAO,MAAM,gCAAgC,kmEAgDhB,CAAC;AAM9B;;;;;;GAMG;AACH,eAAO,MAAM,sBAAsB,EAAE,mBAAqC,CAAC;AAE3E;;;;;;;;GAQG;AACH,wBAAgB,yBAAyB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAe7D;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,GAAG,mBAAmB,CAYtE;AAMD;;GAEG;AACH,MAAM,WAAW,0BAA0B;IACzC,wCAAwC;IACxC,QAAQ,CAAC,GAAG,EAAE,oBAAoB,CAAC;IACnC;;;OAGG;IACH,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,qBAAa,mBAAoB,YAAW,iBAAiB;IAC3D,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAuB;IAC3C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,0BAA0B;IAKzC,QAAQ,CACZ,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,+BAA+B,GACxC,OAAO,CAAC,sBAAsB,CAAC;CAoBnC"}
|