@framers/agentos 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ingest-router/IngestRouter.d.ts +72 -0
- package/dist/ingest-router/IngestRouter.d.ts.map +1 -0
- package/dist/ingest-router/IngestRouter.js +98 -0
- package/dist/ingest-router/IngestRouter.js.map +1 -0
- package/dist/ingest-router/classifier.d.ts +63 -0
- package/dist/ingest-router/classifier.d.ts.map +1 -0
- package/dist/ingest-router/classifier.js +111 -0
- package/dist/ingest-router/classifier.js.map +1 -0
- package/dist/ingest-router/costs.d.ts +48 -0
- package/dist/ingest-router/costs.d.ts.map +1 -0
- package/dist/ingest-router/costs.js +63 -0
- package/dist/ingest-router/costs.js.map +1 -0
- package/dist/ingest-router/dispatcher.d.ts +35 -0
- package/dist/ingest-router/dispatcher.d.ts.map +1 -0
- package/dist/ingest-router/dispatcher.js +32 -0
- package/dist/ingest-router/dispatcher.js.map +1 -0
- package/dist/ingest-router/index.d.ts +43 -0
- package/dist/ingest-router/index.d.ts.map +1 -0
- package/dist/ingest-router/index.js +37 -0
- package/dist/ingest-router/index.js.map +1 -0
- package/dist/ingest-router/routing-tables.d.ts +122 -0
- package/dist/ingest-router/routing-tables.d.ts.map +1 -0
- package/dist/ingest-router/routing-tables.js +145 -0
- package/dist/ingest-router/routing-tables.js.map +1 -0
- package/dist/ingest-router/select-strategy.d.ts +67 -0
- package/dist/ingest-router/select-strategy.d.ts.map +1 -0
- package/dist/ingest-router/select-strategy.js +100 -0
- package/dist/ingest-router/select-strategy.js.map +1 -0
- package/dist/memory-router/MemoryRouter.d.ts +195 -0
- package/dist/memory-router/MemoryRouter.d.ts.map +1 -0
- package/dist/memory-router/MemoryRouter.js +155 -0
- package/dist/memory-router/MemoryRouter.js.map +1 -0
- package/dist/memory-router/adaptive.d.ts +142 -0
- package/dist/memory-router/adaptive.d.ts.map +1 -0
- package/dist/memory-router/adaptive.js +202 -0
- package/dist/memory-router/adaptive.js.map +1 -0
- package/dist/memory-router/backend-costs.d.ts +67 -0
- package/dist/memory-router/backend-costs.d.ts.map +1 -0
- package/dist/memory-router/backend-costs.js +136 -0
- package/dist/memory-router/backend-costs.js.map +1 -0
- package/dist/memory-router/classifier.d.ts +169 -0
- package/dist/memory-router/classifier.d.ts.map +1 -0
- package/dist/memory-router/classifier.js +193 -0
- package/dist/memory-router/classifier.js.map +1 -0
- package/dist/memory-router/dispatcher.d.ts +115 -0
- package/dist/memory-router/dispatcher.d.ts.map +1 -0
- package/dist/memory-router/dispatcher.js +84 -0
- package/dist/memory-router/dispatcher.js.map +1 -0
- package/dist/memory-router/index.d.ts +126 -0
- package/dist/memory-router/index.d.ts.map +1 -0
- package/dist/memory-router/index.js +122 -0
- package/dist/memory-router/index.js.map +1 -0
- package/dist/memory-router/routing-tables.d.ts +125 -0
- package/dist/memory-router/routing-tables.d.ts.map +1 -0
- package/dist/memory-router/routing-tables.js +137 -0
- package/dist/memory-router/routing-tables.js.map +1 -0
- package/dist/memory-router/select-backend.d.ts +136 -0
- package/dist/memory-router/select-backend.d.ts.map +1 -0
- package/dist/memory-router/select-backend.js +210 -0
- package/dist/memory-router/select-backend.js.map +1 -0
- package/dist/multi-stage-guardrails/index.d.ts +190 -0
- package/dist/multi-stage-guardrails/index.d.ts.map +1 -0
- package/dist/multi-stage-guardrails/index.js +186 -0
- package/dist/multi-stage-guardrails/index.js.map +1 -0
- package/dist/read-router/ReadRouter.d.ts +58 -0
- package/dist/read-router/ReadRouter.d.ts.map +1 -0
- package/dist/read-router/ReadRouter.js +91 -0
- package/dist/read-router/ReadRouter.js.map +1 -0
- package/dist/read-router/classifier.d.ts +54 -0
- package/dist/read-router/classifier.d.ts.map +1 -0
- package/dist/read-router/classifier.js +104 -0
- package/dist/read-router/classifier.js.map +1 -0
- package/dist/read-router/costs.d.ts +23 -0
- package/dist/read-router/costs.d.ts.map +1 -0
- package/dist/read-router/costs.js +51 -0
- package/dist/read-router/costs.js.map +1 -0
- package/dist/read-router/dispatcher.d.ts +33 -0
- package/dist/read-router/dispatcher.d.ts.map +1 -0
- package/dist/read-router/dispatcher.js +29 -0
- package/dist/read-router/dispatcher.js.map +1 -0
- package/dist/read-router/index.d.ts +23 -0
- package/dist/read-router/index.d.ts.map +1 -0
- package/dist/read-router/index.js +17 -0
- package/dist/read-router/index.js.map +1 -0
- package/dist/read-router/routing-tables.d.ts +85 -0
- package/dist/read-router/routing-tables.d.ts.map +1 -0
- package/dist/read-router/routing-tables.js +79 -0
- package/dist/read-router/routing-tables.js.map +1 -0
- package/dist/read-router/select-strategy.d.ts +42 -0
- package/dist/read-router/select-strategy.d.ts.map +1 -0
- package/dist/read-router/select-strategy.js +92 -0
- package/dist/read-router/select-strategy.js.map +1 -0
- package/package.json +21 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/memory-router/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6GG;AAYH,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAoC9D,+EAA+E;AAC/E,SAAS;AACT,+EAA+E;AAE/E,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,uBAAuB,EACvB,aAAa,GACd,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,sBAAsB,EACtB,iBAAiB,EACjB,iBAAiB,EACjB,4BAA4B,GAC7B,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EACL,aAAa,EACb,gCAAgC,EAChC,+BAA+B,GAChC,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,wBAAwB,EACxB,gCAAgC,EAChC,sBAAsB,EACtB,mBAAmB,EACnB,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,wBAAwB,EACxB,6BAA6B,GAC9B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,YAAY,EACZ,kCAAkC,GACnC,MAAM,mBAAmB,CAAC;AAgB3B,OAAO,EACL,oBAAoB,EACpB,cAAc,EACd,yBAAyB,EACzB,oBAAoB,GACrB,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file routing-tables.ts
|
|
3
|
+
* @description Preset routing tables for {@link MemoryRouter}.
|
|
4
|
+
*
|
|
5
|
+
* The MemoryRouter dispatches each query to one of the available
|
|
6
|
+
* {@link MemoryBackendId} backends based on the classifier-predicted
|
|
7
|
+
* {@link MemoryQueryCategory}. The mapping from category to backend is a
|
|
8
|
+
* "routing table" — a frozen object that callers can pass through unchanged
|
|
9
|
+
* for the shipping defaults, or override per-category for custom workloads.
|
|
10
|
+
*
|
|
11
|
+
* Three preset tables ship out of the box, each calibrated from Phase B
|
|
12
|
+
* N=500 LongMemEval-S measurements:
|
|
13
|
+
*
|
|
14
|
+
* - {@link MINIMIZE_COST_TABLE}: cheapest Pareto-dominant backend per
|
|
15
|
+
* category. Pays the OM premium only on multi-session and
|
|
16
|
+
* single-session-preference (the categories where the architectural lift
|
|
17
|
+
* exceeds the cost premium).
|
|
18
|
+
* - {@link BALANCED_TABLE}: trades modest cost for large latency wins on
|
|
19
|
+
* knowledge-update and temporal-reasoning.
|
|
20
|
+
* - {@link MAXIMIZE_ACCURACY_TABLE}: highest-accuracy backend per category;
|
|
21
|
+
* ties broken by cost. v2 (post-Phase-B-2026-04-24) routes
|
|
22
|
+
* temporal-reasoning back to canonical-hybrid after Phase B revealed the
|
|
23
|
+
* v1 routing's accuracy gain was within CI noise but paid OM ingest cost.
|
|
24
|
+
*
|
|
25
|
+
* @module @framers/agentos/memory-router/routing-tables
|
|
26
|
+
*/
|
|
27
|
+
/**
|
|
28
|
+
* The six question categories the LLM-as-judge classifier produces.
|
|
29
|
+
* Calibrated from LongMemEval-S categories; mappings to other benchmark
|
|
30
|
+
* taxonomies (e.g. LOCOMO single-hop / multi-hop / temporal /
|
|
31
|
+
* open-domain / adversarial) are handled at adapter boundaries.
|
|
32
|
+
*/
|
|
33
|
+
export declare const MEMORY_QUERY_CATEGORIES: readonly ["single-session-user", "single-session-assistant", "single-session-preference", "knowledge-update", "multi-session", "temporal-reasoning"];
|
|
34
|
+
/**
|
|
35
|
+
* The six question categories the LLM-as-judge classifier produces.
|
|
36
|
+
*/
|
|
37
|
+
export type MemoryQueryCategory = (typeof MEMORY_QUERY_CATEGORIES)[number];
|
|
38
|
+
/**
|
|
39
|
+
* The retrieval architecture identifiers the router can dispatch to.
|
|
40
|
+
*
|
|
41
|
+
* - `canonical-hybrid`: BM25 + dense + RRF fusion + Cohere rerank-v3.5
|
|
42
|
+
* over the raw memory traces. The default cheapest-and-fastest path.
|
|
43
|
+
* - `observational-memory-v10`: synthesized observation log fed to the
|
|
44
|
+
* reader, with classifier-driven routing inside the OM pipeline. No
|
|
45
|
+
* verbatim citation rule.
|
|
46
|
+
* - `observational-memory-v11`: same as v10 but with conditional
|
|
47
|
+
* verbatim citation appended for knowledge-update and
|
|
48
|
+
* single-session-user categories. Wins on multi-session and
|
|
49
|
+
* single-session-preference.
|
|
50
|
+
*
|
|
51
|
+
* Backend execution itself lives in {@link MemoryDispatcher}; this type
|
|
52
|
+
* is the contract between the routing decision and the dispatcher.
|
|
53
|
+
*/
|
|
54
|
+
export type MemoryBackendId = 'canonical-hybrid' | 'observational-memory-v10' | 'observational-memory-v11';
|
|
55
|
+
/**
|
|
56
|
+
* The three shipping presets. Each preset corresponds to a distinct point
|
|
57
|
+
* on the Phase B-measured cost-accuracy Pareto frontier.
|
|
58
|
+
*/
|
|
59
|
+
export type MemoryRouterPreset = 'minimize-cost' | 'balanced' | 'maximize-accuracy';
|
|
60
|
+
/**
|
|
61
|
+
* A routing table maps every {@link MemoryQueryCategory} to its preferred
|
|
62
|
+
* {@link MemoryBackendId} for the given preset. Tables ship frozen so
|
|
63
|
+
* consumers cannot mutate the routing surface from outside the module.
|
|
64
|
+
*/
|
|
65
|
+
export interface RoutingTable {
|
|
66
|
+
/** Preset this table belongs to. */
readonly preset: MemoryRouterPreset;
|
|
67
|
+
/** Preferred backend for each query category; the Record type requires an entry for every category. */
readonly defaultMapping: Readonly<Record<MemoryQueryCategory, MemoryBackendId>>;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Preset: minimize-cost.
|
|
71
|
+
*
|
|
72
|
+
* Pareto-dominant cheapest backend per category. Pays the OM premium only
|
|
73
|
+
* on the two categories where the architectural lift earns it
|
|
74
|
+
* (multi-session +6.8pp, single-session-preference +3.3pp). Every other
|
|
75
|
+
* category routes to canonical-hybrid where Phase B measurements show the
|
|
76
|
+
* cheaper backend either dominates or matches within CI noise.
|
|
77
|
+
*
|
|
78
|
+
* Phase B simulation: 73.9% accuracy at $0.092/correct; oracle ceiling
|
|
79
|
+
* 76.0% at $0.157/correct. **Pareto-dominates the all-Tier-2b flat
|
|
80
|
+
* baseline by 4.77x cost reduction at +0.5pp accuracy** on the
|
|
81
|
+
* LongMemEval-S Phase B distribution.
|
|
82
|
+
*
|
|
83
|
+
* Recommended default for cost-sensitive workloads.
|
|
84
|
+
*/
|
|
85
|
+
export declare const MINIMIZE_COST_TABLE: RoutingTable;
|
|
86
|
+
/**
|
|
87
|
+
* Preset: balanced.
|
|
88
|
+
*
|
|
89
|
+
* Trades 1.6x cost for >10x latency reductions on knowledge-update and
|
|
90
|
+
* temporal-reasoning. Phase B measurements show Tier 2a v10 ties Tier 1
|
|
91
|
+
* canonical on accuracy for these two categories at much lower latency
|
|
92
|
+
* (4-19s vs 80-100s) — the latency win comes from skipping per-turn
|
|
93
|
+
* cognitive replay in favor of synthesized observations.
|
|
94
|
+
*
|
|
95
|
+
* Phase B simulation: 74.5% accuracy at $0.205/correct; 2.12x cheaper
|
|
96
|
+
* than Tier 2b flat with comparable accuracy.
|
|
97
|
+
*
|
|
98
|
+
* Recommended for interactive workloads where latency matters and the
|
|
99
|
+
* cost premium over minimize-cost is acceptable.
|
|
100
|
+
*/
|
|
101
|
+
export declare const BALANCED_TABLE: RoutingTable;
|
|
102
|
+
/**
|
|
103
|
+
* Preset: maximize-accuracy (v2).
|
|
104
|
+
*
|
|
105
|
+
* Highest-accuracy backend per category, ties broken by cost. v2
|
|
106
|
+
* (2026-04-24, post-Phase-B) routes temporal-reasoning back to
|
|
107
|
+
* canonical-hybrid after Phase B revealed:
|
|
108
|
+
* - v1 routing (TR -> Tier 2a) paid OM ingest cost for a within-CI
|
|
109
|
+
* accuracy gain (71.0% Tier 2a vs 70.2% Tier 1) on a hold-out slice;
|
|
110
|
+
* - combined with classifier misroutes the aggregate fell below the
|
|
111
|
+
* 74% acceptance floor at 73.8%.
|
|
112
|
+
* v2 keeps TR on canonical-hybrid where it's cheapest and
|
|
113
|
+
* accuracy-equivalent.
|
|
114
|
+
*
|
|
115
|
+
* Phase B measured: 75.6% [71.8, 79.2] at $0.2434/correct, 65.6s avg
|
|
116
|
+
* latency.
|
|
117
|
+
*/
|
|
118
|
+
export declare const MAXIMIZE_ACCURACY_TABLE: RoutingTable;
|
|
119
|
+
/**
|
|
120
|
+
* Convenience registry of all three preset tables, keyed by preset name.
|
|
121
|
+
* Useful when surfacing presets through a CLI flag or config field where
|
|
122
|
+
* the preset name is a string and the consumer needs the table object.
|
|
123
|
+
*/
|
|
124
|
+
export declare const PRESET_TABLES: Readonly<Record<MemoryRouterPreset, RoutingTable>>;
|
|
125
|
+
//# sourceMappingURL=routing-tables.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"routing-tables.d.ts","sourceRoot":"","sources":["../../src/memory-router/routing-tables.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAMH;;;;;GAKG;AACH,eAAO,MAAM,uBAAuB,sJAO1B,CAAC;AAEX;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,CAAC,OAAO,uBAAuB,CAAC,CAAC,MAAM,CAAC,CAAC;AAE3E;;;;;;;;;;;;;;;GAeG;AACH,MAAM,MAAM,eAAe,GACvB,kBAAkB,GAClB,0BAA0B,GAC1B,0BAA0B,CAAC;AAE/B;;;GAGG;AACH,MAAM,MAAM,kBAAkB,GAC1B,eAAe,GACf,UAAU,GACV,mBAAmB,CAAC;AAExB;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,MAAM,EAAE,kBAAkB,CAAC;IACpC,QAAQ,CAAC,cAAc,EAAE,QAAQ,CAAC,MAAM,CAAC,mBAAmB,EAAE,eAAe,CAAC,CAAC,CAAC;CACjF;AAMD;;;;;;;;;;;;;;;GAeG;AACH,eAAO,MAAM,mBAAmB,EAAE,YAUhB,CAAC;AAEnB;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,cAAc,EAAE,YAUX,CAAC;AAEnB;;;;;;;;;;;;;;;GAeG;AACH,eAAO,MAAM,uBAAuB,EAAE,YAUpB,CAAC;AAEnB;;;;GAIG;AACH,eAAO,MAAM,aAAa,EAAE,QAAQ,CAAC,MAAM,CAAC,kBAAkB,EAAE,YAAY,CAAC,CAKzE,CAAC"}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file routing-tables.ts
|
|
3
|
+
* @description Preset routing tables for {@link MemoryRouter}.
|
|
4
|
+
*
|
|
5
|
+
* The MemoryRouter dispatches each query to one of the available
|
|
6
|
+
* {@link MemoryBackendId} backends based on the classifier-predicted
|
|
7
|
+
* {@link MemoryQueryCategory}. The mapping from category to backend is a
|
|
8
|
+
* "routing table" — a frozen object that callers can pass through unchanged
|
|
9
|
+
* for the shipping defaults, or override per-category for custom workloads.
|
|
10
|
+
*
|
|
11
|
+
* Three preset tables ship out of the box, each calibrated from Phase B
|
|
12
|
+
* N=500 LongMemEval-S measurements:
|
|
13
|
+
*
|
|
14
|
+
* - {@link MINIMIZE_COST_TABLE}: cheapest Pareto-dominant backend per
|
|
15
|
+
* category. Pays the OM premium only on multi-session and
|
|
16
|
+
* single-session-preference (the categories where the architectural lift
|
|
17
|
+
* exceeds the cost premium).
|
|
18
|
+
* - {@link BALANCED_TABLE}: trades modest cost for large latency wins on
|
|
19
|
+
* knowledge-update and temporal-reasoning.
|
|
20
|
+
* - {@link MAXIMIZE_ACCURACY_TABLE}: highest-accuracy backend per category;
|
|
21
|
+
* ties broken by cost. v2 (post-Phase-B-2026-04-24) routes
|
|
22
|
+
* temporal-reasoning back to canonical-hybrid after Phase B revealed the
|
|
23
|
+
* v1 routing's accuracy gain was within CI noise but paid OM ingest cost.
|
|
24
|
+
*
|
|
25
|
+
* @module @framers/agentos/memory-router/routing-tables
|
|
26
|
+
*/
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// Public types
|
|
29
|
+
// ============================================================================
|
|
30
|
+
/**
|
|
31
|
+
* The six question categories the LLM-as-judge classifier produces.
|
|
32
|
+
* Calibrated from LongMemEval-S categories; mappings to other benchmark
|
|
33
|
+
* taxonomies (e.g. LOCOMO single-hop / multi-hop / temporal /
|
|
34
|
+
* open-domain / adversarial) are handled at adapter boundaries.
|
|
35
|
+
*/
|
|
36
|
+
export const MEMORY_QUERY_CATEGORIES = Object.freeze([
    // Frozen so the runtime value honors the `readonly` tuple type declared in
    // routing-tables.d.ts and matches the frozen preset tables in this module:
    // consumers can read and iterate the list but cannot add, remove, or
    // replace categories from outside.
    'single-session-user',
    'single-session-assistant',
    'single-session-preference',
    'knowledge-update',
    'multi-session',
    'temporal-reasoning',
]);
|
|
44
|
+
// ============================================================================
|
|
45
|
+
// Preset tables
|
|
46
|
+
// ============================================================================
|
|
47
|
+
/**
|
|
48
|
+
* Preset: minimize-cost.
|
|
49
|
+
*
|
|
50
|
+
* Pareto-dominant cheapest backend per category. Pays the OM premium only
|
|
51
|
+
* on the two categories where the architectural lift earns it
|
|
52
|
+
* (multi-session +6.8pp, single-session-preference +3.3pp). Every other
|
|
53
|
+
* category routes to canonical-hybrid where Phase B measurements show the
|
|
54
|
+
* cheaper backend either dominates or matches within CI noise.
|
|
55
|
+
*
|
|
56
|
+
* Phase B simulation: 73.9% accuracy at $0.092/correct; oracle ceiling
|
|
57
|
+
* 76.0% at $0.157/correct. **Pareto-dominates the all-Tier-2b flat
|
|
58
|
+
* baseline by 4.77x cost reduction at +0.5pp accuracy** on the
|
|
59
|
+
* LongMemEval-S Phase B distribution.
|
|
60
|
+
*
|
|
61
|
+
* Recommended default for cost-sensitive workloads.
|
|
62
|
+
*/
|
|
63
|
+
export const MINIMIZE_COST_TABLE = Object.freeze({
    preset: 'minimize-cost',
    // Built from [category, backend] pairs; pair order defines the key
    // iteration order of the resulting mapping.
    defaultMapping: Object.freeze(Object.fromEntries([
        // Categories served by the canonical pipeline.
        ['single-session-assistant', 'canonical-hybrid'],
        ['single-session-user', 'canonical-hybrid'],
        ['temporal-reasoning', 'canonical-hybrid'],
        ['knowledge-update', 'canonical-hybrid'],
        // The two categories routed to observational memory.
        ['multi-session', 'observational-memory-v11'],
        ['single-session-preference', 'observational-memory-v11'],
    ])),
});
|
|
74
|
+
/**
|
|
75
|
+
* Preset: balanced.
|
|
76
|
+
*
|
|
77
|
+
* Trades 1.6x cost for >10x latency reductions on knowledge-update and
|
|
78
|
+
* temporal-reasoning. Phase B measurements show Tier 2a v10 ties Tier 1
|
|
79
|
+
* canonical on accuracy for these two categories at much lower latency
|
|
80
|
+
* (4-19s vs 80-100s) — the latency win comes from skipping per-turn
|
|
81
|
+
* cognitive replay in favor of synthesized observations.
|
|
82
|
+
*
|
|
83
|
+
* Phase B simulation: 74.5% accuracy at $0.205/correct; 2.12x cheaper
|
|
84
|
+
* than Tier 2b flat with comparable accuracy.
|
|
85
|
+
*
|
|
86
|
+
* Recommended for interactive workloads where latency matters and the
|
|
87
|
+
* cost premium over minimize-cost is acceptable.
|
|
88
|
+
*/
|
|
89
|
+
export const BALANCED_TABLE = Object.freeze({
    preset: 'balanced',
    // Built from [category, backend] pairs; pair order defines the key
    // iteration order of the resulting mapping.
    defaultMapping: Object.freeze(Object.fromEntries([
        // Categories served by the canonical pipeline.
        ['single-session-assistant', 'canonical-hybrid'],
        ['single-session-user', 'canonical-hybrid'],
        // Categories routed to observational memory v10.
        ['temporal-reasoning', 'observational-memory-v10'],
        ['knowledge-update', 'observational-memory-v10'],
        // Categories routed to observational memory v11.
        ['multi-session', 'observational-memory-v11'],
        ['single-session-preference', 'observational-memory-v11'],
    ])),
});
|
|
100
|
+
/**
|
|
101
|
+
* Preset: maximize-accuracy (v2).
|
|
102
|
+
*
|
|
103
|
+
* Highest-accuracy backend per category, ties broken by cost. v2
|
|
104
|
+
* (2026-04-24, post-Phase-B) routes temporal-reasoning back to
|
|
105
|
+
* canonical-hybrid after Phase B revealed:
|
|
106
|
+
* - v1 routing (TR -> Tier 2a) paid OM ingest cost for a within-CI
|
|
107
|
+
* accuracy gain (71.0% Tier 2a vs 70.2% Tier 1) on a hold-out slice;
|
|
108
|
+
* - combined with classifier misroutes the aggregate fell below the
|
|
109
|
+
* 74% acceptance floor at 73.8%.
|
|
110
|
+
* v2 keeps TR on canonical-hybrid where it's cheapest and
|
|
111
|
+
* accuracy-equivalent.
|
|
112
|
+
*
|
|
113
|
+
* Phase B measured: 75.6% [71.8, 79.2] at $0.2434/correct, 65.6s avg
|
|
114
|
+
* latency.
|
|
115
|
+
*/
|
|
116
|
+
export const MAXIMIZE_ACCURACY_TABLE = Object.freeze({
    preset: 'maximize-accuracy',
    // Built from [category, backend] pairs; pair order defines the key
    // iteration order of the resulting mapping.
    defaultMapping: Object.freeze(Object.fromEntries([
        ['single-session-assistant', 'canonical-hybrid'],
        ['single-session-user', 'observational-memory-v11'],
        // temporal-reasoning stays on canonical-hybrid in this (v2) table.
        ['temporal-reasoning', 'canonical-hybrid'],
        ['knowledge-update', 'observational-memory-v11'],
        ['multi-session', 'observational-memory-v11'],
        ['single-session-preference', 'observational-memory-v11'],
    ])),
});
|
|
127
|
+
/**
|
|
128
|
+
* Convenience registry of all three preset tables, keyed by preset name.
|
|
129
|
+
* Useful when surfacing presets through a CLI flag or config field where
|
|
130
|
+
* the preset name is a string and the consumer needs the table object.
|
|
131
|
+
*/
|
|
132
|
+
export const PRESET_TABLES = Object.freeze(Object.fromEntries([
    // [preset name, table] pairs; pair order defines the registry's key order.
    ['minimize-cost', MINIMIZE_COST_TABLE],
    ['balanced', BALANCED_TABLE],
    ['maximize-accuracy', MAXIMIZE_ACCURACY_TABLE],
]));
|
|
137
|
+
//# sourceMappingURL=routing-tables.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"routing-tables.js","sourceRoot":"","sources":["../../src/memory-router/routing-tables.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,+EAA+E;AAC/E,eAAe;AACf,+EAA+E;AAE/E;;;;;GAKG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,qBAAqB;IACrB,0BAA0B;IAC1B,2BAA2B;IAC3B,kBAAkB;IAClB,eAAe;IACf,oBAAoB;CACZ,CAAC;AA+CX,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAiB,MAAM,CAAC,MAAM,CAAC;IAC7D,MAAM,EAAE,eAAwB;IAChC,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC;QAC5B,0BAA0B,EAAE,kBAAkB;QAC9C,qBAAqB,EAAE,kBAAkB;QACzC,oBAAoB,EAAE,kBAAkB;QACxC,kBAAkB,EAAE,kBAAkB;QACtC,eAAe,EAAE,0BAA0B;QAC3C,2BAA2B,EAAE,0BAA0B;KACxD,CAAC;CACH,CAAiB,CAAC;AAEnB;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,MAAM,cAAc,GAAiB,MAAM,CAAC,MAAM,CAAC;IACxD,MAAM,EAAE,UAAmB;IAC3B,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC;QAC5B,0BAA0B,EAAE,kBAAkB;QAC9C,qBAAqB,EAAE,kBAAkB;QACzC,oBAAoB,EAAE,0BAA0B;QAChD,kBAAkB,EAAE,0BAA0B;QAC9C,eAAe,EAAE,0BAA0B;QAC3C,2BAA2B,EAAE,0BAA0B;KACxD,CAAC;CACH,CAAiB,CAAC;AAEnB;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAiB,MAAM,CAAC,MAAM,CAAC;IACjE,MAAM,EAAE,mBAA4B;IACpC,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC;QAC5B,0BAA0B,EAAE,kBAAkB;QAC9C,qBAAqB,EAAE,0BAA0B;QACjD,oBAAoB,EAAE,kBAAkB;QACxC,kBAAkB,EAAE,0BAA0B;QAC9C,eAAe,EAAE,0BAA0B;QAC3C,2BAA2B,EAAE,0BAA0B;KACxD,CAAC;CACH,CAAiB,CAAC;AAEnB;;;;GAIG;AACH,MAAM,CAAC,MAAM,aAAa,GACxB,MAAM,CAAC,MAAM,CAAC;IACZ,eAAe,EAAE,mBAAmB;IACpC,UAAU,EAAE,cAAc;IAC1B,mBAAmB,EAAE,uBAAuB;CAC7C,CAAC,CAAC"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file select-backend.ts
|
|
3
|
+
* @description Pure function that turns a classifier-predicted category
|
|
4
|
+
* + a {@link MemoryRouterConfig} into a {@link MemoryRoutingDecision}.
|
|
5
|
+
*
|
|
6
|
+
* Stateless. Deterministic. No I/O. Suitable for use inside hot dispatch
|
|
7
|
+
* loops and inside cache-key construction (the function's output is a
|
|
8
|
+
* pure function of its inputs).
|
|
9
|
+
*
|
|
10
|
+
* The decision carries:
|
|
11
|
+
* - the chosen {@link MemoryBackendId},
|
|
12
|
+
* - the predicted category (and optional ground-truth for telemetry),
|
|
13
|
+
* - the estimated USD cost of the routing pick,
|
|
14
|
+
* - the budget ceiling (if any) and whether the pick exceeded it,
|
|
15
|
+
* - a human-readable reason explaining the routing path taken.
|
|
16
|
+
*
|
|
17
|
+
* @module @framers/agentos/memory-router/select-backend
|
|
18
|
+
*/
|
|
19
|
+
import type { MemoryBackendCostPoint } from './backend-costs.js';
|
|
20
|
+
import type { MemoryBackendId, MemoryQueryCategory, MemoryRouterPreset, RoutingTable } from './routing-tables.js';
|
|
21
|
+
/**
|
|
22
|
+
* Budget enforcement modes:
|
|
23
|
+
* - `hard`: throw {@link MemoryRouterBudgetExceededError} if the
|
|
24
|
+
* routing-table pick exceeds the per-query USD budget. Lets callers
|
|
25
|
+
* escalate at the application layer (e.g. fall back to a reduced
|
|
26
|
+
* pipeline or surface a 402-style error).
|
|
27
|
+
* - `soft`: exceed the budget only when the picked backend has a
|
|
28
|
+
* better USD-per-correct ratio than the cheapest backend that fits.
|
|
29
|
+
* Prefers accuracy-economical overflows.
|
|
30
|
+
* - `cheapest-fallback`: silently downgrade to the cheapest backend
|
|
31
|
+
* that fits the budget. Suitable for cost-strict workloads where
|
|
32
|
+
* correctness gracefully degrades.
|
|
33
|
+
*/
|
|
34
|
+
export type MemoryBudgetMode = 'hard' | 'soft' | 'cheapest-fallback';
|
|
35
|
+
/**
|
|
36
|
+
* Configuration object for {@link selectBackend}. Bundles the routing
|
|
37
|
+
* table, cost data, and budget policy into a single value the function
|
|
38
|
+
* can reason about deterministically.
|
|
39
|
+
*/
|
|
40
|
+
export interface MemoryRouterConfig {
|
|
41
|
+
/** Routing table consulted for the predicted category. */
readonly table: RoutingTable;
|
|
42
|
+
/** Per-query USD budget. Presumably `null` means no budget is enforced — confirm against selectBackend. */
readonly budgetPerQuery: number | null;
|
|
43
|
+
/** How a routing-table pick that exceeds the budget is handled; see MemoryBudgetMode. */
readonly budgetMode: MemoryBudgetMode;
|
|
44
|
+
/** Cost point for every backend id. Presumably the source of the decision's estimated cost — confirm against selectBackend. */
readonly backendCosts: Readonly<Record<MemoryBackendId, MemoryBackendCostPoint>>;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Output of {@link selectBackend}. The chosen backend plus full telemetry
|
|
48
|
+
* about how the routing decision was made.
|
|
49
|
+
*/
|
|
50
|
+
export interface MemoryRoutingDecision {
|
|
51
|
+
/** Category predicted by the classifier; the input to the routing lookup. */
readonly predictedCategory: MemoryQueryCategory;
|
|
52
|
+
/**
|
|
53
|
+
* Optional ground-truth category, for telemetry only. When the caller
|
|
54
|
+
* has access to gold labels (e.g. during benchmarking), passing them
|
|
55
|
+
* through here lets downstream analysis distinguish classifier
|
|
56
|
+
* misroutes from architectural misses without needing a second pass.
|
|
57
|
+
*/
|
|
58
|
+
readonly groundTruthCategory: MemoryQueryCategory | null;
|
|
59
|
+
/** Backend the router selected for this query. */
readonly chosenBackend: MemoryBackendId;
|
|
60
|
+
/** Human-readable explanation of the routing path taken. */
readonly chosenBackendReason: string;
|
|
61
|
+
/** Estimated USD cost of the routing pick. */
readonly estimatedCostUsd: number;
|
|
62
|
+
/** Budget ceiling, if any. Presumably `null` when no budget was configured — confirm against selectBackend. */
readonly budgetCeiling: number | null;
|
|
63
|
+
/** Whether the pick exceeded the budget ceiling. */
readonly budgetExceeded: boolean;
|
|
64
|
+
/** Preset associated with the decision. Presumably copied from the config's routing table — confirm against selectBackend. */
readonly preset: MemoryRouterPreset;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Thrown when the predicted category is not in the routing table. Should
|
|
68
|
+
* never fire with the three shipping presets (each covers all six
|
|
69
|
+
* categories) but guards custom-table misuse.
|
|
70
|
+
*/
|
|
71
|
+
export declare class MemoryRouterUnknownCategoryError extends Error {
|
|
72
|
+
/** Category that was not found in the routing table. */
readonly category: string;
|
|
73
|
+
/** @param category - Category that was not found in the routing table. */
constructor(category: string);
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Thrown by `hard` budget mode when the routing-table pick exceeds the
|
|
77
|
+
* per-query USD ceiling. Carries the picked backend + cost + budget so
|
|
78
|
+
* application-layer fallbacks can decide what to do (fall back to a
|
|
79
|
+
* different memory architecture, return a typed 402 to the user, etc).
|
|
80
|
+
*/
|
|
81
|
+
export declare class MemoryRouterBudgetExceededError extends Error {
|
|
82
|
+
/** Backend picked by the routing table. */
readonly backend: MemoryBackendId;
|
|
83
|
+
/** Cost of the picked backend that exceeded the budget (USD). */
readonly cost: number;
|
|
84
|
+
/** Per-query USD ceiling that was exceeded. */
readonly budget: number;
|
|
85
|
+
/**
 * @param backend - Backend picked by the routing table.
 * @param cost - Cost of that backend (USD).
 * @param budget - Per-query USD ceiling that was exceeded.
 */
constructor(backend: MemoryBackendId, cost: number, budget: number);
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Pure routing decision: maps a predicted category to a backend choice
|
|
89
|
+
* given a routing table + budget policy + cost-points data.
|
|
90
|
+
*
|
|
91
|
+
* Algorithm:
|
|
92
|
+
* 1. Look up the table's preferred backend for the predicted category.
|
|
93
|
+
* Throw if missing (custom-table misuse).
|
|
94
|
+
* 2. If no budget is set, return the table's pick.
|
|
95
|
+
* 3. If the pick fits the budget, return it.
|
|
96
|
+
* 4. If the pick exceeds:
|
|
97
|
+
* - `hard`: throw {@link MemoryRouterBudgetExceededError}.
|
|
98
|
+
* - `cheapest-fallback`: pick the cheapest backend that fits;
|
|
99
|
+
* if none fits, pick the absolute cheapest and flag exceeded.
|
|
100
|
+
* - `soft`: keep the pick if its $/correct beats that of the cheapest backend that fits;
|
|
101
|
+
* otherwise downgrade to the cheapest backend that fits. The globally-no-fit case
|
|
102
|
+
* falls through to absolute-cheapest with budgetExceeded=true.
|
|
103
|
+
*
|
|
104
|
+
* @param args
|
|
105
|
+
* @param args.predictedCategory - Category predicted by the LLM-as-judge classifier.
|
|
106
|
+
* @param args.groundTruthCategory - Gold-label category for telemetry, or null in production.
|
|
107
|
+
* @param args.config - Routing table + budget policy + cost-points map.
|
|
108
|
+
*
|
|
109
|
+
* @returns A {@link MemoryRoutingDecision} describing the chosen backend.
|
|
110
|
+
*
|
|
111
|
+
* @throws {@link MemoryRouterUnknownCategoryError} when the table does not
|
|
112
|
+
* cover `predictedCategory`.
|
|
113
|
+
* @throws {@link MemoryRouterBudgetExceededError} when `budgetMode === 'hard'`
|
|
114
|
+
* and the routing-table pick exceeds the budget.
|
|
115
|
+
*
|
|
116
|
+
* @example
|
|
117
|
+
* ```ts
|
|
118
|
+
* const decision = selectBackend({
|
|
119
|
+
* predictedCategory: 'multi-session',
|
|
120
|
+
* groundTruthCategory: null,
|
|
121
|
+
* config: {
|
|
122
|
+
* table: MINIMIZE_COST_TABLE,
|
|
123
|
+
* budgetPerQuery: 0.05,
|
|
124
|
+
* budgetMode: 'cheapest-fallback',
|
|
125
|
+
* backendCosts: DEFAULT_MEMORY_BACKEND_COSTS,
|
|
126
|
+
* },
|
|
127
|
+
* });
|
|
128
|
+
* console.log(decision.chosenBackend); // 'observational-memory-v11' (fits)
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
export declare function selectBackend(args: {
|
|
132
|
+
predictedCategory: MemoryQueryCategory;
|
|
133
|
+
groundTruthCategory: MemoryQueryCategory | null;
|
|
134
|
+
config: MemoryRouterConfig;
|
|
135
|
+
}): MemoryRoutingDecision;
|
|
136
|
+
//# sourceMappingURL=select-backend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select-backend.d.ts","sourceRoot":"","sources":["../../src/memory-router/select-backend.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,KAAK,EACV,eAAe,EACf,mBAAmB,EACnB,kBAAkB,EAClB,YAAY,EACb,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;GAYG;AACH,MAAM,MAAM,gBAAgB,GAAG,MAAM,GAAG,MAAM,GAAG,mBAAmB,CAAC;AAErE;;;;GAIG;AACH,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,KAAK,EAAE,YAAY,CAAC;IAC7B,QAAQ,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,QAAQ,CAAC,UAAU,EAAE,gBAAgB,CAAC;IACtC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,eAAe,EAAE,sBAAsB,CAAC,CAAC,CAAC;CAClF;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,iBAAiB,EAAE,mBAAmB,CAAC;IAChD;;;;;OAKG;IACH,QAAQ,CAAC,mBAAmB,EAAE,mBAAmB,GAAG,IAAI,CAAC;IACzD,QAAQ,CAAC,aAAa,EAAE,eAAe,CAAC;IACxC,QAAQ,CAAC,mBAAmB,EAAE,MAAM,CAAC;IACrC,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,QAAQ,CAAC,MAAM,EAAE,kBAAkB,CAAC;CACrC;AAED;;;;GAIG;AACH,qBAAa,gCAAiC,SAAQ,KAAK;aAC7B,QAAQ,EAAE,MAAM;gBAAhB,QAAQ,EAAE,MAAM;CAI7C;AAED;;;;;GAKG;AACH,qBAAa,+BAAgC,SAAQ,KAAK;aAEtC,OAAO,EAAE,eAAe;aACxB,IAAI,EAAE,MAAM;aACZ,MAAM,EAAE,MAAM;gBAFd,OAAO,EAAE,eAAe,EACxB,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM;CAQjC;AAOD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE;IAClC,iBAAiB,EAAE,mBAAmB,CAAC;IACvC,mBAAmB,EAAE,mBAAmB,GAAG,IAAI,CAAC;IAChD,MAAM,EAAE,kBAAkB,CAAC;CAC5B,GAAG,qBAAqB,CA6IxB"}
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file select-backend.ts
|
|
3
|
+
* @description Pure function that turns a classifier-predicted category
|
|
4
|
+
* + a {@link MemoryRouterConfig} into a {@link MemoryRoutingDecision}.
|
|
5
|
+
*
|
|
6
|
+
* Stateless. Deterministic. No I/O. Suitable for use inside hot dispatch
|
|
7
|
+
* loops and inside cache-key construction (the function's output is a
|
|
8
|
+
* pure function of its inputs).
|
|
9
|
+
*
|
|
10
|
+
* The decision carries:
|
|
11
|
+
* - the chosen {@link MemoryBackendId},
|
|
12
|
+
* - the predicted category (and optional ground-truth for telemetry),
|
|
13
|
+
* - the estimated USD cost of the routing pick,
|
|
14
|
+
* - the budget ceiling (if any) and whether the pick exceeded it,
|
|
15
|
+
* - a human-readable reason explaining the routing path taken.
|
|
16
|
+
*
|
|
17
|
+
* @module @framers/agentos/memory-router/select-backend
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Error raised when a predicted category has no entry in the routing
 * table. The offending category is kept on the instance as `category`
 * so callers can inspect it without parsing the message.
 *
 * The original notes state that the three shipping presets each cover
 * all six categories, so this is expected to guard custom-table misuse.
 */
export class MemoryRouterUnknownCategoryError extends Error {
    /**
     * @param category - The category that was looked up and not found.
     */
    constructor(category) {
        const message = `MemoryRouter: category '${category}' not in routing table`;
        super(message);
        // `category` is assigned before `name` so own-property order is unchanged.
        Object.assign(this, {
            category,
            name: 'MemoryRouterUnknownCategoryError',
        });
    }
}
|
|
31
|
+
/**
 * Thrown by `hard` budget mode when the routing-table pick exceeds the
 * per-query USD ceiling. Carries the picked backend + cost + budget so
 * application-layer fallbacks can decide what to do (fall back to a
 * different memory architecture, return a typed 402 to the user, etc).
 *
 * Instance fields: `backend`, `cost`, `budget` (exactly as passed in) and
 * `name === 'MemoryRouterBudgetExceededError'`.
 */
export class MemoryRouterBudgetExceededError extends Error {
    /**
     * @param backend - Identifier of the backend the routing table picked.
     * @param cost - Estimated per-query cost (USD) of that backend.
     * @param budget - Per-query budget ceiling (USD) that was exceeded.
     */
    constructor(backend, cost, budget) {
        // Numbers are rendered with four decimals. Anything else (for
        // example an `undefined` cost coming from an incomplete cost table)
        // is stringified as-is, so building the error never throws an
        // unrelated TypeError from `.toFixed`.
        const usd = (value) => (typeof value === 'number' ? value.toFixed(4) : String(value));
        super(`MemoryRouter: backend '${backend}' cost $${usd(cost)} exceeds hard budget $${usd(budget)}`);
        this.backend = backend;
        this.cost = cost;
        this.budget = budget;
        this.name = 'MemoryRouterBudgetExceededError';
    }
}
|
|
47
|
+
/**
 * Pure routing decision: maps a predicted category to a backend choice
 * given a routing table + budget policy + cost-points data.
 *
 * Algorithm:
 *   1. Look up the table's preferred backend for the predicted category.
 *      Only the mapping's own keys count, so inherited names such as
 *      `constructor` are rejected like any other unknown category.
 *      Throw if missing (custom-table misuse).
 *   2. If no budget is set (`null` or `undefined`), return the table's pick.
 *   3. If the pick fits the budget, return it.
 *   4. If the pick exceeds:
 *      - `hard`: throw {@link MemoryRouterBudgetExceededError}.
 *      - `cheapest-fallback`: pick the cheapest backend that fits;
 *        if none fits, pick the absolute cheapest and flag exceeded.
 *      - `soft`: keep the pick if its $/correct beats the cheapest fits;
 *        otherwise downgrade to the cheapest fits. Globally-no-fit case
 *        falls through to absolute-cheapest with budgetExceeded=true.
 *        Any mode other than `hard` / `cheapest-fallback` takes this path.
 *
 * @param args
 * @param args.predictedCategory - Category predicted by the LLM-as-judge classifier.
 * @param args.groundTruthCategory - Gold-label category for telemetry, or null in production.
 * @param args.config - Routing table + budget policy + cost-points map.
 *
 * @returns A {@link MemoryRoutingDecision} describing the chosen backend.
 *
 * @throws {@link MemoryRouterUnknownCategoryError} when the table does not
 *   cover `predictedCategory`.
 * @throws {@link MemoryRouterBudgetExceededError} when `budgetMode === 'hard'`
 *   and the routing-table pick exceeds the budget.
 * @throws {TypeError} when the routing-table pick has no entry in
 *   `config.backendCosts`.
 *
 * @example
 * ```ts
 * const decision = selectBackend({
 *   predictedCategory: 'multi-session',
 *   groundTruthCategory: null,
 *   config: {
 *     table: MINIMIZE_COST_TABLE,
 *     budgetPerQuery: 0.05,
 *     budgetMode: 'cheapest-fallback',
 *     backendCosts: DEFAULT_MEMORY_BACKEND_COSTS,
 *   },
 * });
 * console.log(decision.chosenBackend); // 'observational-memory-v11' (fits)
 * ```
 */
export function selectBackend(args) {
    const { predictedCategory, groundTruthCategory, config } = args;
    const { table, budgetMode, backendCosts } = config;
    // An omitted budget (undefined) means the same as an explicit null.
    const budgetPerQuery = config.budgetPerQuery === undefined ? null : config.budgetPerQuery;

    // Every return path yields the same eight fields in the same order;
    // only the chosen backend, the reason, the cost and the flag vary.
    const decide = (chosenBackend, chosenBackendReason, estimatedCostUsd, budgetExceeded) => ({
        predictedCategory,
        groundTruthCategory,
        chosenBackend,
        chosenBackendReason,
        estimatedCostUsd,
        budgetCeiling: budgetPerQuery,
        budgetExceeded,
        preset: table.preset,
    });

    // 1. Validate: category must be an OWN key of the routing table, so
    //    prototype members (`constructor`, `toString`, ...) are never
    //    mistaken for a backend id.
    const mapping = table.defaultMapping;
    const picked = Object.prototype.hasOwnProperty.call(mapping, predictedCategory)
        ? mapping[predictedCategory]
        : undefined;
    if (!picked) {
        throw new MemoryRouterUnknownCategoryError(predictedCategory);
    }

    // 2. Compute per-query cost for the picked backend on this category.
    const pickedEntry = backendCosts[picked];
    if (pickedEntry == null) {
        throw new TypeError(`MemoryRouter: backend '${picked}' has no entry in backendCosts`);
    }
    const pickedCost = pickedEntry.perCategoryCostPerQuery[predictedCategory];

    // 3. Budget pass-through: no budget OR pick fits.
    if (budgetPerQuery === null) {
        return decide(picked, 'routing-table pick, no budget', pickedCost, false);
    }
    if (pickedCost <= budgetPerQuery) {
        return decide(picked, 'routing-table pick fits budget', pickedCost, false);
    }

    // 4. Budget exceeded. Hard mode bails immediately.
    if (budgetMode === 'hard') {
        throw new MemoryRouterBudgetExceededError(picked, pickedCost, budgetPerQuery);
    }

    // Price every known backend on this category. Ties keep the earlier
    // entry, so the result follows the iteration order of `backendCosts`.
    const cheaper = (a, b) => (a.cost <= b.cost ? a : b);
    const candidates = Object.values(backendCosts).map((c) => ({
        backend: c.backend,
        cost: c.perCategoryCostPerQuery[predictedCategory],
    }));
    const fits = candidates.filter((c) => c.cost <= budgetPerQuery);

    // No backend fits at all -> globally cheapest with budgetExceeded=true.
    if (fits.length === 0) {
        const globallyCheapest = candidates.reduce(cheaper);
        return decide(
            globallyCheapest.backend,
            'no backend fits budget; picking absolute cheapest',
            globallyCheapest.cost,
            true,
        );
    }
    const cheapestFits = fits.reduce(cheaper);

    // cheapest-fallback: silently downgrade.
    if (budgetMode === 'cheapest-fallback') {
        return decide(
            cheapestFits.backend,
            'budget downgrade (cheapest-fallback mode)',
            cheapestFits.cost,
            false,
        );
    }

    // soft: keep the pick only if it's better $/correct than cheapest-fits.
    const pickedAcc = pickedEntry.perCategoryAccuracy[predictedCategory];
    const cheapestAcc = backendCosts[cheapestFits.backend].perCategoryAccuracy[predictedCategory];

    // Edge case: picked has zero accuracy -> always downgrade. Checked first
    // so the $/correct division below never divides by zero.
    if (pickedAcc === 0) {
        return decide(
            cheapestFits.backend,
            'soft budget downgrade: picked has 0 acc on category',
            cheapestFits.cost,
            false,
        );
    }
    // Edge case: cheapest has zero accuracy -> stay with picked even though exceeded.
    if (cheapestAcc === 0) {
        return decide(picked, 'soft exceed: cheapest has 0 acc', pickedCost, true);
    }

    // Cost per correct answer: lower is better; ties keep the table's pick.
    const pickedCPC = pickedCost / pickedAcc;
    const cheapestCPC = cheapestFits.cost / cheapestAcc;
    if (pickedCPC <= cheapestCPC) {
        return decide(picked, 'soft exceed: better $/correct', pickedCost, true);
    }
    return decide(
        cheapestFits.backend,
        'soft budget downgrade: cheaper $/correct',
        cheapestFits.cost,
        false,
    );
}
|
|
210
|
+
//# sourceMappingURL=select-backend.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select-backend.js","sourceRoot":"","sources":["../../src/memory-router/select-backend.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AA0DH;;;;GAIG;AACH,MAAM,OAAO,gCAAiC,SAAQ,KAAK;IACzD,YAA4B,QAAgB;QAC1C,KAAK,CAAC,2BAA2B,QAAQ,wBAAwB,CAAC,CAAC;QADzC,aAAQ,GAAR,QAAQ,CAAQ;QAE1C,IAAI,CAAC,IAAI,GAAG,kCAAkC,CAAC;IACjD,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,+BAAgC,SAAQ,KAAK;IACxD,YACkB,OAAwB,EACxB,IAAY,EACZ,MAAc;QAE9B,KAAK,CACH,0BAA0B,OAAO,WAAW,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YAC5D,wBAAwB,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC9C,CAAC;QAPc,YAAO,GAAP,OAAO,CAAiB;QACxB,SAAI,GAAJ,IAAI,CAAQ;QACZ,WAAM,GAAN,MAAM,CAAQ;QAM9B,IAAI,CAAC,IAAI,GAAG,iCAAiC,CAAC;IAChD,CAAC;CACF;AAOD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,MAAM,UAAU,aAAa,CAAC,IAI7B;IACC,MAAM,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAChE,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,YAAY,EAAE,GAAG,MAAM,CAAC;IAEnE,kDAAkD;IAClD,MAAM,MAAM,GAAG,KAAK,CAAC,cAAc,CAAC,iBAAiB,CAExC,CAAC;IACd,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,gCAAgC,CAAC,iBAAiB,CAAC,CAAC;IAChE,CAAC;IAED,qEAAqE;IACrE,MAAM,UAAU,GACd,YAAY,CAAC,MAAM,CAAC,CAAC,uBAAuB,CAAC,iBAAiB,CAAC,CAAC;IAElE,kDAAkD;IAClD,IAAI,cAAc,KAAK,IAAI,IAAI,UAAU,IAAI,cAAc,EAAE,CAAC;QAC5D,OAAO;YACL,iBAAiB;YACjB,mBAAmB;YACnB,aAAa,EAAE,MAAM;YACrB,mBAAmB,EACjB,cAAc,KAAK,IAAI;gBACrB,CAAC,CAAC,+BAA+B;gBACjC,CAAC,CAAC,gCAAgC;YACtC,gBAAgB,EAAE,UAAU;YAC5B,aAAa,EAAE,cAAc;YAC7B,cAAc,EAAE,KAAK;YACrB,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC;IACJ,CAAC;IAED,mDAAmD;IACnD,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,+BAA+B,CAAC,MAAM,EAAE,UAAU,EAAE,cAAc,CAAC,CAAC;IAChF,CAAC;IAED,mEAAmE;IACnE,MAAM,UAAU,GACd,MAAM,CAAC,MAAM,CAAC,YAAY,CAC3B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACZ,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,IAAI,EAAE,CAAC,CAAC,uBAAuB,CAAC,iBAAiB,CAAC;KACnD,CAAC,CAAC,CAAC;IACJ,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,cAAc,CAAC,CAAC;IAChE,MAAM,YAAY,GAChB,IAAI,CAAC,MAAM,GAAG,CAAC;QACb,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC
,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACnD,CAAC,CAAC,IAAI,CAAC;IAEX,wEAAwE;IACxE,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,MAAM,gBAAgB,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClD,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CACzB,CAAC;QACF,OAAO;YACL,iBAAiB;YACjB,mBAAmB;YACnB,aAAa,EAAE,gBAAgB,CAAC,OAAO;YACvC,mBAAmB,EAAE,mDAAmD;YACxE,gBAAgB,EAAE,gBAAgB,CAAC,IAAI;YACvC,aAAa,EAAE,cAAc;YAC7B,cAAc,EAAE,IAAI;YACpB,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC;IACJ,CAAC;IAED,yCAAyC;IACzC,IAAI,UAAU,KAAK,mBAAmB,EAAE,CAAC;QACvC,OAAO;YACL,iBAAiB;YACjB,mBAAmB;YACnB,aAAa,EAAE,YAAY,CAAC,OAAO;YACnC,mBAAmB,EAAE,2CAA2C;YAChE,gBAAgB,EAAE,YAAY,CAAC,IAAI;YACnC,aAAa,EAAE,cAAc;YAC7B,cAAc,EAAE,KAAK;YACrB,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC;IACJ,CAAC;IAED,wEAAwE;IACxE,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,iBAAiB,CAAC,CAAC;IAC9E,MAAM,WAAW,GACf,YAAY,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,mBAAmB,CAAC,iBAAiB,CAAC,CAAC;IAE5E,2DAA2D;IAC3D,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QACpB,OAAO;YACL,iBAAiB;YACjB,mBAAmB;YACnB,aAAa,EAAE,YAAY,CAAC,OAAO;YACnC,mBAAmB,EAAE,qDAAqD;YAC1E,gBAAgB,EAAE,YAAY,CAAC,IAAI;YACnC,aAAa,EAAE,cAAc;YAC7B,cAAc,EAAE,KAAK;YACrB,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC;IACJ,CAAC;IAED,kFAAkF;IAClF,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,iBAAiB;YACjB,mBAAmB;YACnB,aAAa,EAAE,MAAM;YACrB,mBAAmB,EAAE,iCAAiC;YACtD,gBAAgB,EAAE,UAAU;YAC5B,aAAa,EAAE,cAAc;YAC7B,cAAc,EAAE,IAAI;YACpB,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,UAAU,GAAG,SAAS,CAAC;IACzC,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,GAAG,WAAW,CAAC;IAEpD,IAAI,SAAS,IAAI,WAAW,EAAE,CAAC;QAC7B,OAAO;YACL,iBAAiB;YACjB,mBAAmB;YACnB,aAAa,EAAE,MAAM;YACrB,mBAAmB,EAAE,+BAA+B;YACpD,gBAAgB,EAAE,UAAU;YAC5B,aAAa,EAAE,cAAc;YAC7B,cAAc,EAAE,IAAI;YACpB,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC;IACJ,CAAC;IAED,OAAO;QACL,iBAAiB;QACjB,mBAAmB;QACnB,aAAa,EAAE,YAAY,CAAC,OAAO;QACnC,mBAAmB,EAAE,0CAA0C;QAC/D,gBAAgB,EAAE,YAAY,CAAC,IAAI;QACnC,aAAa,EAAE,c
AAc;QAC7B,cAAc,EAAE,KAAK;QACrB,MAAM,EAAE,KAAK,CAAC,MAAM;KACrB,CAAC;AACJ,CAAC"}
|