pi-model-auto 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +10 -3
- package/src/index.ts +0 -1
- package/src/router-core.ts +12 -35
package/README.md
CHANGED
|
@@ -123,6 +123,8 @@ Quality comes from one benchmark table. Cost starts from the same table, then ap
|
|
|
123
123
|
|
|
124
124
|
The numeric tables live in [`src/canonical-models.ts`](src/canonical-models.ts). The two sources are not mixed.
|
|
125
125
|
|
|
126
|
+
Task difficulty is judged from the request itself — context size, prompt length, keywords, and tool activity — never from your thinking level. Your thinking level (`low`/`medium`/`high`/`xhigh`) controls only how deeply the *chosen* model reasons; it does not change which model is chosen. So leaving thinking on `high` out of habit won't silently push every turn to the most expensive model. When you know a task is harder than it looks, say so in the prompt (or pin with `@strong`).
|
|
127
|
+
|
|
126
128
|
One user turn keeps one model, including tool-call continuations. Automatic routing also avoids quota-cooled plans and avoids switching away from a useful warm cache when the switch is not worth it.
|
|
127
129
|
|
|
128
130
|
## Settings
|
|
@@ -136,7 +138,6 @@ One user turn keeps one model, including tool-call continuations. Automatic rout
|
|
|
136
138
|
| `willingness` | Control how far each difficulty climbs toward stronger models. |
|
|
137
139
|
| `cacheAware` | Keep warm prompt caches when switching is not worth it. Enabled by default. |
|
|
138
140
|
| `quota` | Skip cooled-down plans after rate-limit headers or `429`. Enabled by default. |
|
|
139
|
-
| `forceStrongOnHighReasoning` | Send `high` or `xhigh` reasoning to the top of the frontier. |
|
|
140
141
|
| `weights` | Difficulty-scoring weights. Advanced. |
|
|
141
142
|
| `log` | Append routing decisions to `.pi/router.log`. |
|
|
142
143
|
|
package/package.json
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-model-auto",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "Pi extension package that routes each turn to cheap or strong authenticated models.",
|
|
5
5
|
"type": "module",
|
|
6
|
-
"keywords": [
|
|
6
|
+
"keywords": [
|
|
7
|
+
"pi-package",
|
|
8
|
+
"pi",
|
|
9
|
+
"extension",
|
|
10
|
+
"model-router"
|
|
11
|
+
],
|
|
7
12
|
"license": "MIT",
|
|
8
13
|
"homepage": "https://github.com/maynewong/pi-model-auto#readme",
|
|
9
14
|
"repository": {
|
|
@@ -22,7 +27,9 @@
|
|
|
22
27
|
"LICENSE"
|
|
23
28
|
],
|
|
24
29
|
"pi": {
|
|
25
|
-
"extensions": [
|
|
30
|
+
"extensions": [
|
|
31
|
+
"./src/index.ts"
|
|
32
|
+
]
|
|
26
33
|
},
|
|
27
34
|
"peerDependencies": {
|
|
28
35
|
"@earendil-works/pi-ai": "*",
|
package/src/index.ts
CHANGED
|
@@ -405,7 +405,6 @@ function describeRouter(
|
|
|
405
405
|
"Pi Router",
|
|
406
406
|
`capabilitySource: ${cfg.capabilitySource}`,
|
|
407
407
|
`cacheAware: ${cfg.cacheAware.enabled}`,
|
|
408
|
-
`forceStrongOnHighReasoning: ${cfg.forceStrongOnHighReasoning}`,
|
|
409
408
|
`modelFilter: include=[${cfg.modelFilter.include.join(", ") || "*"}] exclude=[${cfg.modelFilter.exclude.join(", ") || "none"}]`,
|
|
410
409
|
`quota: ${cfg.quota.enabled ? "enabled" : "disabled"}`,
|
|
411
410
|
`cheapPool: ${pool.cheapPool.map((item) => modelKey(item.model)).join(", ") || "none"}`,
|
package/src/router-core.ts
CHANGED
|
@@ -101,7 +101,6 @@ export interface RouterConfig {
|
|
|
101
101
|
contextTokens: number;
|
|
102
102
|
lastUserLen: number;
|
|
103
103
|
keyword: number;
|
|
104
|
-
reasoning: number;
|
|
105
104
|
toolDensity: number;
|
|
106
105
|
};
|
|
107
106
|
log: boolean;
|
|
@@ -110,12 +109,11 @@ export interface RouterConfig {
|
|
|
110
109
|
modelFilter: ModelFilter;
|
|
111
110
|
/** User-supplied metadata for unknown/private/local models. Keys may be provider/id, model id, or normalized model id. */
|
|
112
111
|
modelOverrides: Record<string, ModelOverride>;
|
|
113
|
-
forceStrongOnHighReasoning: boolean;
|
|
114
112
|
/**
|
|
115
113
|
* Willingness to pay for capability, by task hardness: the max extra list-price ($/1M) spent for
|
|
116
114
|
* one more point of quality on the chosen axis. Selection walks the Pareto frontier from the
|
|
117
115
|
* cheapest point upward, taking each step whose marginal $/quality-point is within budget — so the
|
|
118
|
-
* hardness signal (driven by
|
|
116
|
+
* hardness signal (driven by task content) positions us on the frontier and steep low-value
|
|
119
117
|
* steps (a near-tie flagship at 2× price) are only taken at `max`. The single routing knob, axis-
|
|
120
118
|
* agnostic. Raise a row to climb further for that hardness; `max: Infinity` = "top of frontier".
|
|
121
119
|
*/
|
|
@@ -206,17 +204,15 @@ export const DEFAULT_CONFIG: RouterConfig = {
|
|
|
206
204
|
capabilitySource: "ramp",
|
|
207
205
|
threshold: 0.45,
|
|
208
206
|
weights: {
|
|
209
|
-
contextTokens: 0.
|
|
210
|
-
lastUserLen: 0.
|
|
211
|
-
keyword: 0.
|
|
212
|
-
reasoning: 0.15,
|
|
207
|
+
contextTokens: 0.3,
|
|
208
|
+
lastUserLen: 0.18,
|
|
209
|
+
keyword: 0.42,
|
|
213
210
|
toolDensity: 0.1,
|
|
214
211
|
},
|
|
215
212
|
log: false,
|
|
216
213
|
tierModels: {},
|
|
217
214
|
modelFilter: { include: [], exclude: [] },
|
|
218
215
|
modelOverrides: {},
|
|
219
|
-
forceStrongOnHighReasoning: false,
|
|
220
216
|
willingness: RAMP_WILLINGNESS,
|
|
221
217
|
cacheAware: {
|
|
222
218
|
enabled: true,
|
|
@@ -490,8 +486,8 @@ export function decide(
|
|
|
490
486
|
};
|
|
491
487
|
}
|
|
492
488
|
|
|
493
|
-
const score = classify(context,
|
|
494
|
-
const hardnessBucket = autoHardnessBucket(score
|
|
489
|
+
const score = classify(context, cfg);
|
|
490
|
+
const hardnessBucket = autoHardnessBucket(score);
|
|
495
491
|
return {
|
|
496
492
|
cls: hardnessBucket >= 2 ? "strong" : "cheap",
|
|
497
493
|
score,
|
|
@@ -504,38 +500,22 @@ export function decide(
|
|
|
504
500
|
/**
|
|
505
501
|
* Continuous task-hardness bucket (index into HARDNESS_ORDER) for auto mode. The bucket — not a
|
|
506
502
|
* binary cheap/strong split — drives the capability floor, so the whole frontier (incl. mid-tier
|
|
507
|
-
* models) becomes reachable.
|
|
503
|
+
* models) becomes reachable. Driven purely by task content: the thinking level is a passthrough that
|
|
504
|
+
* controls how deeply the *chosen* model reasons, never which model is chosen.
|
|
508
505
|
*/
|
|
509
|
-
export function autoHardnessBucket(score: number
|
|
510
|
-
|
|
511
|
-
const reasoningBucket = reasoningFloorBucket(options?.reasoning);
|
|
512
|
-
return Math.max(scoreBucket, reasoningBucket);
|
|
513
|
-
}
|
|
514
|
-
|
|
515
|
-
function reasoningFloorBucket(reasoning: SimpleStreamOptions["reasoning"] | undefined): number {
|
|
516
|
-
switch (reasoning) {
|
|
517
|
-
case "medium":
|
|
518
|
-
return 1;
|
|
519
|
-
case "high":
|
|
520
|
-
return 2;
|
|
521
|
-
case "xhigh":
|
|
522
|
-
return 3;
|
|
523
|
-
default:
|
|
524
|
-
return 0; // off / low
|
|
525
|
-
}
|
|
506
|
+
export function autoHardnessBucket(score: number): number {
|
|
507
|
+
return score < 0.3 ? 0 : score < 0.52 ? 1 : score < 0.74 ? 2 : 3;
|
|
526
508
|
}
|
|
527
509
|
|
|
528
|
-
export function classify(context: Context,
|
|
510
|
+
export function classify(context: Context, cfg: RouterConfig): number {
|
|
529
511
|
const text = lastUserText(context).toLowerCase();
|
|
530
512
|
const contextTokens = estimateContextTokens(context);
|
|
531
|
-
const reasoning = options?.reasoning && ["medium", "high", "xhigh"].includes(options.reasoning) ? 1 : 0;
|
|
532
513
|
const toolDensity = Math.min(1, countRecentToolResults(context) / 8);
|
|
533
514
|
|
|
534
515
|
const raw =
|
|
535
516
|
normalize(contextTokens, 8_000, 120_000) * cfg.weights.contextTokens +
|
|
536
517
|
normalize(text.length, 120, 1_200) * cfg.weights.lastUserLen +
|
|
537
518
|
keywordScore(text) * cfg.weights.keyword +
|
|
538
|
-
reasoning * cfg.weights.reasoning +
|
|
539
519
|
toolDensity * cfg.weights.toolDensity;
|
|
540
520
|
|
|
541
521
|
return Math.max(0, Math.min(1, raw));
|
|
@@ -641,10 +621,7 @@ export function selectFromPool(
|
|
|
641
621
|
const { eligible, overflow } = eligibleModels(pool, context);
|
|
642
622
|
if (eligible.length === 0) return undefined;
|
|
643
623
|
|
|
644
|
-
|
|
645
|
-
if (cfg.forceStrongOnHighReasoning && (options?.reasoning === "high" || options?.reasoning === "xhigh")) {
|
|
646
|
-
bucket = HARDNESS_ORDER.length - 1;
|
|
647
|
-
}
|
|
624
|
+
const bucket = decision.hardnessBucket;
|
|
648
625
|
const hardness = HARDNESS_ORDER[Math.max(0, Math.min(HARDNESS_ORDER.length - 1, bucket))];
|
|
649
626
|
|
|
650
627
|
// `fast` is orthogonal: gate on a low capability floor, then maximize throughput.
|