pi-model-auto 0.1.0 → 0.1.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -123,6 +123,8 @@ Quality comes from one benchmark table. Cost starts from the same table, then ap
123
123
 
124
124
  The numeric tables live in [`src/canonical-models.ts`](src/canonical-models.ts). The two sources are not mixed.
125
125
 
126
+ Task difficulty is judged from the request itself — context size, prompt length, keywords, and tool activity — never from your thinking level. Your thinking level (`low`/`medium`/`high`/`xhigh`) controls only how deeply the *chosen* model reasons; it does not change which model is chosen. So leaving thinking on `high` out of habit won't silently push every turn to the most expensive model. When you know a task is harder than it looks, say so in the prompt (or pin with `@strong`).
127
+
126
128
  One user turn keeps one model, including tool-call continuations. Automatic routing also avoids quota-cooled plans and avoids switching away from a useful warm cache when the switch is not worth it.
127
129
 
128
130
  ## Settings
@@ -136,7 +138,6 @@ One user turn keeps one model, including tool-call continuations. Automatic rout
136
138
  | `willingness` | Control how far each difficulty climbs toward stronger models. |
137
139
  | `cacheAware` | Keep warm prompt caches when switching is not worth it. Enabled by default. |
138
140
  | `quota` | Skip cooled-down plans after rate-limit headers or `429`. Enabled by default. |
139
- | `forceStrongOnHighReasoning` | Send `high` or `xhigh` reasoning to the top of the frontier. |
140
141
  | `weights` | Difficulty-scoring weights. Advanced. |
141
142
  | `log` | Append routing decisions to `.pi/router.log`. |
142
143
 
package/package.json CHANGED
@@ -1,9 +1,14 @@
1
1
  {
2
2
  "name": "pi-model-auto",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Pi extension package that routes each turn to cheap or strong authenticated models.",
5
5
  "type": "module",
6
- "keywords": ["pi-package", "pi", "extension", "model-router"],
6
+ "keywords": [
7
+ "pi-package",
8
+ "pi",
9
+ "extension",
10
+ "model-router"
11
+ ],
7
12
  "license": "MIT",
8
13
  "homepage": "https://github.com/maynewong/pi-model-auto#readme",
9
14
  "repository": {
@@ -22,7 +27,9 @@
22
27
  "LICENSE"
23
28
  ],
24
29
  "pi": {
25
- "extensions": ["./src/index.ts"]
30
+ "extensions": [
31
+ "./src/index.ts"
32
+ ]
26
33
  },
27
34
  "peerDependencies": {
28
35
  "@earendil-works/pi-ai": "*",
package/src/index.ts CHANGED
@@ -405,7 +405,6 @@ function describeRouter(
405
405
  "Pi Router",
406
406
  `capabilitySource: ${cfg.capabilitySource}`,
407
407
  `cacheAware: ${cfg.cacheAware.enabled}`,
408
- `forceStrongOnHighReasoning: ${cfg.forceStrongOnHighReasoning}`,
409
408
  `modelFilter: include=[${cfg.modelFilter.include.join(", ") || "*"}] exclude=[${cfg.modelFilter.exclude.join(", ") || "none"}]`,
410
409
  `quota: ${cfg.quota.enabled ? "enabled" : "disabled"}`,
411
410
  `cheapPool: ${pool.cheapPool.map((item) => modelKey(item.model)).join(", ") || "none"}`,
@@ -101,7 +101,6 @@ export interface RouterConfig {
101
101
  contextTokens: number;
102
102
  lastUserLen: number;
103
103
  keyword: number;
104
- reasoning: number;
105
104
  toolDensity: number;
106
105
  };
107
106
  log: boolean;
@@ -110,12 +109,11 @@ export interface RouterConfig {
110
109
  modelFilter: ModelFilter;
111
110
  /** User-supplied metadata for unknown/private/local models. Keys may be provider/id, model id, or normalized model id. */
112
111
  modelOverrides: Record<string, ModelOverride>;
113
- forceStrongOnHighReasoning: boolean;
114
112
  /**
115
113
  * Willingness to pay for capability, by task hardness: the max extra list-price ($/1M) spent for
116
114
  * one more point of quality on the chosen axis. Selection walks the Pareto frontier from the
117
115
  * cheapest point upward, taking each step whose marginal $/quality-point is within budget — so the
118
- * hardness signal (driven by reasoning level) positions us on the frontier and steep low-value
116
+ * hardness signal (driven by task content) positions us on the frontier and steep low-value
119
117
  * steps (a near-tie flagship at 2× price) are only taken at `max`. The single routing knob, axis-
120
118
  * agnostic. Raise a row to climb further for that hardness; `max: Infinity` = "top of frontier".
121
119
  */
@@ -206,17 +204,15 @@ export const DEFAULT_CONFIG: RouterConfig = {
206
204
  capabilitySource: "ramp",
207
205
  threshold: 0.45,
208
206
  weights: {
209
- contextTokens: 0.25,
210
- lastUserLen: 0.15,
211
- keyword: 0.35,
212
- reasoning: 0.15,
207
+ contextTokens: 0.3,
208
+ lastUserLen: 0.18,
209
+ keyword: 0.42,
213
210
  toolDensity: 0.1,
214
211
  },
215
212
  log: false,
216
213
  tierModels: {},
217
214
  modelFilter: { include: [], exclude: [] },
218
215
  modelOverrides: {},
219
- forceStrongOnHighReasoning: false,
220
216
  willingness: RAMP_WILLINGNESS,
221
217
  cacheAware: {
222
218
  enabled: true,
@@ -490,8 +486,8 @@ export function decide(
490
486
  };
491
487
  }
492
488
 
493
- const score = classify(context, options, cfg);
494
- const hardnessBucket = autoHardnessBucket(score, options);
489
+ const score = classify(context, cfg);
490
+ const hardnessBucket = autoHardnessBucket(score);
495
491
  return {
496
492
  cls: hardnessBucket >= 2 ? "strong" : "cheap",
497
493
  score,
@@ -504,38 +500,22 @@ export function decide(
504
500
  /**
505
501
  * Continuous task-hardness bucket (index into HARDNESS_ORDER) for auto mode. The bucket — not a
506
502
  * binary cheap/strong split — drives the capability floor, so the whole frontier (incl. mid-tier
507
- * models) becomes reachable. Reasoning level is an explicit floor *guarantee*: it can only raise it.
503
+ * models) becomes reachable. Driven purely by task content: the thinking level is a passthrough that
504
+ * controls how deeply the *chosen* model reasons, never which model is chosen.
508
505
  */
509
- export function autoHardnessBucket(score: number, options: SimpleStreamOptions | undefined): number {
510
- const scoreBucket = score < 0.30 ? 0 : score < 0.52 ? 1 : score < 0.74 ? 2 : 3;
511
- const reasoningBucket = reasoningFloorBucket(options?.reasoning);
512
- return Math.max(scoreBucket, reasoningBucket);
513
- }
514
-
515
- function reasoningFloorBucket(reasoning: SimpleStreamOptions["reasoning"] | undefined): number {
516
- switch (reasoning) {
517
- case "medium":
518
- return 1;
519
- case "high":
520
- return 2;
521
- case "xhigh":
522
- return 3;
523
- default:
524
- return 0; // off / low
525
- }
506
+ export function autoHardnessBucket(score: number): number {
507
+ return score < 0.3 ? 0 : score < 0.52 ? 1 : score < 0.74 ? 2 : 3;
526
508
  }
527
509
 
528
- export function classify(context: Context, options: SimpleStreamOptions | undefined, cfg: RouterConfig): number {
510
+ export function classify(context: Context, cfg: RouterConfig): number {
529
511
  const text = lastUserText(context).toLowerCase();
530
512
  const contextTokens = estimateContextTokens(context);
531
- const reasoning = options?.reasoning && ["medium", "high", "xhigh"].includes(options.reasoning) ? 1 : 0;
532
513
  const toolDensity = Math.min(1, countRecentToolResults(context) / 8);
533
514
 
534
515
  const raw =
535
516
  normalize(contextTokens, 8_000, 120_000) * cfg.weights.contextTokens +
536
517
  normalize(text.length, 120, 1_200) * cfg.weights.lastUserLen +
537
518
  keywordScore(text) * cfg.weights.keyword +
538
- reasoning * cfg.weights.reasoning +
539
519
  toolDensity * cfg.weights.toolDensity;
540
520
 
541
521
  return Math.max(0, Math.min(1, raw));
@@ -641,10 +621,7 @@ export function selectFromPool(
641
621
  const { eligible, overflow } = eligibleModels(pool, context);
642
622
  if (eligible.length === 0) return undefined;
643
623
 
644
- let bucket = decision.hardnessBucket;
645
- if (cfg.forceStrongOnHighReasoning && (options?.reasoning === "high" || options?.reasoning === "xhigh")) {
646
- bucket = HARDNESS_ORDER.length - 1;
647
- }
624
+ const bucket = decision.hardnessBucket;
648
625
  const hardness = HARDNESS_ORDER[Math.max(0, Math.min(HARDNESS_ORDER.length - 1, bucket))];
649
626
 
650
627
  // `fast` is orthogonal: gate on a low capability floor, then maximize throughput.