@kylebrodeur/pi-model-router 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ import type { ThinkingLevel } from '@mariozechner/pi-agent-core';
2
+
3
+ // ─── Feature Toggles (added by fork) ──────────────────────────────────────
4
+
5
/**
 * String-literal names of the router features that can be referred to
 * individually.
 *
 * NOTE(review): `FeatureToggles` also declares `scopeShim` and
 * `respectPiScope`, which are not part of this union — confirm whether the
 * omission is intentional before relying on this type as the full key set.
 */
export type RouterFeature =
  | 'agentBusIntegration'
  | 'contextCompression'
  | 'costBudgeting'
  | 'intentClassifier'
  | 'ledgerIntegration'
  | 'ollamaSync'
  | 'perTurnRouting'
  | 'phaseMemory'
  | 'rateLimitFallback';
15
+
16
/**
 * Optional boolean switches, one per router feature.
 *
 * Every key may be omitted; this type does not define what an omitted key
 * means, so defaults (if any) are decided by the code that reads it.
 */
export interface FeatureToggles {
  /**
   * When set, model changes are published to the pi-agent-bus MessageBus.
   * Progressive integration: pi-agent-bus is detected at runtime and only
   * used if it is available.
   */
  agentBusIntegration?: boolean;
  /**
   * When set, routing decisions are logged to qmd-ledger.
   * Progressive integration: pi-qmd-ledger is detected at runtime and only
   * used if it is available.
   */
  ledgerIntegration?: boolean;
  contextCompression?: boolean;
  costBudgeting?: boolean;
  intentClassifier?: boolean;
  ollamaSync?: boolean;
  perTurnRouting?: boolean;
  phaseMemory?: boolean;
  rateLimitFallback?: boolean;
  respectPiScope?: boolean;
  scopeShim?: boolean;
}
37
+
38
/**
 * Shape of the optional `ollamaSync` section of `RouterConfig`.
 *
 * All fields are optional. NOTE(review): the meaning and defaults of the
 * individual fields are not visible in this file — confirm against the code
 * that consumes this config before documenting them further.
 */
export interface OllamaSyncConfig {
  enabled?: boolean;

  // Boolean switches.
  onStartup?: boolean;
  onReload?: boolean;
  addLaunchFlag?: boolean;

  // String lists.
  visionKeywords?: string[];
  reasoningKeywords?: string[];
  preferredFamilies?: string[];

  // Numeric settings.
  defaultContextWindow?: number;
  largeContextWindow?: number;
}
49
+
50
/**
 * Shape of the optional `rateLimitFallback` section of `RouterConfig`.
 *
 * All fields are optional. NOTE(review): units for `shortDelayThreshold` and
 * `restoreCheckInterval`, and the format of `fallbackSequence` entries, are
 * not established by this file — confirm against the consuming code.
 */
export interface RateLimitFallbackConfig {
  enabled?: boolean;
  autoFallback?: boolean;
  autoRestore?: boolean;
  shortDelayThreshold?: number;
  restoreCheckInterval?: number;
  fallbackSequence?: string[];
}
58
+
59
+ // ─── Original Router Types ─────────────────────────────────────────────────
60
+
61
/** The three tiers a profile defines a model for. */
export type RouterTier = 'low' | 'medium' | 'high';

/** Either a concrete tier or the literal `'auto'`. */
export type RouterPin = 'auto' | RouterTier;

/** Phase label recorded on a routing decision. */
export type RouterPhase = 'lightweight' | 'implementation' | 'planning';

/** Pinned tier keyed by profile name; a profile may have no entry. */
export type RouterPinByProfile = Partial<Record<string, RouterTier>>;

/** Thinking level keyed by tier; a tier may have no entry. */
export type RouterThinkingByTier = Partial<Record<RouterTier, ThinkingLevel>>;

/** Per-tier thinking levels keyed by profile name. */
export type RouterThinkingByProfile = Record<string, RouterThinkingByTier>;
67
+
68
/**
 * One entry of `RouterConfig.rules`: a pattern (or list of patterns) paired
 * with the tier to use, plus an optional human-readable reason.
 *
 * NOTE(review): how `matches` is compared against input (substring, keyword,
 * regex, …) is not visible in this file — confirm against the matcher.
 */
export interface RoutingRule {
  tier: RouterTier;
  matches: string | string[];
  reason?: string;
}
73
+
74
/**
 * Configuration of a single tier inside a `RouterProfile`: the model string
 * to use, an optional thinking level, and an optional list of fallback model
 * strings.
 */
export interface RoutedTierConfig {
  /** Model reference string (required). */
  model: string;
  /** Fallback model reference strings, if any. */
  fallbacks?: string[];
  /** Thinking level for this tier, if one is configured. */
  thinking?: ThinkingLevel;
}
79
+
80
/**
 * A named routing profile: one `RoutedTierConfig` for each of the three
 * tiers. All three tiers are required.
 */
export interface RouterProfile {
  low: RoutedTierConfig;
  medium: RoutedTierConfig;
  high: RoutedTierConfig;
}
85
+
86
/**
 * Top-level router configuration.
 *
 * `profiles` is the only required member; everything else is optional.
 * NOTE(review): units and defaults of the numeric settings (`phaseBias`,
 * `largeContextThreshold`, `maxSessionBudget`) are not established by this
 * file — confirm against the config loader.
 */
export interface RouterConfig {
  /** Routing profiles keyed by profile name. */
  profiles: Record<string, RouterProfile>;
  /** Name of the profile to use by default. */
  defaultProfile?: string;
  /** Explicit routing rules. */
  rules?: RoutingRule[];
  classifierModel?: string;
  debug?: boolean;
  phaseBias?: number;
  largeContextThreshold?: number;
  maxSessionBudget?: number;

  // ─── Feature toggles (added by fork) ──────────────────────────────
  features?: FeatureToggles;
  ollamaSync?: OllamaSyncConfig;
  rateLimitFallback?: RateLimitFallbackConfig;
}
100
+
101
/**
 * Record of a single routing outcome: which profile/tier/phase was chosen,
 * the target provider and model, the thinking level, a textual reasoning and
 * a numeric timestamp, plus optional boolean markers describing how the
 * decision came about.
 */
export interface RoutingDecision {
  // What was selected.
  profile: string;
  tier: RouterTier;
  phase: RouterPhase;
  thinking: ThinkingLevel;

  // Where the request is sent.
  targetProvider: string;
  targetModelId: string;
  targetLabel: string;

  // Explanation and time of the decision.
  reasoning: string;
  timestamp: number;

  // Optional markers; absent is equivalent to "not set".
  isClassifier?: boolean;
  isFallback?: boolean;
  isContextTriggered?: boolean;
  isBudgetForced?: boolean;
  isRuleMatched?: boolean;
}
117
+
118
/**
 * Router state in the form in which it is persisted.
 *
 * `enabled`, `selectedProfile` and `timestamp` are always present; every
 * other member is optional.
 */
export interface RouterPersistedState {
  // Always present.
  enabled: boolean;
  selectedProfile: string;
  timestamp: number;

  // Pins and thinking overrides.
  pinTier?: RouterTier;
  pinByProfile?: RouterPinByProfile;
  thinkingByProfile?: RouterThinkingByProfile;

  // UI / debug switches.
  debugEnabled?: boolean;
  widgetEnabled?: boolean;

  // Most recent routing information.
  lastPhase?: RouterPhase;
  lastDecision?: RoutingDecision;
  debugHistory?: RoutingDecision[];
  lastNonRouterModel?: string;

  // Running cost total.
  accumulatedCost?: number;
}
133
+
134
/** A complete `RouterConfig` together with the warning messages attached to it. */
export interface ConfigLoadResult {
  /** Warning messages; an empty array when there are none. */
  warnings: string[];
  /** The resulting configuration. */
  config: RouterConfig;
}
138
+
139
/**
 * A possibly incomplete configuration (`Partial<RouterConfig>`) together with
 * the warning messages attached to it.
 */
export interface ParsedConfigFile {
  /** Warning messages; an empty array when there are none. */
  warnings: string[];
  /** The configuration fragment; any `RouterConfig` member may be missing. */
  config: Partial<RouterConfig>;
}
143
+
144
/**
 * Loosely typed session entry: a required `type` tag, an optional
 * `customType` tag and an untyped payload.
 */
export interface CustomSessionEntry {
  /** Entry type tag (required). */
  type: string;
  /** Payload of unknown shape; narrow it before use. */
  data?: unknown;
  /** Secondary type tag, when present. */
  customType?: string;
}
@@ -0,0 +1,130 @@
1
+ import type { ExtensionContext } from '@mariozechner/pi-coding-agent';
2
+ import type {
3
+ RoutingDecision,
4
+ RouterConfig,
5
+ RouterPinByProfile,
6
+ RouterThinkingByProfile,
7
+ } from './types';
8
+
9
/**
 * Resolves the thinking level to display for a decision.
 *
 * @param thinkingByProfile Per-profile, per-tier thinking overrides.
 * @param profileName Profile whose overrides should be consulted.
 * @param decision Decision supplying the tier and the fallback level.
 * @returns The override stored for `profileName` at `decision.tier`, or
 *   `decision.thinking` when no such override exists.
 */
const getEffectiveThinking = (
  thinkingByProfile: RouterThinkingByProfile,
  profileName: string,
  decision: RoutingDecision,
) => {
  const profileOverrides = thinkingByProfile[profileName];
  const tierOverride = profileOverrides?.[decision.tier];
  return tierOverride ?? decision.thinking;
};
14
+
15
/**
 * Lists the display labels of the markers that are set on a decision.
 *
 * Labels always appear in the fixed order `fallback`, `context`,
 * `budget-limit`, `rule`. `isClassifier` has no label here.
 *
 * @param decision Decision whose markers are inspected.
 * @returns Labels for every truthy marker; empty when none is set.
 */
const getDecisionFlags = (decision: RoutingDecision): string[] => {
  const labelled: Array<[boolean | undefined, string]> = [
    [decision.isFallback, 'fallback'],
    [decision.isContextTriggered, 'context'],
    [decision.isBudgetForced, 'budget-limit'],
    [decision.isRuleMatched, 'rule'],
  ];
  return labelled.filter(([isSet]) => isSet).map(([, label]) => label);
};
23
+
24
/**
 * Renders a decision as one line of text in the form
 * `profile: tier -> provider/modelId [thinking] (reasoning)`.
 *
 * The thinking level shown is the one stored on the decision itself.
 *
 * @param decision Decision to render.
 * @returns The single-line description.
 */
export const formatDecision = (decision: RoutingDecision): string => {
  const {
    profile,
    tier,
    targetProvider,
    targetModelId,
    thinking,
    reasoning,
  } = decision;
  const target = `${targetProvider}/${targetModelId}`;
  return `${profile}: ${tier} -> ${target} [${thinking}] (${reasoning})`;
};
27
+
28
/**
 * Renders all pinned tiers as `profile:tier` pairs, sorted by profile name
 * and joined with `, `.
 *
 * `RouterPinByProfile` is a `Partial<Record<…>>`, so a profile key may be
 * present with no tier (for example after a pin was cleared). Such entries
 * are skipped rather than rendered as `profile:undefined`.
 *
 * @param pinnedTierByProfile Pinned tier keyed by profile name.
 * @returns The comma-separated summary, or `'none'` when no profile has a
 *   pinned tier.
 */
export const formatPinSummary = (
  pinnedTierByProfile: RouterPinByProfile,
): string => {
  const entries = Object.entries(pinnedTierByProfile)
    // `!= null` drops both `undefined` and a `null` read back from JSON.
    .filter(([, tier]) => tier != null)
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([profile, tier]) => `${profile}:${tier}`);
  return entries.length > 0 ? entries.join(', ') : 'none';
};
36
+
37
/**
 * Renders all thinking overrides as `profile(tier:level,…)` groups, sorted
 * by profile name (and by tier name inside each group) and joined with `, `.
 *
 * The per-tier map is a `Partial<Record<…>>`, so a tier key may be present
 * with no level. Such tiers are skipped rather than rendered as
 * `tier:undefined`, and a profile left with no tiers is omitted instead of
 * being rendered as an empty `profile()` group.
 *
 * @param thinkingByProfile Per-tier thinking levels keyed by profile name.
 * @returns The comma-separated summary, or `'none'` when no profile has any
 *   override.
 */
export const formatThinkingSummary = (
  thinkingByProfile: RouterThinkingByProfile,
): string => {
  const entries = Object.entries(thinkingByProfile)
    .sort(([a], [b]) => a.localeCompare(b))
    .flatMap(([profile, tierMap]) => {
      // `?? {}` keeps a missing tier map from throwing in Object.entries.
      const tiers = Object.entries(tierMap ?? {})
        .filter(([, level]) => level != null)
        .sort(([a], [b]) => a.localeCompare(b))
        .map(([tier, level]) => `${tier}:${level}`);
      return tiers.length > 0 ? [`${profile}(${tiers.join(',')})`] : [];
    });
  return entries.length > 0 ? entries.join(', ') : 'none';
};
50
+
51
/**
 * Returns a printable form of an optional model reference.
 *
 * @param ref Model reference string, possibly absent.
 * @returns `ref` unchanged when it is present (including the empty string),
 *   otherwise the literal `'none'`.
 */
export const formatModelRef = (ref: string | undefined): string => {
  if (ref == null) {
    return 'none';
  }
  return ref;
};
54
+
55
/**
 * Refreshes the `router` status line and, when enabled, the `router` widget.
 *
 * Status line:
 * - router active and the last decision belongs to the selected profile
 *   (and to the pinned tier, if one is pinned): shows tier, target model and
 *   effective thinking level;
 * - router active otherwise: shows `waiting`;
 * - router not active: shows `router:off` with the last non-router model.
 *
 * Widget: cleared when `widgetEnabled` is false; otherwise lists router
 * state, profile, pin, cost (with the budget when one is configured), the
 * last route or fallback model, and a pin summary when more than one pin key
 * exists. All text is rendered with the theme's `dim` colour.
 *
 * @param ctx Extension context providing `ui.setStatus`, `ui.setWidget` and
 *   `ui.theme.fg`.
 * @param routerEnabled Whether routing is switched on.
 * @param selectedProfile Name of the currently selected profile.
 * @param pinnedTierByProfile Pinned tier keyed by profile name.
 * @param thinkingByProfile Thinking overrides keyed by profile, then tier.
 * @param lastDecision Most recent routing decision, if any.
 * @param lastNonRouterModel Model reference used while routing is off, if any.
 * @param accumulatedCost Running cost, printed with four decimals.
 * @param widgetEnabled Whether the widget should be shown.
 * @param currentConfig Config supplying `maxSessionBudget`.
 */
export const updateStatus = (
  ctx: ExtensionContext,
  routerEnabled: boolean,
  selectedProfile: string,
  pinnedTierByProfile: RouterPinByProfile,
  thinkingByProfile: RouterThinkingByProfile,
  lastDecision: RoutingDecision | undefined,
  lastNonRouterModel: string | undefined,
  accumulatedCost: number,
  widgetEnabled: boolean,
  currentConfig: RouterConfig,
) => {
  const dim = (text: string) => ctx.ui.theme.fg('dim', text);

  // The router only counts as active when it is enabled AND a non-empty
  // profile name is selected.
  const routerActive = routerEnabled && Boolean(selectedProfile);
  const pin = pinnedTierByProfile[selectedProfile];
  const pinSuffix = pin ? ` [pin:${pin}]` : '';

  const buildStatusText = (): string => {
    if (!routerActive) {
      return `router:off (${selectedProfile}${pinSuffix}) -> ${formatModelRef(lastNonRouterModel)}`;
    }
    const prefix = `router:${selectedProfile}${pinSuffix}`;
    // A decision is only shown when it was made for the selected profile
    // and, if a tier is pinned, for that tier.
    if (
      !lastDecision ||
      lastDecision.profile !== selectedProfile ||
      (pin && lastDecision.tier !== pin)
    ) {
      return `${prefix} -> waiting`;
    }
    const thinking = getEffectiveThinking(
      thinkingByProfile,
      selectedProfile,
      lastDecision,
    );
    return `${prefix} -> ${lastDecision.tier} -> ${lastDecision.targetProvider}/${lastDecision.targetModelId} (${thinking})`;
  };

  const statusText = buildStatusText();
  ctx.ui.setStatus('router', dim(statusText));

  if (!widgetEnabled) {
    ctx.ui.setWidget('router', undefined);
    return;
  }

  const spent = accumulatedCost.toFixed(4);
  const budget = currentConfig.maxSessionBudget;
  // A missing or zero budget prints the cost alone.
  const budgetSuffix = budget ? ` / $${budget.toFixed(2)}` : '';

  const lines: string[] = [
    `Router: ${routerEnabled ? 'enabled' : 'disabled'}`,
    `Profile: ${selectedProfile}${routerActive ? ' (active)' : ''}`,
    `Pin: ${pin ?? 'auto'}`,
    `Cost: $${spent}${budgetSuffix}`,
  ];

  if (lastDecision && lastDecision.profile === selectedProfile) {
    const thinking = getEffectiveThinking(
      thinkingByProfile,
      selectedProfile,
      lastDecision,
    );
    const flags = getDecisionFlags(lastDecision);
    const flagSuffix = flags.length > 0 ? ` [${flags.join(',')}]` : '';
    lines.push(
      `Route: ${lastDecision.tier}${flagSuffix} -> ${lastDecision.targetProvider}/${lastDecision.targetModelId} (${thinking})`,
    );
    lines.push(`Phase: ${lastDecision.phase}`);
  } else if (!routerEnabled && lastNonRouterModel) {
    lines.push(`Fallback: ${lastNonRouterModel}`);
  }

  // The summary is only worth a line when more than one pin key exists.
  const pinKeyCount = Object.keys(pinnedTierByProfile).length;
  if (pinKeyCount > 1) {
    lines.push(`Pins: ${formatPinSummary(pinnedTierByProfile)}`);
  }

  ctx.ui.setWidget(
    'router',
    lines.map((line) => dim(line)),
  );
};
@@ -0,0 +1,15 @@
1
+ {
2
+ "comment": "Progressive Step 2: Router with ledger + agent-bus integration. Enables agent coordination for model selection.",
3
+ "features": {
4
+ "ollamaSync": true,
5
+ "rateLimitFallback": true,
6
+ "scopeShim": true,
7
+ "perTurnRouting": true,
8
+ "intentClassifier": false,
9
+ "costBudgeting": true,
10
+ "phaseMemory": true,
11
+ "contextCompression": true,
12
+ "ledgerIntegration": true,
13
+ "agentBusIntegration": true
14
+ }
15
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "comment": "Progressive Step 3: Essential config — all integrations enabled. Maximum interoperability with the Pi package ecosystem.",
3
+ "features": {
4
+ "ollamaSync": true,
5
+ "rateLimitFallback": true,
6
+ "scopeShim": true,
7
+ "perTurnRouting": true,
8
+ "intentClassifier": false,
9
+ "costBudgeting": true,
10
+ "phaseMemory": true,
11
+ "contextCompression": true,
12
+ "ledgerIntegration": true,
13
+ "agentBusIntegration": true
14
+ },
15
+ "ollamaSync": {
16
+ "enabled": true,
17
+ "onStartup": true,
18
+ "onReload": true
19
+ },
20
+ "rateLimitFallback": {
21
+ "enabled": true,
22
+ "shortDelayThreshold": 60,
23
+ "autoFallback": false,
24
+ "autoRestore": false,
25
+ "fallbackSequence": ["anthropic/claude-3-haiku-20240307", "ollama/*"]
26
+ },
27
+ "rules": [
28
+ { "matches": "ledger", "tier": "low", "reason": "Ledger operations are lightweight" },
29
+ { "matches": "agent-bus", "tier": "medium", "reason": "Agent coordination needs reliability" }
30
+ ]
31
+ }
@@ -0,0 +1,70 @@
1
+ {
2
+ "features": {
3
+ "ollamaSync": true,
4
+ "rateLimitFallback": true,
5
+ "scopeShim": true,
6
+ "perTurnRouting": true,
7
+ "intentClassifier": false,
8
+ "costBudgeting": true,
9
+ "phaseMemory": true,
10
+ "contextCompression": true
11
+ },
12
+ "ollamaSync": {
13
+ "enabled": true,
14
+ "onStartup": true,
15
+ "onReload": true
16
+ },
17
+ "rateLimitFallback": {
18
+ "enabled": true,
19
+ "shortDelayThreshold": 60,
20
+ "autoFallback": false,
21
+ "autoRestore": false,
22
+ "fallbackSequence": ["anthropic/claude-3-haiku-20240307", "ollama/*"]
23
+ },
24
+ "defaultProfile": "auto",
25
+ "debug": false,
26
+ "classifierModel": "google/gemini-flash-latest",
27
+ "phaseBias": 0.5,
28
+ "maxSessionBudget": 1.0,
29
+ "largeContextThreshold": 100000,
30
+ "rules": [
31
+ {
32
+ "matches": ["deploy", "production", "release"],
33
+ "tier": "high",
34
+ "reason": "Safety check for production tasks"
35
+ },
36
+ { "matches": "changelog", "tier": "low" }
37
+ ],
38
+ "profiles": {
39
+ "auto": {
40
+ "high": {
41
+ "model": "openai/gpt-5.4-pro",
42
+ "thinking": "high",
43
+ "fallbacks": ["anthropic/claude-3-5-sonnet-20241022"]
44
+ },
45
+ "medium": { "model": "google/gemini-flash-latest", "thinking": "medium" },
46
+ "low": { "model": "openai/gpt-5.4-nano", "thinking": "low" }
47
+ },
48
+ "cheap": {
49
+ "high": { "model": "google/gemini-flash-latest", "thinking": "low" },
50
+ "medium": { "model": "openai/gpt-5.4-nano", "thinking": "off" },
51
+ "low": { "model": "google/gemini-flash-lite-latest", "thinking": "off" }
52
+ },
53
+ "deep": {
54
+ "high": { "model": "openai/o1-preview", "thinking": "xhigh" },
55
+ "medium": { "model": "openai/gpt-5.4-pro", "thinking": "medium" },
56
+ "low": { "model": "google/gemini-flash-latest", "thinking": "low" }
57
+ },
58
+ "anthropic": {
59
+ "high": {
60
+ "model": "anthropic/claude-3-5-sonnet-20241022",
61
+ "thinking": "high"
62
+ },
63
+ "medium": {
64
+ "model": "anthropic/claude-3-5-sonnet-20241022",
65
+ "thinking": "medium"
66
+ },
67
+ "low": { "model": "anthropic/claude-3-haiku-20240307", "thinking": "low" }
68
+ }
69
+ }
70
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "comment": "Progressive Step 1: Basic router with qmd-ledger integration. Enables logging routing decisions to ledger.",
3
+ "features": {
4
+ "ollamaSync": true,
5
+ "rateLimitFallback": true,
6
+ "scopeShim": true,
7
+ "perTurnRouting": true,
8
+ "intentClassifier": false,
9
+ "costBudgeting": true,
10
+ "phaseMemory": true,
11
+ "contextCompression": true,
12
+ "ledgerIntegration": true,
13
+ "agentBusIntegration": false
14
+ }
15
+ }
package/package.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "name": "@kylebrodeur/pi-model-router",
3
+ "version": "0.1.2",
4
+ "type": "module",
5
+ "description": "Intelligent per-turn model router extension for the pi coding agent (Enhanced Fork)",
6
+ "keywords": [
7
+ "pi-package",
8
+ "pi",
9
+ "model-router",
10
+ "llm",
11
+ "coding-agent",
12
+ "extension"
13
+ ],
14
+ "license": "MIT",
15
+ "author": "Kyle Brodeur <kylebrodeur@example.com> (https://github.com/kylebrodeur)",
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "https://github.com/kylebrodeur/pi-model-router.git"
19
+ },
20
+ "homepage": "https://github.com/kylebrodeur/pi-model-router#readme",
21
+ "bugs": {
22
+ "url": "https://github.com/kylebrodeur/pi-model-router/issues"
23
+ },
24
+ "files": [
25
+ "extensions/",
26
+ "docs/",
27
+ "model-router.example.json",
28
+ "model-router.ledger.json",
29
+ "model-router.agent-bus.json",
30
+ "model-router.essential.json",
31
+ "LICENSE",
32
+ "README.md",
33
+ "CHANGELOG.md",
34
+ "LEARNINGS.md",
35
+ "QUICKSTART.md",
36
+ "TESTING.md",
37
+ "CONTRIBUTING.md"
38
+ ],
39
+ "exports": {
40
+ ".": "./extensions/index.ts"
41
+ },
42
+ "pi": {
43
+ "extensions": [
44
+ "./extensions/index.ts"
45
+ ]
46
+ },
47
+ "scripts": {
48
+ "tsc": "tsc --noEmit",
49
+ "build": "tsc",
50
+ "prepublishOnly": "npm run tsc"
51
+ },
52
+ "peerDependencies": {
53
+ "@mariozechner/pi-agent-core": "*",
54
+ "@mariozechner/pi-ai": "*",
55
+ "@mariozechner/pi-coding-agent": ">=0.70.2",
56
+ "@mariozechner/pi-tui": "*",
57
+ "typebox": "*"
58
+ },
59
+ "devDependencies": {
60
+ "@mariozechner/pi-coding-agent": "^0.70.2",
61
+ "prettier": "^3.8.1",
62
+ "typescript": "^6.0.2"
63
+ }
64
+ }