npm - @kylebrodeur/pi-model-router - Versions diffs - 0.1.2 - Mend

@kylebrodeur/pi-model-router 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/CHANGELOG.md +42 -0
package/CONTRIBUTING.md +310 -0
package/LEARNINGS.md +181 -0
package/LICENSE +21 -0
package/QUICKSTART.md +111 -0
package/README.md +195 -0
package/TESTING.md +374 -0
package/docs/ARCHITECTURE.md +54 -0
package/docs/UPSTREAM_ISSUE_scoped_models.md +94 -0
package/extensions/commands.ts +1068 -0
package/extensions/config.ts +415 -0
package/extensions/constants.ts +1 -0
package/extensions/index.ts +583 -0
package/extensions/ollama-sync.ts +254 -0
package/extensions/provider.ts +558 -0
package/extensions/rate-limit.ts +317 -0
package/extensions/routing.ts +418 -0
package/extensions/scope-shim.ts +213 -0
package/extensions/state.ts +49 -0
package/extensions/types.ts +148 -0
package/extensions/ui.ts +130 -0
package/model-router.agent-bus.json +15 -0
package/model-router.essential.json +31 -0
package/model-router.example.json +70 -0
package/model-router.ledger.json +15 -0
package/package.json +64 -0

package/extensions/routing.ts ADDED Viewed

@@ -0,0 +1,418 @@
+import { streamSimple, type Context, type Message } from '@mariozechner/pi-ai';
+import type { ExtensionContext } from '@mariozechner/pi-coding-agent';
+import type {
+  RouterTier,
+  RouterPhase,
+  RouterProfile,
+  RoutingDecision,
+  RoutingRule,
+  RouterThinkingByTier,
+} from './types';
+import { parseCanonicalModelRef, isRouterTier } from './config';
+/**
+ * Flatten a message's content into plain text.
+ *
+ * A string is returned untouched. For structured content, text parts yield
+ * their text, thinking parts their thinking, and tool calls are rendered as
+ * `<name> <JSON arguments>`; every other part type contributes nothing.
+ * Non-empty pieces are joined with newlines.
+ */
+export const extractTextFromContent = (
+  content: string | Message['content'],
+): string => {
+  if (typeof content === 'string') {
+    return content;
+  }
+  const pieces: string[] = [];
+  for (const part of content) {
+    let rendered = '';
+    switch (part.type) {
+      case 'text':
+        rendered = part.text;
+        break;
+      case 'thinking':
+        rendered = part.thinking;
+        break;
+      case 'toolCall':
+        rendered = `${part.name} ${JSON.stringify(part.arguments)}`;
+        break;
+      default:
+        break;
+    }
+    // Empty renderings are skipped so they do not produce blank lines.
+    if (rendered) {
+      pieces.push(rendered);
+    }
+  }
+  return pieces.join('\n');
+};
+/**
+ * Return the trimmed text of the most recent user message in the context,
+ * or an empty string when the context holds no user message.
+ */
+export const getLastUserText = (context: Context): string => {
+  let index = context.messages.length;
+  // Walk backwards so the newest user message wins.
+  while (index-- > 0) {
+    const candidate = context.messages[index];
+    if (candidate.role !== 'user') {
+      continue;
+    }
+    return extractTextFromContent(candidate.content).trim();
+  }
+  return '';
+};
+/**
+ * Concatenate the text of the last `limit` messages (any role), lowercased.
+ *
+ * Each message is flattened with `extractTextFromContent`, trimmed, and
+ * dropped when empty; the remaining pieces are joined with newlines.
+ *
+ * @param context - Conversation whose trailing messages are inspected.
+ * @param limit - Maximum number of trailing messages to include (default 6).
+ *   A non-positive or non-numeric limit yields an empty string.
+ * @returns The lowercased, newline-joined text.
+ */
+export const getRecentConversationText = (
+  context: Context,
+  limit = 6,
+): string => {
+  // `slice(-0)` is `slice(0)` (the whole history) and a negative limit would
+  // slice from the front, so anything that is not a positive count means
+  // "no messages" rather than "all messages".
+  const count = Math.trunc(limit);
+  if (!(count > 0)) {
+    return '';
+  }
+  return context.messages
+    .slice(-count)
+    .map((message) => extractTextFromContent(message.content).trim())
+    .filter(Boolean)
+    .join('\n')
+    .toLowerCase();
+};
+/** Count the messages in the context whose role is `toolResult`. */
+export const countToolResults = (context: Context): number => {
+  let total = 0;
+  for (const entry of context.messages) {
+    if (entry.role === 'toolResult') {
+      total += 1;
+    }
+  }
+  return total;
+};
+/** Count whitespace-separated words; blank or empty text counts as zero. */
+export const countWords = (text: string): number => {
+  // Each maximal run of non-whitespace characters is one word.
+  const words = text.match(/\S+/g);
+  return words?.length ?? 0;
+};
+/**
+ * Report whether any message in the context has structured (array) content
+ * containing a part of type `image`.
+ */
+export const hasImageAttachment = (context: Context): boolean => {
+  for (const entry of context.messages) {
+    // Plain-string content cannot carry image parts.
+    if (!Array.isArray(entry.content)) {
+      continue;
+    }
+    for (const part of entry.content) {
+      if (part.type === 'image') {
+        return true;
+      }
+    }
+  }
+  return false;
+};
+/**
+ * Report whether `text` contains at least one of `keywords` as a substring.
+ * Matching is case-sensitive plain substring search, not word-boundary aware.
+ */
+export const containsAny = (text: string, keywords: string[]): boolean => {
+  for (const keyword of keywords) {
+    if (text.includes(keyword)) {
+      return true;
+    }
+  }
+  return false;
+};
+/**
+ * Map a tier to its phase: `high` to `planning`, `medium` to
+ * `implementation`, and anything else to `lightweight`.
+ */
+export const phaseForTier = (tier: RouterTier): RouterPhase => {
+  switch (tier) {
+    case 'high':
+      return 'planning';
+    case 'medium':
+      return 'implementation';
+    default:
+      return 'lightweight';
+  }
+};
+/**
+ * Assemble a `RoutingDecision` for the given tier of a profile.
+ *
+ * The target provider and model id come from parsing the tier's configured
+ * model reference. The thinking level is resolved in order: a per-tier entry
+ * in `thinkingOverrides`, then the tier's configured `thinking`, then a
+ * default that mirrors the tier (`high` for high, `low` for low, otherwise
+ * `medium`). The decision is stamped with the current time.
+ */
+export const buildRoutingDecision = (
+  profileName: string,
+  profile: RouterProfile,
+  tier: RouterTier,
+  phase: RouterPhase,
+  reasoning: string,
+  thinkingOverrides?: RouterThinkingByTier,
+  isClassifier?: boolean,
+): RoutingDecision => {
+  const tierConfig = profile[tier];
+  const target = parseCanonicalModelRef(tierConfig.model);
+  const tierDefault = tier === 'high' || tier === 'low' ? tier : 'medium';
+  const thinking =
+    thinkingOverrides?.[tier] ?? tierConfig.thinking ?? tierDefault;
+  return {
+    profile: profileName,
+    tier,
+    phase,
+    targetProvider: target.provider,
+    targetModelId: target.modelId,
+    targetLabel: tierConfig.model,
+    reasoning,
+    thinking,
+    timestamp: Date.now(),
+    isClassifier,
+  };
+};
+/**
+ * Pick a tier and phase for the latest user prompt using keyword heuristics.
+ *
+ * Precedence, highest first:
+ *  1. `pinnedTier`, when given, is used as-is.
+ *  2. The first entry in `rules` with a keyword found in the prompt.
+ *  3. The built-in heuristic chain (explicit quality/speed hints, summary,
+ *     planning, implementation and lookup keywords, then sticky-phase and
+ *     length-based fallbacks). The order of that chain is significant.
+ * Afterwards, when `isBudgetExceeded` is set, a `high` result is downgraded
+ * to `medium` / `implementation` (this also applies to pinned and rule
+ * results).
+ *
+ * All keyword matching is a case-insensitive substring search against the
+ * lowercased last user message.
+ *
+ * @param context - Conversation to inspect.
+ * @param profileName - Name recorded on the decision.
+ * @param profile - Tier configuration used to resolve the target model.
+ * @param previousDecision - Prior decision; its phase seeds the default phase
+ *   and drives the sticky-phase thresholds.
+ * @param pinnedTier - Tier forced by the user, bypassing rules and heuristics.
+ * @param thinkingOverrides - Per-tier thinking overrides passed through to
+ *   `buildRoutingDecision`.
+ * @param phaseBias - Scales how far the word-count thresholds are lowered
+ *   when the previous phase was planning/implementation (default 0.5).
+ * @param rules - Custom keyword rules checked before the heuristics.
+ * @param isBudgetExceeded - When true, `high` is downgraded to `medium`.
+ * @returns The decision, with `isRuleMatched` and `isBudgetForced` set.
+ */
+export const decideRouting = (
+  context: Context,
+  profileName: string,
+  profile: RouterProfile,
+  previousDecision: RoutingDecision | undefined,
+  pinnedTier?: RouterTier,
+  thinkingOverrides?: RouterThinkingByTier,
+  phaseBias = 0.5,
+  rules?: RoutingRule[],
+  isBudgetExceeded = false,
+): RoutingDecision => {
+  const prompt = getLastUserText(context).toLowerCase();
+  const recentConversation = getRecentConversationText(context);
+  const toolResultCount = countToolResults(context);
+  const wordCount = countWords(prompt);
+  const multiLinePrompt = prompt.split('\n').length >= 4;
+  const explicitHighHints = [
+    'best',
+    'deep',
+    'deeply',
+    'carefully',
+    'thoroughly',
+    'robust',
+    'comprehensive',
+    'step by step',
+    'think hard',
+    'highest quality',
+  ];
+  const explicitLowHints = [
+    'fast',
+    'cheap',
+    'quick',
+    'quickly',
+    'brief',
+    'briefly',
+    'one sentence',
+    'one line',
+    'tiny',
+    'small',
+  ];
+  const planningKeywords = [
+    'plan',
+    'planning',
+    'architecture',
+    'architect',
+    'design',
+    'tradeoff',
+    'trade-off',
+    'research',
+    'investigate',
+    'root cause',
+    'analyze',
+    'analysis',
+    'migration',
+    'strategy',
+    'compare',
+    'options',
+    'approach',
+  ];
+  const summaryKeywords = [
+    'summarize',
+    'summary',
+    'changelog',
+    'rewrite',
+    'reformat',
+    'format',
+    'rename',
+    'explain briefly',
+    'recap',
+    'tl;dr',
+  ];
+  const implementationKeywords = [
+    'implement',
+    'code',
+    'fix',
+    'update',
+    'edit',
+    'write',
+    'refactor',
+    'add tests',
+    'patch',
+    'change',
+    'apply',
+    'continue',
+    'resume',
+    'make the changes',
+    'go ahead',
+  ];
+  const lookupKeywords = [
+    'where is',
+    'which file',
+    'show me',
+    'list',
+    'what files',
+    'find',
+    'grep',
+  ];
+  let phase: RouterPhase = previousDecision?.phase ?? 'implementation';
+  let tier: RouterTier = 'medium';
+  let reasoning = 'Defaulted to medium tier for general coding work.';
+  let isRuleMatched = false;
+  if (pinnedTier) {
+    phase = phaseForTier(pinnedTier);
+    tier = pinnedTier;
+    reasoning = `Pinned to ${pinnedTier} tier via /router-pin.`;
+  } else {
+    // Check custom rules first
+    if (rules) {
+      for (const rule of rules) {
+        const matches = Array.isArray(rule.matches)
+          ? rule.matches
+          : [rule.matches];
+        // `prompt` is already lowercased, so keywords are lowercased too;
+        // otherwise a rule written as "Terraform" could never match. Empty
+        // keywords are skipped because `includes('')` is true for any prompt.
+        const needles = matches
+          .filter((m) => typeof m === 'string' && m.length > 0)
+          .map((m) => m.toLowerCase());
+        if (containsAny(prompt, needles)) {
+          tier = rule.tier;
+          phase = phaseForTier(tier);
+          reasoning =
+            rule.reason ??
+            `Matched custom routing rule for: ${matches.join(', ')}`;
+          isRuleMatched = true;
+          break;
+        }
+      }
+    }
+    if (!isRuleMatched) {
+      // Sticky phase adjustments: a previous planning phase lowers the word
+      // count needed to stay "high" (never below 40 words) ...
+      const highThreshold = Math.max(
+        40,
+        120 - (previousDecision?.phase === 'planning' ? phaseBias * 80 : 0),
+      );
+      // ... and a previous planning/implementation phase lowers the word
+      // count under which a prompt is treated as "low" (never below 4 words).
+      const lowThreshold = Math.max(
+        4,
+        12 -
+          (previousDecision?.phase === 'implementation' ||
+          previousDecision?.phase === 'planning'
+            ? phaseBias * 8
+            : 0),
+      );
+      if (containsAny(prompt, explicitHighHints)) {
+        phase = 'planning';
+        tier = 'high';
+        reasoning =
+          'Detected an explicit request for deeper or higher-quality reasoning.';
+      } else if (containsAny(prompt, explicitLowHints)) {
+        phase = 'lightweight';
+        tier = 'low';
+        reasoning =
+          'Detected an explicit request for a faster or lighter response.';
+      } else if (containsAny(prompt, summaryKeywords)) {
+        phase = 'lightweight';
+        tier = 'low';
+        reasoning = 'Detected summary or lightweight transformation keywords.';
+      } else if (
+        containsAny(prompt, planningKeywords) ||
+        prompt.startsWith('why ') ||
+        wordCount >= highThreshold ||
+        multiLinePrompt
+      ) {
+        phase = 'planning';
+        tier = 'high';
+        reasoning =
+          previousDecision?.phase === 'planning'
+            ? 'Continued planning phase based on complexity or keywords.'
+            : 'Detected planning, broad analysis, or a high-complexity request.';
+      } else if (containsAny(prompt, implementationKeywords)) {
+        phase = 'implementation';
+        tier = 'medium';
+        reasoning =
+          'Detected implementation-oriented work with bounded execution scope.';
+      } else if (
+        containsAny(prompt, lookupKeywords) &&
+        wordCount <= 24 &&
+        toolResultCount === 0
+      ) {
+        phase = 'lightweight';
+        tier = 'low';
+        reasoning = 'Detected a short read-only lookup request.';
+      } else if (
+        previousDecision?.phase === 'planning' &&
+        toolResultCount === 0 &&
+        wordCount > lowThreshold
+      ) {
+        phase = 'planning';
+        tier = 'high';
+        reasoning =
+          'Kept the planning-phase bias because the conversation still looks exploratory.';
+      } else if (
+        toolResultCount > 0 ||
+        previousDecision?.phase === 'implementation' ||
+        recentConversation.includes('plan:')
+      ) {
+        phase = 'implementation';
+        tier = 'medium';
+        reasoning =
+          'Detected active implementation work from prior tools or recent plan execution context.';
+      } else if (wordCount <= lowThreshold) {
+        phase = 'lightweight';
+        tier = 'low';
+        reasoning = 'Detected a short bounded request.';
+      }
+    }
+  }
+  // Budget guard runs last so it also caps pinned and rule-selected tiers.
+  let isBudgetForced = false;
+  if (isBudgetExceeded && tier === 'high') {
+    tier = 'medium';
+    phase = 'implementation';
+    reasoning = `Budget exceeded. Downgraded from high to medium tier. (Original: ${reasoning})`;
+    isBudgetForced = true;
+  }
+  const decision = buildRoutingDecision(
+    profileName,
+    profile,
+    tier,
+    phase,
+    reasoning,
+    thinkingOverrides,
+    false,
+  );
+  decision.isRuleMatched = isRuleMatched;
+  decision.isBudgetForced = isBudgetForced;
+  return decision;
+};
+/**
+ * Ask a classifier model to assign the latest user request to a tier.
+ *
+ * Builds a single-message prompt from the last user text and the last four
+ * messages of history, streams the model's reply, and parses the expected
+ * `Tier:` / `Reasoning:` lines. This is best-effort: any failure (unknown
+ * model, missing credentials, stream error, unparseable reply) resolves to
+ * `undefined` so the caller can fall back to heuristics.
+ *
+ * @param classifierModelRef - Canonical reference of the classifier model.
+ * @param modelRegistry - Registry used to look up the model and credentials.
+ * @param context - Conversation being classified.
+ * @param currentPhase - Current phase, added to the prompt as a bias hint.
+ * @returns The tier and a one-sentence reasoning, or `undefined`.
+ */
+export const runClassifier = async (
+  classifierModelRef: string,
+  modelRegistry: ExtensionContext['modelRegistry'],
+  context: Context,
+  currentPhase?: RouterPhase,
+): Promise<{ tier: RouterTier; reasoning: string } | undefined> => {
+  try {
+    const { provider, modelId } = parseCanonicalModelRef(classifierModelRef);
+    const model = modelRegistry.find(provider, modelId);
+    if (!model) return undefined;
+    const auth = await modelRegistry.getApiKeyAndHeaders(model);
+    if (!auth.ok || !auth.apiKey) return undefined;
+    const apiKey = auth.apiKey;
+    const headers = auth.headers;
+    const promptText = getLastUserText(context);
+    const historyText = getRecentConversationText(context, 4);
+    const classifierPrompt = `You are a model router classifier. Your job is to categorize the user's latest request into one of three tiers: "high", "medium", or "low".
+Tiers:
+- high: Architecture, design, planning, tradeoff analysis, broad debugging, large refactors, codebase research.
+- medium: Implementation of a known plan, multi-file edits, normal coding work, focused debugging, tests/fixes.
+- low: Summaries, changelogs, formatting, quick explanations, small bounded transforms, simple read-only lookup.
+${currentPhase ? `Current conversation phase: ${currentPhase}\n` : ''}
+Recent history:
+${historyText}
+Latest user message:
+${promptText}
+Return your decision in exactly two lines:
+Tier: [high|medium|low]
+Reasoning: [one short sentence]
+${currentPhase === 'planning' ? 'Consider that the conversation is currently in a planning phase. Bias toward "high" unless the request is clearly a simple implementation or summary.' : ''}
+${currentPhase === 'implementation' ? 'Consider that the conversation is currently in an implementation phase. Bias toward "medium" unless the request is clearly planning or a simple summary.' : ''}`;
+    const classifierContext: Context = {
+      ...context,
+      messages: [
+        { role: 'user', content: classifierPrompt, timestamp: Date.now() },
+      ],
+    };
+    const stream = streamSimple(model, classifierContext, { apiKey, headers });
+    let fullText = '';
+    for await (const event of stream) {
+      const delta = (event as { delta?: unknown }).delta;
+      if (event.type === 'text_delta' && typeof delta === 'string') {
+        fullText += delta;
+      }
+    }
+    // Normalise each line: tolerate CRLF, indentation, and leading list or
+    // markdown markers such as "- Tier: high" or "**Tier:** high".
+    const lines = fullText
+      .split(/\r?\n/)
+      .map((line) => line.trim().replace(/^[^a-z]+/i, ''));
+    // Take everything after the label so values that themselves contain a
+    // colon (common in free-text reasoning) are not truncated.
+    const valueAfterLabel = (label: string): string | undefined => {
+      const line = lines.find((l) => l.toLowerCase().startsWith(label));
+      return line === undefined ? undefined : line.slice(label.length).trim();
+    };
+    const rawTier = valueAfterLabel('tier:');
+    if (rawTier !== undefined) {
+      // Models frequently echo the template's brackets or add quotes,
+      // emphasis or a trailing period: "[high]", "**high**", "high.".
+      const tierValue = rawTier
+        .toLowerCase()
+        .replace(/^[^a-z]+|[^a-z]+$/g, '');
+      if (isRouterTier(tierValue)) {
+        return {
+          tier: tierValue,
+          reasoning: valueAfterLabel('reasoning:') || 'Classifier decision.',
+        };
+      }
+    }
+  } catch {
+    // Deliberately ignored: classification is optional and the caller falls
+    // back to heuristics when this resolves to undefined.
+  }
+  return undefined;
+};

package/extensions/scope-shim.ts ADDED Viewed

@@ -0,0 +1,213 @@
+/**
+ * Scope Shim — work around the lack of ExtensionAPI.setScopedModels().
+ *
+ * Today: writes `enabledModels` to Pi settings.json directly, then
+ * optionally reloads the session so Pi picks up the new scope.
+ * Tomorrow: swap `writeSettingsScope()` for `pi.setScopedModels()`
+ * when upstream exposes it.
+ */
+import {
+  readFileSync,
+  renameSync,
+  unlinkSync,
+  writeFileSync,
+} from 'node:fs';
+import { join } from 'node:path';
+import { getAgentDir } from '@mariozechner/pi-coding-agent';
+import type { Model } from '@mariozechner/pi-ai';
+import type {
+  ExtensionAPI,
+  ExtensionContext,
+} from '@mariozechner/pi-coding-agent';
+import type { RouterProfile, RouterConfig } from './types';
+import { parseCanonicalModelRef } from './config';
+// ─── Types ──────────────────────────────────────────────────────────────────
+/** One model in the router scope, with the thinking level it was listed under. */
+export interface ScopeModelRef {
+  /** Model reference string as written in the router config. */
+  modelRef: string;
+  /** Thinking level of the tier that first listed this model, if configured. */
+  thinkingLevel?: string;
+}
+/** Outcome of reading or changing `enabledModels` in Pi's settings.json. */
+export interface ScopeSettingsResult {
+  /** The `enabledModels` list after the operation, or undefined when unset or on failure. */
+  enabledModels: undefined | Array<string>;
+  /** False only when the settings file could not be updated. */
+  success: boolean;
+  /** Human-readable summary of what happened. */
+  message: string;
+}
+// ─── Derive scope from profile ──────────────────────────────────────────────
+/**
+ * Collect every model referenced by any router profile.
+ *
+ * Profiles are visited in config order and, within each, tiers in the order
+ * high, medium, low; a tier contributes its primary model followed by its
+ * fallbacks. Each model reference appears once, carrying the thinking level
+ * of the tier where it was first seen.
+ */
+export const deriveRouterScope = (config: RouterConfig): ScopeModelRef[] => {
+  const tierOrder = ['high', 'medium', 'low'] as const;
+  // A Map keeps first-insertion order, which gives both dedup and ordering.
+  const byRef = new Map<string, ScopeModelRef>();
+  Object.values(config.profiles).forEach((profile) => {
+    tierOrder.forEach((tier) => {
+      const tierConfig = profile[tier];
+      const candidates = [tierConfig.model, ...(tierConfig.fallbacks ?? [])];
+      candidates.forEach((modelRef) => {
+        if (byRef.has(modelRef)) {
+          return;
+        }
+        byRef.set(modelRef, { modelRef, thinkingLevel: tierConfig.thinking });
+      });
+    });
+  });
+  return Array.from(byRef.values());
+};
+/**
+ * Look up each scope entry in the model registry.
+ *
+ * Entries whose reference cannot be parsed, or whose model the registry does
+ * not know, are silently left out; the rest keep their original order.
+ */
+export const resolveScopeFromRegistry = (
+  scope: ScopeModelRef[],
+  ctx: ExtensionContext,
+): { model: Model<any>; thinkingLevel?: string }[] => {
+  const found: { model: Model<any>; thinkingLevel?: string }[] = [];
+  scope.forEach(({ modelRef, thinkingLevel }) => {
+    try {
+      const ref = parseCanonicalModelRef(modelRef);
+      const model = ctx.modelRegistry.find(ref.provider, ref.modelId);
+      if (!model) {
+        return;
+      }
+      found.push({ model, thinkingLevel });
+    } catch {
+      // skip invalid refs
+    }
+  });
+  return found;
+};
+// ─── Settings.json file operations (shim until upstream API) ──────────────────
+/** Path of `settings.json` inside the Pi agent directory. */
+const getSettingsPath = (): string => join(getAgentDir(), 'settings.json');
+/**
+ * Read and parse Pi's settings.json without ever throwing.
+ *
+ * Returns an empty object when the file cannot be read, is not valid JSON,
+ * or does not parse to a non-null object.
+ */
+const readPiSettings = (): Record<string, unknown> => {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(readFileSync(getSettingsPath(), 'utf-8'));
+  } catch {
+    // ignore — may not exist or be invalid JSON
+    return {};
+  }
+  return typeof parsed === 'object' && parsed !== null
+    ? (parsed as Record<string, unknown>)
+    : {};
+};
+/**
+ * Write Pi settings.json atomically.
+ *
+ * The JSON (2-space indented) is first written to a temporary file next to
+ * the target and then renamed over it, so a crash or full disk mid-write
+ * cannot leave a truncated settings.json behind.
+ *
+ * @param settings - Complete settings object to persist.
+ * @throws Whatever the underlying file-system call throws; the temporary
+ *   file is removed on a best-effort basis before rethrowing.
+ */
+const writePiSettings = (settings: Record<string, unknown>): void => {
+  const target = getSettingsPath();
+  // Same directory as the target so the rename stays on one file system.
+  const staging = `${target}.${process.pid}.tmp`;
+  try {
+    writeFileSync(staging, JSON.stringify(settings, null, 2));
+    renameSync(staging, target);
+  } catch (error) {
+    try {
+      unlinkSync(staging);
+    } catch {
+      // The temporary file may never have been created; nothing to clean up.
+    }
+    throw error;
+  }
+};
+/**
+ * Load settings.json for a read-modify-write cycle.
+ *
+ * A missing or empty file yields `{}`. A file that exists but cannot be read,
+ * is not valid JSON, or is not a JSON object throws, so the caller does not
+ * overwrite (and thereby discard) settings it failed to understand.
+ */
+const loadSettingsForUpdate = (): Record<string, unknown> => {
+  let raw: string;
+  try {
+    raw = readFileSync(getSettingsPath(), 'utf-8');
+  } catch (error) {
+    const code =
+      typeof error === 'object' && error !== null
+        ? (error as { code?: unknown }).code
+        : undefined;
+    if (code === 'ENOENT') {
+      return {};
+    }
+    throw error;
+  }
+  if (raw.trim() === '') {
+    return {};
+  }
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (error) {
+    const detail = error instanceof Error ? error.message : String(error);
+    throw new Error(
+      `existing file is not valid JSON (${detail}); refusing to overwrite it`,
+    );
+  }
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+    throw new Error(
+      'existing file does not contain a JSON object; refusing to overwrite it',
+    );
+  }
+  return parsed as Record<string, unknown>;
+};
+/**
+ * Write the given scope's model refs to Pi settings.json `enabledModels`.
+ *
+ * This is the **shim** — once upstream exposes:
+ *   `pi.setScopedModels(models: ScopedModelEntry[])`
+ * replace this entire function with that call.
+ *
+ * @param scope - Models to enable; only each entry's `modelRef` is written.
+ * @param mergeIntoExisting - When true and `enabledModels` is already an
+ *   array, the scope's refs that are not yet listed are prepended to the
+ *   existing list; otherwise the list is replaced by the scope.
+ * @returns The resulting list on success. On failure (including an existing
+ *   settings.json that cannot be parsed) `success` is false and the file is
+ *   left untouched.
+ */
+export const writeSettingsScope = (
+  scope: ScopeModelRef[],
+  mergeIntoExisting = false,
+): ScopeSettingsResult => {
+  const patterns = scope.map((s) => s.modelRef);
+  try {
+    const settings = loadSettingsForUpdate();
+    const existing = settings.enabledModels;
+    if (mergeIntoExisting && Array.isArray(existing)) {
+      // prepend router's models, then existing (deduplicated)
+      const current = existing as string[];
+      const known = new Set(current);
+      const additions = patterns.filter((p) => !known.has(p));
+      settings.enabledModels = [...additions, ...current];
+    } else {
+      settings.enabledModels = patterns;
+    }
+    writePiSettings(settings);
+    return {
+      enabledModels: settings.enabledModels as string[],
+      success: true,
+      message: `Updated settings.enabledModels with ${patterns.length} model(s). Run /reload or start a new session to apply.`,
+    };
+  } catch (error) {
+    return {
+      enabledModels: undefined,
+      success: false,
+      message: `Failed to write settings.json: ${error instanceof Error ? error.message : String(error)}`,
+    };
+  }
+};
+/**
+ * Remove `enabledModels` from Pi settings.json so no scope override remains.
+ *
+ * Reports success without writing when the key is already absent; a failure
+ * while updating the file is reported through `success: false`.
+ */
+export const resetSettingsScope = (): ScopeSettingsResult => {
+  const outcome = (success: boolean, message: string): ScopeSettingsResult => ({
+    enabledModels: undefined,
+    success,
+    message,
+  });
+  try {
+    const settings = readPiSettings();
+    if (settings.enabledModels !== undefined) {
+      delete settings.enabledModels;
+      writePiSettings(settings);
+      return outcome(
+        true,
+        'Cleared settings.enabledModels. Run /reload or start a new session to apply.',
+      );
+    }
+    return outcome(
+      true,
+      'No router scope override in settings.json. Nothing to reset.',
+    );
+  } catch (error) {
+    const detail = error instanceof Error ? error.message : String(error);
+    return outcome(false, `Failed to write settings.json: ${detail}`);
+  }
+};
+/**
+ * Report the `enabledModels` list currently stored in Pi settings.json.
+ *
+ * Always succeeds; a missing, unreadable or non-array value is reported as
+ * "no list set".
+ */
+export const readSettingsScope = (): ScopeSettingsResult => {
+  const { enabledModels: stored } = readPiSettings();
+  if (!Array.isArray(stored)) {
+    return {
+      enabledModels: undefined,
+      success: true,
+      message: 'No enabledModels set in settings.json (all models available).',
+    };
+  }
+  const enabledModels = stored as string[];
+  return {
+    enabledModels,
+    success: true,
+    message: `Current enabledModels: ${enabledModels.join(', ')}`,
+  };
+};
+// ─── Future upstream migration (commented, ready to swap) ─────────────────────
+/*
+When upstream exposes `pi.setScopedModels`:
+  export const applyRouterScopeUpstream = (
+    pi: ExtensionAPI,
+    config: RouterConfig,
+    ctx: ExtensionContext,
+  ): void => {
+    const scope = deriveRouterScope(config);
+    const resolved = resolveScopeFromRegistry(scope, ctx);
+    // @ts-expect-error — not yet in types
+    pi.setScopedModels?.(
+      resolved.map(({ model, thinkingLevel }) => ({
+        model,
+        thinkingLevel: thinkingLevel as ThinkingLevel,
+      })),
+    );
+  };
+*/

package/extensions/state.ts ADDED Viewed

@@ -0,0 +1,49 @@
+import type {
+  RouterPinByProfile,
+  RouterThinkingByProfile,
+  RoutingDecision,
+  RouterPersistedState,
+} from './types';
+/**
+ * Type guard for persisted router state.
+ *
+ * Only the three core fields are checked: `enabled` must be a boolean,
+ * `selectedProfile` a string and `timestamp` a number. Other fields are not
+ * validated.
+ */
+export const isRouterPersistedState = (
+  value: unknown,
+): value is RouterPersistedState => {
+  if (value === null || typeof value !== 'object') {
+    return false;
+  }
+  const candidate = value as Record<string, unknown>;
+  const requiredFields: Array<[string, string]> = [
+    ['enabled', 'boolean'],
+    ['selectedProfile', 'string'],
+    ['timestamp', 'number'],
+  ];
+  return requiredFields.every(
+    ([field, kind]) => typeof candidate[field] === kind,
+  );
+};
+/**
+ * Snapshot the router's runtime state into its persisted form.
+ *
+ * `pinTier` is the pin of the selected profile and `lastPhase` mirrors the
+ * last decision's phase. The pin and thinking maps are shallow-copied, while
+ * `debugHistory` and `lastDecision` are stored by reference. The snapshot is
+ * stamped with the current time.
+ */
+export const buildPersistedState = (
+  routerEnabled: boolean,
+  selectedProfile: string,
+  pinnedTierByProfile: RouterPinByProfile,
+  thinkingByProfile: RouterThinkingByProfile,
+  debugEnabled: boolean,
+  widgetEnabled: boolean,
+  debugHistory: RoutingDecision[],
+  lastDecision: RoutingDecision | undefined,
+  lastNonRouterModel: string | undefined,
+  accumulatedCost: number,
+): RouterPersistedState => {
+  const pinTier = pinnedTierByProfile[selectedProfile];
+  const lastPhase = lastDecision ? lastDecision.phase : undefined;
+  const snapshot: RouterPersistedState = {
+    enabled: routerEnabled,
+    selectedProfile,
+    pinTier,
+    pinByProfile: Object.assign({}, pinnedTierByProfile),
+    thinkingByProfile: Object.assign({}, thinkingByProfile),
+    debugEnabled,
+    widgetEnabled,
+    debugHistory,
+    lastPhase,
+    lastDecision,
+    lastNonRouterModel,
+    accumulatedCost,
+    timestamp: Date.now(),
+  };
+  return snapshot;
+};