npm - @firstpick/pi-extension-stats - Versions diffs - 0.1.7 → 0.1.9 - Mend

@firstpick/pi-extension-stats 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -9,7 +9,7 @@ Token and cost analytics for Pi session history.
 - Parses local Pi session `.jsonl` files for the current workspace.
 - Aggregates usage by UTC day.
 - Displays compact daily token bars and cost bars with totals.
-- Shows input/output/cache breakdown, prompt-injection estimate (`PI: X tok`) with source split-up, cache hit rate, estimated cache savings, cost burn rate, and top model usage.
+- Shows input/output/cache breakdown, estimated initial prompt input (`PI: X tok`) with source split-up, cache hit rate, estimated cache savings, cost burn rate, and top model usage.
 - Highlights highest-cost day, projected 30-day cost, most expensive sessions, and model cost efficiency.
 ## Install
@@ -26,13 +26,30 @@ No required configuration.
 - `/stats [days|all]` — show token usage dashboard (default: last 14 days).
 - `/stats tokens` — show current context token breakdown by source/type.
-- `/stats-pi` — show prompt-injection token breakdown.
+- `/stats-pi` — show estimated initial prompt input token breakdown. It counts Pi's system prompt text, active provider-level tool schemas, framing overhead, and optional historical calibration.
+- `/calibrate` — start an isolated calibration session with a fixed probe prompt, then update `/stats-pi` and the footer `PI: X tok` estimate from the first assistant response usage. `/calibrate current` reuses the current branch if it already has a suitable first-turn usage sample.
 - `/stats-last [days|all]` — show non-zero daily usage graph.
 - `/stats-most-expense [days|all]` — show most expensive sessions.
 - `/stats-model-compare [days|all]` — show model token/cost comparison.
 - `/stats-cost-trend [days|all]` — show cost trend and projections.
 - `/stats-cache [days|all]` — show cache efficiency and token mix.
+## Prompt input estimate
+`/stats-pi` and the `PI: ~X tok` value in `/stats` estimate the full initial model input, not just raw prompt text. `/stats-pi` can be run before any LLM prompt in a fresh session.
+The calculation is intentionally provider-agnostic:
+```text
+promptTextTokens = weighted text estimate of ctx.getSystemPrompt()
+toolSchemaTokens = weighted text estimate of active tool definitions JSON
+framingTokens = conservative message/request framing allowance
+baseEstimate = promptTextTokens + toolSchemaTokens + framingTokens
+estimatedInitialInput = baseEstimate × historicalCalibrationMultiplier
+```
+The historical multiplier is learned opportunistically from future sessions by comparing the pre-call estimate with the provider-reported first assistant `usage.input + usage.cacheRead + usage.cacheWrite` after subtracting the first user prompt estimate. `/calibrate` performs the same calculation on demand by opening an isolated session and sending a fixed probe prompt; `/calibrate current` can reuse the current branch once its first assistant response has usage data. Without samples, `/stats-pi` reports an uncalibrated estimate and a conservative range. Provider-reported usage in Pi session JSONL remains the authoritative post-call value.
 ## Tools
 None.

package/index.ts CHANGED Viewed

@@ -2,7 +2,18 @@ import * as fs from "node:fs";
 import * as path from "node:path";
 import { buildSessionContext, formatSkillsForPrompt } from "@earendil-works/pi-coding-agent";
 import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
-import { estimatePromptInjectionTokens, estimateTokensFromCharCount, formatTokens } from "@firstpick/pi-utils";
+import {
+  appendInitialPromptCalibrationRecord,
+  buildInitialPromptCalibrationRecord,
+  collectInitialPromptCalibration,
+  estimateInitialPromptInput,
+  estimatePromptInjectionTokens,
+  estimateTokensFromCharCount,
+  formatTokens,
+  type InitialPromptCalibration,
+  type InitialPromptInputEstimate,
+  type InitialPromptToolInfo,
+} from "@firstpick/pi-utils";
 type DayUsage = {
   input: number;
@@ -31,6 +42,12 @@ type UsageRecord = {
 type Totals = DayUsage;
+type PendingInitialPromptMeasurement = { // first-turn measurement kept from before_agent_start until an assistant message_end that carries usage
+  estimate: InitialPromptInputEstimate; // pre-call estimate, computed with calibration set to null
+  firstUserTokens: number; // token estimate of the first user prompt plus FIRST_USER_MESSAGE_FRAMING_TOKENS
+  skipReason?: string; // when set ("image prompt"), the measurement is discarded instead of recorded
+};
 type PromptInjectionSource = {
   label: string;
   chars: number;
@@ -44,6 +61,8 @@ type TokenBreakdownSource = {
 const DEFAULT_DAYS = 14;
 const MAX_BAR_WIDTH = 24;
 const COST_BAR_WIDTH = 10;
+const FIRST_USER_MESSAGE_FRAMING_TOKENS = 16; // added on top of the first user message's token estimate; NOTE(review): presumably a per-message framing allowance — the value is not derived in this file
+const CALIBRATION_PROMPT = "Calibration probe: reply with exactly `calibration-ok` and no other text."; // fixed probe text that /calibrate sends in a new session
 function addPromptSource(sources: PromptInjectionSource[], label: string, content: string | undefined): number {
   if (!content) return 0;
@@ -172,22 +191,65 @@ function buildPromptInjectionSources(systemPrompt: string, options: BuildSystemP
   return sources;
 }
-function formatPromptInjectionLines(systemPrompt: string, options: BuildSystemPromptOptions | null): string[] {
-  const sources = buildPromptInjectionSources(systemPrompt, options)
-    .map((source) => ({ ...source, tokens: estimateTokensFromCharCount(source.chars) }))
+function formatTokenCell(tokens: number): string { // Formats a token count for a table cell.
+  return tokens < 0 ? `-${formatTokens(Math.abs(tokens))}` : formatTokens(tokens); // negatives: "-" followed by the formatted absolute value
+}
+function formatCalibrationSummary(estimate: InitialPromptInputEstimate): string { // Short label describing how the estimate was scaled.
+  if (estimate.calibrationSamples <= 0) return "uncalibrated"; // no samples recorded
+  const sampleLabel = estimate.calibrationSamples === 1 ? "sample" : "samples"; // singular only for exactly one sample
+  return `learned scale ×${estimate.calibrationMultiplier.toFixed(2)} from ${estimate.calibrationSamples} ${sampleLabel}`; // multiplier shown with two decimals
+}
+function distributeCalibratedTokens<T extends { tokens: number }>(sources: T[], calibratedTotal: number): T[] { // Rescales each source's tokens proportionally so that, for an integer calibratedTotal, the rows sum to it.
+  const uncalibratedTotal = sources.reduce((sum, source) => sum + source.tokens, 0); // sum of the unscaled rows
+  if (uncalibratedTotal <= 0 || calibratedTotal <= 0) return sources.map((source) => ({ ...source, tokens: 0 })); // nothing to distribute: every row becomes 0
+  const exact = sources.map((source, index) => {
+    const scaled = (source.tokens / uncalibratedTotal) * calibratedTotal; // proportional share, possibly fractional
+    const tokens = Math.floor(scaled);
+    return { index, tokens, remainder: scaled - tokens }; // keep the fractional part for the hand-out below
+  });
+  let remaining = calibratedTotal - exact.reduce((sum, source) => sum + source.tokens, 0); // tokens lost to flooring; NOTE(review): fractional if calibratedTotal is not an integer, in which case the loop hands out one token too many — confirm callers pass integers
+  for (const source of [...exact].sort((a, b) => b.remainder - a.remainder || a.index - b.index)) { // largest remainder first, ties in original order; the sorted copy shares objects with exact
+    if (remaining <= 0) break;
+    source.tokens += 1;
+    remaining -= 1;
+  }
+  return sources.map((source, index) => ({ ...source, tokens: exact[index]?.tokens ?? 0 })); // copies in the original order with the rescaled tokens
+}
+function formatPromptInjectionLines(systemPrompt: string, options: BuildSystemPromptOptions | null, estimate: InitialPromptInputEstimate): string[] {
+  const promptSources = buildPromptInjectionSources(systemPrompt, options)
+    .map((source) => ({
+      ...source,
+      tokens: systemPrompt.length > 0 ? Math.round((source.chars / systemPrompt.length) * estimate.promptText) : estimateTokensFromCharCount(source.chars),
+    }))
     .sort((a, b) => b.tokens - a.tokens || b.chars - a.chars);
-  const totalTokens = estimatePromptInjectionTokens(systemPrompt);
+  const uncalibratedSources = [
+    estimate.toolSchemas > 0
+      ? { label: `Active tool schemas (${estimate.toolCount})`, chars: 0, tokens: estimate.toolSchemas }
+      : null,
+    estimate.framing > 0 ? { label: "Provider/request framing", chars: 0, tokens: estimate.framing } : null,
+    ...promptSources,
+  ].filter((source): source is { label: string; chars: number; tokens: number } => !!source && source.tokens !== 0);
+  const sources = distributeCalibratedTokens(uncalibratedSources, estimate.total)
+    .filter((source) => source.tokens !== 0)
+    .sort((a, b) => Math.abs(b.tokens) - Math.abs(a.tokens) || b.chars - a.chars);
   const labelWidth = Math.max("Source".length, ...sources.map((source) => source.label.length));
-  const tokenWidth = Math.max("Tokens".length, ...sources.map((source) => formatTokens(source.tokens).length));
+  const tokenWidth = Math.max("Tokens".length, ...sources.map((source) => formatTokenCell(source.tokens).length));
   const percentWidth = "%".length;
   const separator = `├${"─".repeat(labelWidth + 2)}┼${"─".repeat(tokenWidth + 2)}┼${"─".repeat(percentWidth + 6)}┤`;
   const rows = sources.map((source) => {
-    const percent = totalTokens > 0 ? `${((source.tokens / totalTokens) * 100).toFixed(1)}%` : "0.0%";
-    return `│ ${source.label.padEnd(labelWidth)} │ ${formatTokens(source.tokens).padStart(tokenWidth)} │ ${percent.padStart(percentWidth + 4)} │`;
+    const percent = estimate.total > 0 ? `${((source.tokens / estimate.total) * 100).toFixed(1)}%` : "0.0%";
+    return `│ ${source.label.padEnd(labelWidth)} │ ${formatTokenCell(source.tokens).padStart(tokenWidth)} │ ${percent.padStart(percentWidth + 4)} │`;
   });
+  const range = estimate.low !== estimate.high ? ` · range ${formatTokens(estimate.low)}–${formatTokens(estimate.high)}` : "";
   return [
-    `Prompt injection: PI: ${formatTokens(totalTokens)} tok`,
+    `Prompt injection: PI: ~${formatTokens(estimate.total)} tok initial input (${estimate.confidence}${range})`,
+    `Unscaled basis: prompt text ${formatTokens(estimate.promptText)} + tool schemas ${formatTokens(estimate.toolSchemas)} + framing ${formatTokens(estimate.framing)} · displayed rows are proportionally scaled (${formatCalibrationSummary(estimate)})`,
     `┌${"─".repeat(labelWidth + 2)}┬${"─".repeat(tokenWidth + 2)}┬${"─".repeat(percentWidth + 6)}┐`,
     `│ ${"Source".padEnd(labelWidth)} │ ${"Tokens".padStart(tokenWidth)} │ ${"%".padStart(percentWidth + 4)} │`,
     separator,
@@ -606,9 +668,129 @@ function buildCacheEfficiencyLines(totals: Totals): string[] {
 export default function statsExtension(pi: ExtensionAPI) {
   let latestSystemPromptOptions: BuildSystemPromptOptions | null = null;
+  let pendingInitialPromptMeasurement: PendingInitialPromptMeasurement | null = null;
+  const getToolEstimateInputs = (): { activeTools: string[]; allTools: InitialPromptToolInfo[] } => { // Gathers the tool inputs for the estimate; each lookup falls back to [] if it throws.
+    let activeTools: string[] = [];
+    let allTools: InitialPromptToolInfo[] = [];
-  pi.on("before_agent_start", async (event) => {
+    try {
+      activeTools = pi.getActiveTools();
+    } catch {
+      activeTools = []; // lookup failed: continue with no active tools
+    }
+    try {
+      allTools = pi.getAllTools().map((tool) => ({ // keep only name, description and parameters
+        name: tool.name,
+        description: tool.description,
+        parameters: tool.parameters,
+      }));
+    } catch {
+      allTools = []; // lookup failed: continue with no tool definitions
+    }
+    return { activeTools, allTools };
+  };
+  const getPromptCalibration = (ctx: ExtensionCommandContext): InitialPromptCalibration | null => { // Loads calibration for the session directory of ctx.
+    return collectInitialPromptCalibration(ctx.sessionManager.getSessionDir());
+  };
+  const estimateInitialPromptForContext = (systemPrompt: string, calibration?: InitialPromptCalibration | null): InitialPromptInputEstimate => { // Estimates initial input for systemPrompt with the current tool inputs; calibration is passed through as given.
+    const { activeTools, allTools } = getToolEstimateInputs(); // tool inputs are re-read on every call
+    return estimateInitialPromptInput({ systemPrompt, activeTools, allTools, calibration });
+  };
+  const branchHasAssistantUsage = (ctx: { sessionManager: { getBranch(): unknown[] } }): boolean => { // True if any branch entry is an assistant message that carries usage.
+    try {
+      return ctx.sessionManager.getBranch().some((entry) => {
+        const record = (entry && typeof entry === "object" ? entry : {}) as Record<string, any>; // non-object entries are treated as {}
+        return record.type === "message" && record.message?.role === "assistant" && !!record.message?.usage;
+      });
+    } catch {
+      return true; // getBranch() threw: answering true makes before_agent_start skip arming a measurement
+    }
+  };
+  const calibrateFromCurrentBranch = (ctx: ExtensionCommandContext): { ok: true; record: NonNullable<ReturnType<typeof buildInitialPromptCalibrationRecord>> } | { ok: false; reason: string } => { // Builds a calibration record from the first user message and the first assistant message with usage in the current branch.
+    let firstUserTokens: number | null = null;
+    let firstAssistantWithUsage: Record<string, any> | null = null;
+    for (const entry of ctx.sessionManager.getBranch()) {
+      const record = (entry && typeof entry === "object" ? entry : {}) as Record<string, any>; // non-object entries are treated as {}
+      if (record.type !== "message") continue;
+      const message = (record.message && typeof record.message === "object" ? record.message : {}) as Record<string, any>;
+      if (message.role === "user" && firstUserTokens === null) { // only the first user message is measured
+        firstUserTokens = estimatePromptInjectionTokens(stringifyContextValue(message.content)) + FIRST_USER_MESSAGE_FRAMING_TOKENS;
+      }
+      if (message.role === "assistant" && message.usage) {
+        firstAssistantWithUsage = message;
+        break; // stop at the first assistant message that has usage
+      }
+    }
+    if (firstUserTokens === null) return { ok: false, reason: "No initial user message found in the current branch." };
+    if (!firstAssistantWithUsage) return { ok: false, reason: "No assistant response with usage data found yet. Run /calibrate after the first assistant response finishes." };
+    const usage = firstAssistantWithUsage.usage as Record<string, unknown>;
+    const actualInitialInputTokens =
+      (Number(usage.input ?? 0) || 0) + (Number(usage.cacheRead ?? 0) || 0) + (Number(usage.cacheWrite ?? 0) || 0); // input + cacheRead + cacheWrite; missing or non-numeric fields count as 0
+    if (actualInitialInputTokens <= 0) return { ok: false, reason: "The first assistant response has no input/cache token usage to calibrate from." };
+    const estimate = estimateInitialPromptForContext(ctx.getSystemPrompt(), null); // uncalibrated estimate; NOTE(review): uses the system prompt at command time, which may not be the one sent with the first turn — confirm
+    const record = buildInitialPromptCalibrationRecord({
+      estimate,
+      actualInitialInputTokens,
+      firstUserTokens,
+      provider: String(firstAssistantWithUsage.provider ?? ctx.model?.provider ?? "unknown"),
+      model: String(firstAssistantWithUsage.responseModel ?? firstAssistantWithUsage.model ?? ctx.model?.id ?? "unknown"),
+    });
+    if (!record) return { ok: false, reason: "Calibration sample was outside the accepted sanity range (0.25×–4×)." }; // helper produced no record; the message attributes this to its sanity range
+    return { ok: true, record };
+  };
+  pi.on("session_start", async () => { // a new session drops any measurement that is still pending
+    pendingInitialPromptMeasurement = null;
+  });
+  pi.on("before_agent_start", async (event, ctx) => { // Records the prompt options and, on a branch without assistant usage yet, arms a first-turn measurement.
     latestSystemPromptOptions = event.systemPromptOptions;
+    if (!branchHasAssistantUsage(ctx)) {
+      pendingInitialPromptMeasurement = {
+        estimate: estimateInitialPromptForContext(event.systemPrompt, null), // uncalibrated estimate (calibration = null)
+        firstUserTokens: estimatePromptInjectionTokens(event.prompt) + FIRST_USER_MESSAGE_FRAMING_TOKENS,
+        skipReason: event.images && event.images.length > 0 ? "image prompt" : undefined, // prompts with images are marked to be skipped
+      };
+    }
+  });
+  pi.on("agent_start", async (_event, ctx) => { // Refreshes the pending estimate from ctx.getSystemPrompt().
+    if (!pendingInitialPromptMeasurement || pendingInitialPromptMeasurement.skipReason) return; // nothing pending, or the measurement is marked skipped
+    pendingInitialPromptMeasurement.estimate = estimateInitialPromptForContext(ctx.getSystemPrompt(), null);
+  });
+  pi.on("message_end", async (event) => { // Turns the pending measurement into a calibration record on the first assistant message with usage.
+    if (!pendingInitialPromptMeasurement) return;
+    const pending = pendingInitialPromptMeasurement;
+    const message = (event.message && typeof event.message === "object" ? event.message : {}) as Record<string, any>;
+    if (message.role !== "assistant" || !message.usage) return; // other messages leave the measurement pending
+    pendingInitialPromptMeasurement = null; // consume it so later messages are ignored
+    if (pending.skipReason) return; // skipped measurements are dropped without recording
+    const usage = message.usage as Record<string, unknown>;
+    const actualInitialInputTokens =
+      (Number(usage.input ?? 0) || 0) + (Number(usage.cacheRead ?? 0) || 0) + (Number(usage.cacheWrite ?? 0) || 0); // input + cacheRead + cacheWrite; missing or non-numeric fields count as 0
+    const record = buildInitialPromptCalibrationRecord({ // NOTE(review): no explicit `<= 0` guard here, unlike calibrateFromCurrentBranch; presumably the helper rejects such samples — confirm
+      estimate: pending.estimate,
+      actualInitialInputTokens,
+      firstUserTokens: pending.firstUserTokens,
+      provider: String(message.provider ?? "unknown"),
+      model: String(message.responseModel ?? message.model ?? "unknown"),
+    });
+    if (record) appendInitialPromptCalibrationRecord(pi.appendEntry, record); // append only when the helper returned a record
   });
   const showCurrentContextTokens = (ctx: ExtensionCommandContext) => {
@@ -633,8 +815,9 @@ export default function statsExtension(pi: ExtensionAPI) {
     const byDay = aggregateUsageByDay(records);
     const dayKeys = getScopeDayKeys(byDay, parsedArgs);
     const totals = sumUsage(byDay, dayKeys);
+    const calibration = collectInitialPromptCalibration(sessionDir);
     const scopeLabel = parsedArgs.mode === "all" ? "all days" : `last ${parsedArgs.days} days`;
-    return { files, records, byDay, dayKeys, totals, scopeLabel };
+    return { files, records, byDay, dayKeys, totals, calibration, scopeLabel };
   };
   const parseStatsCommandArgs = (args: string, ctx: ExtensionCommandContext) => {
@@ -702,14 +885,51 @@ export default function statsExtension(pi: ExtensionAPI) {
   );
   registerScopedStatsCommand("stats-last", "Show non-zero daily usage graph. Usage: /stats-last [days|all]", (data, ctx) => {
-    const promptInjectionTokens = estimatePromptInjectionTokens(ctx.getSystemPrompt());
-    return [`📊 Token stats (${data.scopeLabel}, ${data.files.length} sessions) · PI: ${formatTokens(promptInjectionTokens)} tok`, "", ...buildGraphLines(data.byDay, data.dayKeys, true)];
+    const promptEstimate = estimateInitialPromptForContext(ctx.getSystemPrompt(), data.calibration); // header PI value is scaled by the calibration carried in data
+    return [`📊 Token stats (${data.scopeLabel}, ${data.files.length} sessions) · PI: ~${formatTokens(promptEstimate.total)} tok`, "", ...buildGraphLines(data.byDay, data.dayKeys, true)];
   });
   pi.registerCommand("stats-pi", {
-    description: "Show prompt-injection token breakdown.",
+    description: "Show estimated initial prompt input token breakdown.",
     handler: async (_args, ctx) => {
-      ctx.ui.notify(formatPromptInjectionLines(ctx.getSystemPrompt(), latestSystemPromptOptions).join("\n"), "info");
+      const systemPrompt = ctx.getSystemPrompt(); // read once so the estimate and the table use the same prompt text
+      const promptEstimate = estimateInitialPromptForContext(systemPrompt, getPromptCalibration(ctx)); // scaled by calibration loaded from the session directory
+      ctx.ui.notify(formatPromptInjectionLines(systemPrompt, latestSystemPromptOptions, promptEstimate).join("\n"), "info");
+    },
+  });
+  // `/calibrate current` (alias `here`) records a sample from the current branch and reports the
+  // recalculated estimate. Any other argument starts a new session and sends CALIBRATION_PROMPT.
+  pi.registerCommand("calibrate", {
+    description: "Start an isolated calibration turn to calibrate PI initial prompt token estimates.",
+    handler: async (args, ctx) => {
+      const mode = args.trim().toLowerCase();
+      if (mode === "current" || mode === "here") {
+        const result = calibrateFromCurrentBranch(ctx);
+        if (!result.ok) {
+          ctx.ui.notify(`Calibration failed: ${result.reason}`, "warning");
+          return;
+        }
+        appendInitialPromptCalibrationRecord(pi.appendEntry, result.record);
+        // Re-read calibration so the reported estimate reflects the stored samples.
+        const calibration = getPromptCalibration(ctx);
+        const estimate = estimateInitialPromptForContext(ctx.getSystemPrompt(), calibration);
+        // Same singular/plural rule as formatCalibrationSummary, so one sample is not reported as "1 samples".
+        const sampleLabel = estimate.calibrationSamples === 1 ? "sample" : "samples";
+        ctx.ui.notify(
+          `Calibrated PI estimate: ~${formatTokens(estimate.total)} tok (scale ×${estimate.calibrationMultiplier.toFixed(2)}, ${estimate.calibrationSamples} ${sampleLabel}). Run /stats-pi for details.`,
+          "info",
+        );
+        return;
+      }
+      // The probe path refuses to run unless the agent is idle.
+      if (!ctx.isIdle()) {
+        ctx.ui.notify("Calibration needs an idle agent so it can start a clean probe turn.", "warning");
+        return;
+      }
+      ctx.ui.notify("Starting isolated calibration session…", "info");
+      // NOTE(review): the sample itself is presumably recorded by the message_end handler once the
+      // probe's first assistant reply reports usage — confirm.
+      await ctx.newSession({
+        withSession: async (newCtx) => {
+          await newCtx.sendUserMessage(CALIBRATION_PROMPT);
+        },
+      });
     },
   });
@@ -726,9 +946,9 @@ export default function statsExtension(pi: ExtensionAPI) {
       if (!data) return;
       const systemPrompt = ctx.getSystemPrompt();
-      const promptInjectionTokens = estimatePromptInjectionTokens(systemPrompt);
+      const promptEstimate = estimateInitialPromptForContext(systemPrompt, data.calibration);
       const graphLines = buildGraphLines(data.byDay, data.dayKeys, true);
-      const promptInjectionLines = formatPromptInjectionLines(systemPrompt, latestSystemPromptOptions);
+      const promptInjectionLines = formatPromptInjectionLines(systemPrompt, latestSystemPromptOptions, promptEstimate);
       const modelLines = formatModelComparisonLines(data.records, data.dayKeys, data.totals).slice(0, 7);
       const sessionLines = formatExpensiveSessionLines(data.records, data.dayKeys).slice(0, 7);
       const commandLines = [
@@ -737,7 +957,7 @@ export default function statsExtension(pi: ExtensionAPI) {
       ];
       ctx.ui.notify(
-        `📊 Token stats (${data.scopeLabel}, ${data.files.length} sessions) · PI: ${formatTokens(promptInjectionTokens)} tok\n\n${graphLines.join("\n")}\n\n${promptInjectionLines.join("\n")}\n\n${buildCostTrendLines(data.byDay, data.dayKeys).join("\n")}\n${buildCacheEfficiencyLines(data.totals).join("\n")}\n\n${modelLines.join("\n")}\n\n${sessionLines.join("\n")}\n\n${commandLines.join("\n")}`,
+        `📊 Token stats (${data.scopeLabel}, ${data.files.length} sessions) · PI: ~${formatTokens(promptEstimate.total)} tok\n\n${graphLines.join("\n")}\n\n${promptInjectionLines.join("\n")}\n\n${buildCostTrendLines(data.byDay, data.dayKeys).join("\n")}\n${buildCacheEfficiencyLines(data.totals).join("\n")}\n\n${modelLines.join("\n")}\n\n${sessionLines.join("\n")}\n\n${commandLines.join("\n")}`,
         "info",
       );
     },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@firstpick/pi-extension-stats",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "description": "Token and cost usage analytics command for Pi session history.",
   "license": "MIT",
   "keywords": [