npm - @firstpick/pi-extension-stats - Versions diffs - 0.1.8 → 0.1.9 - Mend

@firstpick/pi-extension-stats 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -27,6 +27,7 @@ No required configuration.
 - `/stats [days|all]` — show token usage dashboard (default: last 14 days).
 - `/stats tokens` — show current context token breakdown by source/type.
 - `/stats-pi` — show estimated initial prompt input token breakdown. It counts Pi's system prompt text, active provider-level tool schemas, framing overhead, and optional historical calibration.
+- `/calibrate` — start an isolated calibration session with a fixed probe prompt, then update `/stats-pi` and the footer `PI: X tok` estimate from the first assistant response usage. `/calibrate current` reuses the current branch if it already has a suitable first-turn usage sample.
 - `/stats-last [days|all]` — show non-zero daily usage graph.
 - `/stats-most-expense [days|all]` — show most expensive sessions.
 - `/stats-model-compare [days|all]` — show model token/cost comparison.
@@ -47,7 +48,7 @@ baseEstimate = promptTextTokens + toolSchemaTokens + framingTokens
 estimatedInitialInput = baseEstimate × historicalCalibrationMultiplier
 ```
-The historical multiplier is learned opportunistically from future sessions by comparing the pre-call estimate with the provider-reported first assistant `usage.input + usage.cacheRead + usage.cacheWrite` after subtracting the first user prompt estimate. Without samples, `/stats-pi` reports an uncalibrated estimate and a conservative range. Provider-reported usage in Pi session JSONL remains the authoritative post-call value.
+The historical multiplier is learned opportunistically from future sessions by comparing the pre-call estimate with the provider-reported first assistant `usage.input + usage.cacheRead + usage.cacheWrite` after subtracting the first user prompt estimate. `/calibrate` performs the same calculation on demand by opening an isolated session and sending a fixed probe prompt; `/calibrate current` can reuse the current branch once its first assistant response has usage data. Without samples, `/stats-pi` reports an uncalibrated estimate and a conservative range. Provider-reported usage in Pi session JSONL remains the authoritative post-call value.
 ## Tools

package/index.ts CHANGED Viewed

@@ -62,6 +62,7 @@ const DEFAULT_DAYS = 14;
 const MAX_BAR_WIDTH = 24;
 const COST_BAR_WIDTH = 10;
 const FIRST_USER_MESSAGE_FRAMING_TOKENS = 16;
+const CALIBRATION_PROMPT = "Calibration probe: reply with exactly `calibration-ok` and no other text.";
 function addPromptSource(sources: PromptInjectionSource[], label: string, content: string | undefined): number {
   if (!content) return 0;
@@ -712,6 +713,44 @@ export default function statsExtension(pi: ExtensionAPI) {
     }
   };
+  const calibrateFromCurrentBranch = (ctx: ExtensionCommandContext): { ok: true; record: NonNullable<ReturnType<typeof buildInitialPromptCalibrationRecord>> } | { ok: false; reason: string } => {
+    let firstUserTokens: number | null = null;
+    let firstAssistantWithUsage: Record<string, any> | null = null;
+    for (const entry of ctx.sessionManager.getBranch()) {
+      const record = (entry && typeof entry === "object" ? entry : {}) as Record<string, any>;
+      if (record.type !== "message") continue;
+      const message = (record.message && typeof record.message === "object" ? record.message : {}) as Record<string, any>;
+      if (message.role === "user" && firstUserTokens === null) {
+        firstUserTokens = estimatePromptInjectionTokens(stringifyContextValue(message.content)) + FIRST_USER_MESSAGE_FRAMING_TOKENS;
+      }
+      if (message.role === "assistant" && message.usage) {
+        firstAssistantWithUsage = message;
+        break;
+      }
+    }
+    if (firstUserTokens === null) return { ok: false, reason: "No initial user message found in the current branch." };
+    if (!firstAssistantWithUsage) return { ok: false, reason: "No assistant response with usage data found yet. Run /calibrate after the first assistant response finishes." };
+    const usage = firstAssistantWithUsage.usage as Record<string, unknown>;
+    const actualInitialInputTokens =
+      (Number(usage.input ?? 0) || 0) + (Number(usage.cacheRead ?? 0) || 0) + (Number(usage.cacheWrite ?? 0) || 0);
+    if (actualInitialInputTokens <= 0) return { ok: false, reason: "The first assistant response has no input/cache token usage to calibrate from." };
+    const estimate = estimateInitialPromptForContext(ctx.getSystemPrompt(), null);
+    const record = buildInitialPromptCalibrationRecord({
+      estimate,
+      actualInitialInputTokens,
+      firstUserTokens,
+      provider: String(firstAssistantWithUsage.provider ?? ctx.model?.provider ?? "unknown"),
+      model: String(firstAssistantWithUsage.responseModel ?? firstAssistantWithUsage.model ?? ctx.model?.id ?? "unknown"),
+    });
+    if (!record) return { ok: false, reason: "Calibration sample was outside the accepted sanity range (0.25×–4×)." };
+    return { ok: true, record };
+  };
   pi.on("session_start", async () => {
     pendingInitialPromptMeasurement = null;
   });
@@ -859,6 +898,41 @@ export default function statsExtension(pi: ExtensionAPI) {
     },
   });
+  pi.registerCommand("calibrate", {
+    description: "Start an isolated calibration turn to calibrate PI initial prompt token estimates.",
+    handler: async (args, ctx) => {
+      const mode = args.trim().toLowerCase();
+      if (mode === "current" || mode === "here") {
+        const result = calibrateFromCurrentBranch(ctx);
+        if (!result.ok) {
+          ctx.ui.notify(`Calibration failed: ${result.reason}`, "warning");
+          return;
+        }
+        appendInitialPromptCalibrationRecord(pi.appendEntry, result.record);
+        const calibration = getPromptCalibration(ctx);
+        const estimate = estimateInitialPromptForContext(ctx.getSystemPrompt(), calibration);
+        ctx.ui.notify(
+          `Calibrated PI estimate: ~${formatTokens(estimate.total)} tok (scale ×${estimate.calibrationMultiplier.toFixed(2)}, ${estimate.calibrationSamples} samples). Run /stats-pi for details.`,
+          "info",
+        );
+        return;
+      }
+      if (!ctx.isIdle()) {
+        ctx.ui.notify("Calibration needs an idle agent so it can start a clean probe turn.", "warning");
+        return;
+      }
+      ctx.ui.notify("Starting isolated calibration session…", "info");
+      await ctx.newSession({
+        withSession: async (newCtx) => {
+          await newCtx.sendUserMessage(CALIBRATION_PROMPT);
+        },
+      });
+    },
+  });
   pi.registerCommand("stats", {
     description: "Show token usage dashboard. Usage: /stats, /stats 30, /stats all. Details: /stats-tokens, /stats-pi, /stats-last, /stats-most-expense, /stats-model-compare, /stats-cost-trend, /stats-cache",
     handler: async (args, ctx) => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@firstpick/pi-extension-stats",
-  "version": "0.1.8",
+  "version": "0.1.9",
   "description": "Token and cost usage analytics command for Pi session history.",
   "license": "MIT",
   "keywords": [