@firstpick/pi-extension-stats 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/index.ts +74 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -27,6 +27,7 @@ No required configuration.
|
|
|
27
27
|
- `/stats [days|all]` — show token usage dashboard (default: last 14 days).
|
|
28
28
|
- `/stats tokens` — show current context token breakdown by source/type.
|
|
29
29
|
- `/stats-pi` — show estimated initial prompt input token breakdown. It counts Pi's system prompt text, active provider-level tool schemas, framing overhead, and optional historical calibration.
|
|
30
|
+
- `/calibrate` — start an isolated calibration session with a fixed probe prompt, then update `/stats-pi` and the footer `PI: X tok` estimate from the first assistant response usage. `/calibrate current` reuses the current branch if it already has a suitable first-turn usage sample.
|
|
30
31
|
- `/stats-last [days|all]` — show non-zero daily usage graph.
|
|
31
32
|
- `/stats-most-expense [days|all]` — show most expensive sessions.
|
|
32
33
|
- `/stats-model-compare [days|all]` — show model token/cost comparison.
|
|
@@ -47,7 +48,7 @@ baseEstimate = promptTextTokens + toolSchemaTokens + framingTokens
|
|
|
47
48
|
estimatedInitialInput = baseEstimate × historicalCalibrationMultiplier
|
|
48
49
|
```
|
|
49
50
|
|
|
50
|
-
The historical multiplier is learned opportunistically from future sessions by comparing the pre-call estimate with the provider-reported first assistant `usage.input + usage.cacheRead + usage.cacheWrite` after subtracting the first user prompt estimate. Without samples, `/stats-pi` reports an uncalibrated estimate and a conservative range. Provider-reported usage in Pi session JSONL remains the authoritative post-call value.
|
|
51
|
+
The historical multiplier is learned opportunistically from future sessions by comparing the pre-call estimate with the provider-reported first assistant `usage.input + usage.cacheRead + usage.cacheWrite` after subtracting the first user prompt estimate. `/calibrate` performs the same calculation on demand by opening an isolated session and sending a fixed probe prompt; `/calibrate current` can reuse the current branch once its first assistant response has usage data. Without samples, `/stats-pi` reports an uncalibrated estimate and a conservative range. Provider-reported usage in Pi session JSONL remains the authoritative post-call value.
|
|
51
52
|
|
|
52
53
|
## Tools
|
|
53
54
|
|
package/index.ts
CHANGED
|
@@ -62,6 +62,7 @@ const DEFAULT_DAYS = 14;
|
|
|
62
62
|
const MAX_BAR_WIDTH = 24;
|
|
63
63
|
const COST_BAR_WIDTH = 10;
|
|
64
64
|
const FIRST_USER_MESSAGE_FRAMING_TOKENS = 16;
|
|
65
|
+
const CALIBRATION_PROMPT = "Calibration probe: reply with exactly `calibration-ok` and no other text.";
|
|
65
66
|
|
|
66
67
|
function addPromptSource(sources: PromptInjectionSource[], label: string, content: string | undefined): number {
|
|
67
68
|
if (!content) return 0;
|
|
@@ -712,6 +713,44 @@ export default function statsExtension(pi: ExtensionAPI) {
|
|
|
712
713
|
}
|
|
713
714
|
};
|
|
714
715
|
|
|
716
|
+
/**
 * Derives an initial-prompt calibration record from the current session branch.
 *
 * Walks the branch in order, estimating the size of the first user message and
 * stopping at the first assistant message that carries a `usage` object. That
 * usage (input + cacheRead + cacheWrite) is the "actual" value compared with an
 * uncalibrated estimate of the current system prompt.
 *
 * @param ctx Command context supplying the session branch, system prompt and active model.
 * @returns `{ ok: true, record }` on success, otherwise `{ ok: false, reason }`
 *          with a user-facing explanation.
 */
const calibrateFromCurrentBranch = (ctx: ExtensionCommandContext): { ok: true; record: NonNullable<ReturnType<typeof buildInitialPromptCalibrationRecord>> } | { ok: false; reason: string } => {
  // Non-object values are treated as an empty record so property reads below never throw.
  const asRecord = (value: unknown): Record<string, any> =>
    (value && typeof value === "object" ? value : {}) as Record<string, any>;
  // Missing, non-numeric or NaN counters contribute zero.
  const toCount = (value: unknown): number => Number(value ?? 0) || 0;

  let userPromptTokens: number | null = null;
  let sampledAssistant: Record<string, any> | null = null;

  for (const branchEntry of ctx.sessionManager.getBranch()) {
    const entry = asRecord(branchEntry);
    if (entry.type !== "message") continue;

    const msg = asRecord(entry.message);
    if (msg.role === "user") {
      // Only the first user message counts; later ones are ignored.
      if (userPromptTokens === null) {
        userPromptTokens =
          estimatePromptInjectionTokens(stringifyContextValue(msg.content)) + FIRST_USER_MESSAGE_FRAMING_TOKENS;
      }
    } else if (msg.role === "assistant" && msg.usage) {
      sampledAssistant = msg;
      break;
    }
  }

  if (userPromptTokens === null) {
    return { ok: false, reason: "No initial user message found in the current branch." };
  }
  if (!sampledAssistant) {
    return {
      ok: false,
      reason: "No assistant response with usage data found yet. Run /calibrate after the first assistant response finishes.",
    };
  }

  const reportedUsage = sampledAssistant.usage as Record<string, unknown>;
  const actualInitialInputTokens = [reportedUsage.input, reportedUsage.cacheRead, reportedUsage.cacheWrite].reduce<number>(
    (sum, counter) => sum + toCount(counter),
    0,
  );
  if (actualInitialInputTokens <= 0) {
    return { ok: false, reason: "The first assistant response has no input/cache token usage to calibrate from." };
  }

  // `null` calibration: the sample is compared against the raw, unscaled estimate.
  const baseline = estimateInitialPromptForContext(ctx.getSystemPrompt(), null);
  const provider = String(sampledAssistant.provider ?? ctx.model?.provider ?? "unknown");
  const model = String(sampledAssistant.responseModel ?? sampledAssistant.model ?? ctx.model?.id ?? "unknown");
  const sample = buildInitialPromptCalibrationRecord({
    estimate: baseline,
    actualInitialInputTokens,
    firstUserTokens: userPromptTokens,
    provider,
    model,
  });

  // A falsy result is reported as an out-of-range sample, matching the message below.
  return sample
    ? { ok: true, record: sample }
    : { ok: false, reason: "Calibration sample was outside the accepted sanity range (0.25×–4×)." };
};
|
|
753
|
+
|
|
715
754
|
pi.on("session_start", async () => {
|
|
716
755
|
pendingInitialPromptMeasurement = null;
|
|
717
756
|
});
|
|
@@ -859,6 +898,41 @@ export default function statsExtension(pi: ExtensionAPI) {
|
|
|
859
898
|
},
|
|
860
899
|
});
|
|
861
900
|
|
|
901
|
+
/**
 * `/calibrate` command.
 *
 * - `/calibrate current` (alias `here`): builds a calibration record from the
 *   current branch via `calibrateFromCurrentBranch`, passes it to
 *   `appendInitialPromptCalibrationRecord`, and reports the re-scaled estimate.
 * - `/calibrate` (no argument): requires an idle agent, then opens a new session
 *   and sends the fixed `CALIBRATION_PROMPT` as its first user message.
 *   NOTE(review): the sample for this path is presumably captured by the usage
 *   listeners registered elsewhere in this extension — confirm.
 * - Any other argument is rejected with a usage warning, so a typo cannot
 *   silently abandon the current session and spend tokens on a probe turn.
 */
pi.registerCommand("calibrate", {
  description: "Start an isolated calibration turn to calibrate PI initial prompt token estimates.",
  handler: async (args, ctx) => {
    const mode = args.trim().toLowerCase();

    if (mode === "current" || mode === "here") {
      const result = calibrateFromCurrentBranch(ctx);
      if (!result.ok) {
        ctx.ui.notify(`Calibration failed: ${result.reason}`, "warning");
        return;
      }

      appendInitialPromptCalibrationRecord(pi.appendEntry, result.record);
      // Re-read calibration after appending so the summary reflects the new sample.
      const calibration = getPromptCalibration(ctx);
      const estimate = estimateInitialPromptForContext(ctx.getSystemPrompt(), calibration);
      ctx.ui.notify(
        `Calibrated PI estimate: ~${formatTokens(estimate.total)} tok (scale ×${estimate.calibrationMultiplier.toFixed(2)}, ${estimate.calibrationSamples} samples). Run /stats-pi for details.`,
        "info",
      );
      return;
    }

    // Only the bare command may start a probe session; unknown arguments are
    // almost certainly typos of "current" and must not trigger a session switch.
    if (mode !== "") {
      ctx.ui.notify(`Unknown /calibrate argument "${mode}". Usage: /calibrate or /calibrate current.`, "warning");
      return;
    }

    if (!ctx.isIdle()) {
      ctx.ui.notify("Calibration needs an idle agent so it can start a clean probe turn.", "warning");
      return;
    }

    ctx.ui.notify("Starting isolated calibration session…", "info");
    await ctx.newSession({
      withSession: async (newCtx) => {
        await newCtx.sendUserMessage(CALIBRATION_PROMPT);
      },
    });
  },
});
|
|
935
|
+
|
|
862
936
|
pi.registerCommand("stats", {
|
|
863
937
|
description: "Show token usage dashboard. Usage: /stats, /stats 30, /stats all. Details: /stats-tokens, /stats-pi, /stats-last, /stats-most-expense, /stats-model-compare, /stats-cost-trend, /stats-cache",
|
|
864
938
|
handler: async (args, ctx) => {
|