@caupulican/pi-adaptative 0.80.75 → 0.80.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,7 @@ import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth
24
24
  import { executeBashWithOperations } from "./bash-executor.js";
25
25
  import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
26
26
  import { applyContextGc } from "./context-gc.js";
27
+ import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
27
28
  import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
28
29
  import { exportSessionToHtml } from "./export-html/index.js";
29
30
  import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
@@ -130,6 +131,10 @@ export class AgentSession {
130
131
  _gatewayRegistry = new GatewayRegistry();
131
132
  /** Cache for getSpawnedUsage(), keyed by session entry count (Bug #22 — avoid O(N) per render frame). */
132
133
  _spawnedUsageCache;
134
+ /** Latest proactive cost-guard decision (#34), for the host UI to surface. Undefined when disabled. */
135
+ _lastCostGuardDecision;
136
+ /** One-shot latch so the cost guard downgrades reasoning once per over-threshold episode, not every call. */
137
+ _costGuardDowngraded = false;
133
138
  /** Set on dispose so in-flight background reflection bails instead of writing to a dead session (Bug #21). */
134
139
  _disposed = false;
135
140
  /** Aborts in-flight background reflection completions on dispose (Bug #21). */
@@ -288,9 +293,50 @@ export class AgentSession {
288
293
  if (this._extensionRunner.hasHandlers("context")) {
289
294
  finalMessages = await this._extensionRunner.emitContext(currentMessages);
290
295
  }
291
- return this._applyContextGc(finalMessages, true).messages;
296
+ const gcMessages = this._applyContextGc(finalMessages, true).messages;
297
+ this._applyCostGuard(gcMessages);
298
+ return gcMessages;
292
299
  };
293
300
  }
301
+ /**
302
+ * Proactive per-turn cost guard (#34): estimate the USD cost of the about-to-be-submitted turn and,
303
+ * when it exceeds the user's ceiling, record a warning decision (for the host UI to surface) and —
304
+ * if configured to `downgrade` — step reasoning effort down ONCE per over-threshold episode to curb a
305
+ * runaway billing spike. Disabled by default (`maxTurnUsd<=0`), so it never alters behavior unless the
306
+ * user opts in. Best-effort: never throws into the turn.
307
+ */
308
+ _applyCostGuard(messages) {
309
+ try {
310
+ const guard = this.settingsManager.getCostGuardSettings();
311
+ if (guard.maxTurnUsd <= 0 || !this.model?.cost) {
312
+ this._lastCostGuardDecision = undefined;
313
+ return;
314
+ }
315
+ const inputTokens = this._estimateCurrentContextTokens(messages);
316
+ const maxOutputTokens = this.model.maxTokens ?? 4096;
317
+ const estUsd = estimateTurnCostUsd({ inputTokens, maxOutputTokens, cost: this.model.cost });
318
+ const decision = evaluateCostGuard(estUsd, { maxTurnUsd: guard.maxTurnUsd, action: guard.action });
319
+ this._lastCostGuardDecision = decision;
320
+ if (!decision.over) {
321
+ this._costGuardDowngraded = false; // back under the ceiling — re-arm the one-shot downgrade
322
+ return;
323
+ }
324
+ if (guard.action === "downgrade" && !this._costGuardDowngraded && this.supportsThinking()) {
325
+ const next = downgradeReasoning(this.thinkingLevel);
326
+ if (next !== this.thinkingLevel) {
327
+ this.setThinkingLevel(next);
328
+ this._costGuardDowngraded = true;
329
+ }
330
+ }
331
+ }
332
+ catch {
333
+ // cost guard must never disrupt a turn
334
+ }
335
+ }
336
+ /** Latest cost-guard decision (for the host footer/UI to surface a warning). Undefined if disabled. */
337
+ getLastCostGuardDecision() {
338
+ return this._lastCostGuardDecision;
339
+ }
294
340
  _installAgentTurnRefresh() {
295
341
  const previousPrepareNextTurn = this.agent.prepareNextTurn?.bind(this.agent);
296
342
  this.agent.prepareNextTurn = async (signal) => {
@@ -3405,7 +3451,7 @@ export class AgentSession {
3405
3451
  const options = {
3406
3452
  maxTokens: opts.maxTokens,
3407
3453
  signal: opts.signal,
3408
- cacheRetention: "none",
3454
+ cacheRetention: opts.cacheRetention ?? "none",
3409
3455
  };
3410
3456
  // pi-ai's `reasoning` option does not include "off" (that's the provider default already).
3411
3457
  if (thinkingLevel !== "off") {
@@ -3465,6 +3511,9 @@ export class AgentSession {
3465
3511
  thinkingLevel: input.thinkingLevel ?? "low",
3466
3512
  maxTokens: plan.tokenBudget,
3467
3513
  signal,
3514
+ // The reflection system prompt is static (#33) — let the provider cache the prefix so
3515
+ // repeated passes only pay for the variable tail.
3516
+ cacheRetention: "short",
3468
3517
  });
3469
3518
  const result = await new ReflectionEngine().reflect({
3470
3519
  recentTurnText: input.recentTurnText,
@@ -3555,7 +3604,9 @@ export class AgentSession {
3555
3604
  return; // do not overwrite an existing skill
3556
3605
  mkdirSync(dir, { recursive: true });
3557
3606
  const safeDescription = description.replace(/[\r\n]+/g, " ").trim();
3558
- const content = `---\nname: ${name}\ndescription: ${safeDescription}\n---\n\n<!-- Auto-generated by the reflection engine (R7 memory-to-behavior). Review and refine. -->\n\n${body.trim()}\n`;
3607
+ // `promoted: true` marks this as reflection-generated so the curator (#32) can lifecycle-manage
3608
+ // it (archive/consolidate) WITHOUT ever touching hand-authored user skills.
3609
+ const content = `---\nname: ${name}\ndescription: ${safeDescription}\npromoted: true\n---\n\n<!-- Auto-generated by the reflection engine (R7 memory-to-behavior). Review and refine. -->\n\n${body.trim()}\n`;
3559
3610
  writeFileSync(file, content, "utf-8");
3560
3611
  }
3561
3612
  catch {