@caupulican/pi-adaptative 0.80.74 → 0.80.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +35 -0
  2. package/dist/core/agent-session.d.ts +32 -1
  3. package/dist/core/agent-session.d.ts.map +1 -1
  4. package/dist/core/agent-session.js +96 -9
  5. package/dist/core/agent-session.js.map +1 -1
  6. package/dist/core/compaction/compaction.d.ts +22 -0
  7. package/dist/core/compaction/compaction.d.ts.map +1 -1
  8. package/dist/core/compaction/compaction.js +31 -3
  9. package/dist/core/compaction/compaction.js.map +1 -1
  10. package/dist/core/cost-guard.d.ts +55 -0
  11. package/dist/core/cost-guard.d.ts.map +1 -0
  12. package/dist/core/cost-guard.js +50 -0
  13. package/dist/core/cost-guard.js.map +1 -0
  14. package/dist/core/learning/reflection-engine.d.ts +7 -0
  15. package/dist/core/learning/reflection-engine.d.ts.map +1 -1
  16. package/dist/core/learning/reflection-engine.js +22 -13
  17. package/dist/core/learning/reflection-engine.js.map +1 -1
  18. package/dist/core/memory/providers/file-store.d.ts.map +1 -1
  19. package/dist/core/memory/providers/file-store.js +33 -2
  20. package/dist/core/memory/providers/file-store.js.map +1 -1
  21. package/dist/core/resource-loader.d.ts +19 -1
  22. package/dist/core/resource-loader.d.ts.map +1 -1
  23. package/dist/core/resource-loader.js +69 -5
  24. package/dist/core/resource-loader.js.map +1 -1
  25. package/dist/core/settings-manager.d.ts +16 -0
  26. package/dist/core/settings-manager.d.ts.map +1 -1
  27. package/dist/core/settings-manager.js +15 -0
  28. package/dist/core/settings-manager.js.map +1 -1
  29. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  30. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  31. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  32. package/examples/extensions/sandbox/package-lock.json +2 -2
  33. package/examples/extensions/sandbox/package.json +1 -1
  34. package/examples/extensions/with-deps/package-lock.json +2 -2
  35. package/examples/extensions/with-deps/package.json +1 -1
  36. package/npm-shrinkwrap.json +12 -12
  37. package/package.json +4 -4
@@ -24,6 +24,7 @@ import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth
24
24
  import { executeBashWithOperations } from "./bash-executor.js";
25
25
  import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
26
26
  import { applyContextGc } from "./context-gc.js";
27
+ import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
27
28
  import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
28
29
  import { exportSessionToHtml } from "./export-html/index.js";
29
30
  import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
@@ -39,7 +40,7 @@ import { FileStoreProvider } from "./memory/providers/file-store.js";
39
40
  import { TranscriptRecallProvider } from "./memory/providers/transcript-recall.js";
40
41
  import { compactToolResultDetailsForRetention } from "./message-retention.js";
41
42
  import { createCustomMessage } from "./messages.js";
42
- import { resolveProfileModelSettings } from "./model-resolver.js";
43
+ import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
43
44
  import { expandPromptTemplate } from "./prompt-templates.js";
44
45
  import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
45
46
  import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
@@ -130,6 +131,10 @@ export class AgentSession {
130
131
  _gatewayRegistry = new GatewayRegistry();
131
132
  /** Cache for getSpawnedUsage(), keyed by session entry count (Bug #22 — avoid O(N) per render frame). */
132
133
  _spawnedUsageCache;
134
+ /** Latest proactive cost-guard decision (#34), for the host UI to surface. Undefined when disabled. */
135
+ _lastCostGuardDecision;
136
+ /** One-shot latch so the cost guard downgrades reasoning once per over-threshold episode, not every call. */
137
+ _costGuardDowngraded = false;
133
138
  /** Set on dispose so in-flight background reflection bails instead of writing to a dead session (Bug #21). */
134
139
  _disposed = false;
135
140
  /** Aborts in-flight background reflection completions on dispose (Bug #21). */
@@ -219,6 +224,39 @@ export class AgentSession {
219
224
  const result = await this._modelRegistry.getApiKeyAndHeaders(model);
220
225
  return result.ok ? { apiKey: result.apiKey, headers: result.headers } : {};
221
226
  }
227
+ /**
228
+ * Resolve the model used to SUMMARIZE during compaction (cost guard, #30). A compaction summary is an
229
+ * extraction task — it does not need the main (expensive) model. Selection:
230
+ * - an explicit `compaction.model` setting wins, but only if its provider is authed (else fall back);
231
+ * - `"auto"` (default) picks the CHEAPEST authed model whose context window can hold a compaction
232
+ * (capability floor), and ONLY if it is strictly cheaper than the session model — so we never
233
+ * downgrade to an equally-priced but weaker summarizer (agy's floor: don't degrade the checkpoint);
234
+ * - otherwise the session model is used (safe default).
235
+ */
236
+ _resolveCompactionModel(sessionModel) {
237
+ const setting = this.settingsManager.getCompactionModel();
238
+ if (setting && setting !== "auto") {
239
+ const resolved = resolveCliModel({ cliModel: setting, modelRegistry: this._modelRegistry });
240
+ if (resolved.model && this._modelRegistry.hasConfiguredAuth(resolved.model))
241
+ return resolved.model;
242
+ return sessionModel; // configured but unusable → don't break compaction
243
+ }
244
+ // "auto": cheapest authed model that can summarize a large context AND is cheaper than the session
245
+ // model. The context-window floor keeps a tiny local model from being picked for a big summary.
246
+ const FLOOR_CONTEXT = 64_000;
247
+ const sessionInputCost = sessionModel.cost?.input ?? Number.POSITIVE_INFINITY;
248
+ let best;
249
+ for (const m of this._modelRegistry.getAvailable()) {
250
+ if ((m.contextWindow ?? 0) < FLOOR_CONTEXT)
251
+ continue;
252
+ const cost = m.cost?.input ?? Number.POSITIVE_INFINITY;
253
+ if (cost >= sessionInputCost)
254
+ continue; // only ever pick something cheaper than the session model
255
+ if (!best || cost < (best.cost?.input ?? Number.POSITIVE_INFINITY))
256
+ best = m;
257
+ }
258
+ return best ?? sessionModel;
259
+ }
222
260
  /**
223
261
  * Install tool hooks once on the Agent instance.
224
262
  *
@@ -255,9 +293,50 @@ export class AgentSession {
255
293
  if (this._extensionRunner.hasHandlers("context")) {
256
294
  finalMessages = await this._extensionRunner.emitContext(currentMessages);
257
295
  }
258
- return this._applyContextGc(finalMessages, true).messages;
296
+ const gcMessages = this._applyContextGc(finalMessages, true).messages;
297
+ this._applyCostGuard(gcMessages);
298
+ return gcMessages;
259
299
  };
260
300
  }
301
+ /**
302
+ * Proactive per-turn cost guard (#34): estimate the USD cost of the about-to-be-submitted turn and,
303
+ * when it exceeds the user's ceiling, record a warning decision (for the host UI to surface) and —
304
+ * if configured to `downgrade` — step reasoning effort down ONCE per over-threshold episode to curb a
305
+ * runaway billing spike. Disabled by default (`maxTurnUsd<=0`), so it never alters behavior unless the
306
+ * user opts in. Best-effort: never throws into the turn.
307
+ */
308
+ _applyCostGuard(messages) {
309
+ try {
310
+ const guard = this.settingsManager.getCostGuardSettings();
311
+ if (guard.maxTurnUsd <= 0 || !this.model?.cost) {
312
+ this._lastCostGuardDecision = undefined;
313
+ return;
314
+ }
315
+ const inputTokens = this._estimateCurrentContextTokens(messages);
316
+ const maxOutputTokens = this.model.maxTokens ?? 4096;
317
+ const estUsd = estimateTurnCostUsd({ inputTokens, maxOutputTokens, cost: this.model.cost });
318
+ const decision = evaluateCostGuard(estUsd, { maxTurnUsd: guard.maxTurnUsd, action: guard.action });
319
+ this._lastCostGuardDecision = decision;
320
+ if (!decision.over) {
321
+ this._costGuardDowngraded = false; // back under the ceiling — re-arm the one-shot downgrade
322
+ return;
323
+ }
324
+ if (guard.action === "downgrade" && !this._costGuardDowngraded && this.supportsThinking()) {
325
+ const next = downgradeReasoning(this.thinkingLevel);
326
+ if (next !== this.thinkingLevel) {
327
+ this.setThinkingLevel(next);
328
+ this._costGuardDowngraded = true;
329
+ }
330
+ }
331
+ }
332
+ catch {
333
+ // cost guard must never disrupt a turn
334
+ }
335
+ }
336
+ /** Latest cost-guard decision (for the host footer/UI to surface a warning). Undefined if disabled. */
337
+ getLastCostGuardDecision() {
338
+ return this._lastCostGuardDecision;
339
+ }
261
340
  _installAgentTurnRefresh() {
262
341
  const previousPrepareNextTurn = this.agent.prepareNextTurn?.bind(this.agent);
263
342
  this.agent.prepareNextTurn = async (signal) => {
@@ -1676,7 +1755,8 @@ export class AgentSession {
1676
1755
  if (!this.model) {
1677
1756
  throw new Error(formatNoModelSelectedMessage());
1678
1757
  }
1679
- const { apiKey, headers } = await this._getCompactionRequestAuth(this.model);
1758
+ const compactionModel = this._resolveCompactionModel(this.model);
1759
+ const { apiKey, headers } = await this._getCompactionRequestAuth(compactionModel);
1680
1760
  const pathEntries = this.sessionManager.getBranch();
1681
1761
  const settings = this.settingsManager.getCompactionSettings();
1682
1762
  const preparation = prepareCompaction(pathEntries, settings);
@@ -1719,7 +1799,7 @@ export class AgentSession {
1719
1799
  }
1720
1800
  else {
1721
1801
  // Generate compaction result
1722
- const result = await compact(preparation, this.model, apiKey, headers, customInstructions, this._compactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
1802
+ const result = await compact(preparation, compactionModel, apiKey, headers, customInstructions, this._compactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
1723
1803
  summary = result.summary;
1724
1804
  firstKeptEntryId = result.firstKeptEntryId;
1725
1805
  tokensBefore = result.tokensBefore;
@@ -1897,10 +1977,12 @@ export class AgentSession {
1897
1977
  });
1898
1978
  return false;
1899
1979
  }
1980
+ // Summarize with the cheap auxiliary model when available (cost guard, #30).
1981
+ const compactionModel = this._resolveCompactionModel(this.model);
1900
1982
  let apiKey;
1901
1983
  let headers;
1902
1984
  if (this.agent.streamFn === streamSimple) {
1903
- const authResult = await this._modelRegistry.getApiKeyAndHeaders(this.model);
1985
+ const authResult = await this._modelRegistry.getApiKeyAndHeaders(compactionModel);
1904
1986
  if (!authResult.ok || !authResult.apiKey) {
1905
1987
  this._emit({
1906
1988
  type: "compaction_end",
@@ -1915,7 +1997,7 @@ export class AgentSession {
1915
1997
  headers = authResult.headers;
1916
1998
  }
1917
1999
  else {
1918
- ({ apiKey, headers } = await this._getCompactionRequestAuth(this.model));
2000
+ ({ apiKey, headers } = await this._getCompactionRequestAuth(compactionModel));
1919
2001
  }
1920
2002
  const pathEntries = this.sessionManager.getBranch();
1921
2003
  const preparation = prepareCompaction(pathEntries, settings);
@@ -1967,7 +2049,7 @@ export class AgentSession {
1967
2049
  }
1968
2050
  else {
1969
2051
  // Generate compaction result
1970
- const compactResult = await compact(preparation, this.model, apiKey, headers, undefined, this._autoCompactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
2052
+ const compactResult = await compact(preparation, compactionModel, apiKey, headers, undefined, this._autoCompactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
1971
2053
  summary = compactResult.summary;
1972
2054
  firstKeptEntryId = compactResult.firstKeptEntryId;
1973
2055
  tokensBefore = compactResult.tokensBefore;
@@ -3369,7 +3451,7 @@ export class AgentSession {
3369
3451
  const options = {
3370
3452
  maxTokens: opts.maxTokens,
3371
3453
  signal: opts.signal,
3372
- cacheRetention: "none",
3454
+ cacheRetention: opts.cacheRetention ?? "none",
3373
3455
  };
3374
3456
  // pi-ai's `reasoning` option does not include "off" (that's the provider default already).
3375
3457
  if (thinkingLevel !== "off") {
@@ -3429,6 +3511,9 @@ export class AgentSession {
3429
3511
  thinkingLevel: input.thinkingLevel ?? "low",
3430
3512
  maxTokens: plan.tokenBudget,
3431
3513
  signal,
3514
+ // The reflection system prompt is static (#33) — let the provider cache the prefix so
3515
+ // repeated passes only pay for the variable tail.
3516
+ cacheRetention: "short",
3432
3517
  });
3433
3518
  const result = await new ReflectionEngine().reflect({
3434
3519
  recentTurnText: input.recentTurnText,
@@ -3519,7 +3604,9 @@ export class AgentSession {
3519
3604
  return; // do not overwrite an existing skill
3520
3605
  mkdirSync(dir, { recursive: true });
3521
3606
  const safeDescription = description.replace(/[\r\n]+/g, " ").trim();
3522
- const content = `---\nname: ${name}\ndescription: ${safeDescription}\n---\n\n<!-- Auto-generated by the reflection engine (R7 memory-to-behavior). Review and refine. -->\n\n${body.trim()}\n`;
3607
+ // `promoted: true` marks this as reflection-generated so the curator (#32) can lifecycle-manage
3608
+ // it (archive/consolidate) WITHOUT ever touching hand-authored user skills.
3609
+ const content = `---\nname: ${name}\ndescription: ${safeDescription}\npromoted: true\n---\n\n<!-- Auto-generated by the reflection engine (R7 memory-to-behavior). Review and refine. -->\n\n${body.trim()}\n`;
3523
3610
  writeFileSync(file, content, "utf-8");
3524
3611
  }
3525
3612
  catch {