npm - @caupulican/pi-adaptative - Versions diffs - 0.80.74 → 0.80.76 - Mend

@caupulican/pi-adaptative 0.80.74 → 0.80.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/CHANGELOG.md +35 -0
package/dist/core/agent-session.d.ts +32 -1
package/dist/core/agent-session.d.ts.map +1 -1
package/dist/core/agent-session.js +96 -9
package/dist/core/agent-session.js.map +1 -1
package/dist/core/compaction/compaction.d.ts +22 -0
package/dist/core/compaction/compaction.d.ts.map +1 -1
package/dist/core/compaction/compaction.js +31 -3
package/dist/core/compaction/compaction.js.map +1 -1
package/dist/core/cost-guard.d.ts +55 -0
package/dist/core/cost-guard.d.ts.map +1 -0
package/dist/core/cost-guard.js +50 -0
package/dist/core/cost-guard.js.map +1 -0
package/dist/core/learning/reflection-engine.d.ts +7 -0
package/dist/core/learning/reflection-engine.d.ts.map +1 -1
package/dist/core/learning/reflection-engine.js +22 -13
package/dist/core/learning/reflection-engine.js.map +1 -1
package/dist/core/memory/providers/file-store.d.ts.map +1 -1
package/dist/core/memory/providers/file-store.js +33 -2
package/dist/core/memory/providers/file-store.js.map +1 -1
package/dist/core/resource-loader.d.ts +19 -1
package/dist/core/resource-loader.d.ts.map +1 -1
package/dist/core/resource-loader.js +69 -5
package/dist/core/resource-loader.js.map +1 -1
package/dist/core/settings-manager.d.ts +16 -0
package/dist/core/settings-manager.d.ts.map +1 -1
package/dist/core/settings-manager.js +15 -0
package/dist/core/settings-manager.js.map +1 -1
package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
package/examples/extensions/custom-provider-anthropic/package.json +1 -1
package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
package/examples/extensions/sandbox/package-lock.json +2 -2
package/examples/extensions/sandbox/package.json +1 -1
package/examples/extensions/with-deps/package-lock.json +2 -2
package/examples/extensions/with-deps/package.json +1 -1
package/npm-shrinkwrap.json +12 -12
package/package.json +4 -4

package/dist/core/agent-session.js CHANGED Viewed

@@ -24,6 +24,7 @@ import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth
 import { executeBashWithOperations } from "./bash-executor.js";
 import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
 import { applyContextGc } from "./context-gc.js";
+import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
 import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
 import { exportSessionToHtml } from "./export-html/index.js";
 import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
@@ -39,7 +40,7 @@ import { FileStoreProvider } from "./memory/providers/file-store.js";
 import { TranscriptRecallProvider } from "./memory/providers/transcript-recall.js";
 import { compactToolResultDetailsForRetention } from "./message-retention.js";
 import { createCustomMessage } from "./messages.js";
-import { resolveProfileModelSettings } from "./model-resolver.js";
+import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
 import { expandPromptTemplate } from "./prompt-templates.js";
 import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
 import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
@@ -130,6 +131,10 @@ export class AgentSession {
     _gatewayRegistry = new GatewayRegistry();
     /** Cache for getSpawnedUsage(), keyed by session entry count (Bug #22 — avoid O(N) per render frame). */
     _spawnedUsageCache;
+    /** Latest proactive cost-guard decision (#34), for the host UI to surface. Undefined when disabled. */
+    _lastCostGuardDecision;
+    /** One-shot latch so the cost guard downgrades reasoning once per over-threshold episode, not every call. */
+    _costGuardDowngraded = false;
     /** Set on dispose so in-flight background reflection bails instead of writing to a dead session (Bug #21). */
     _disposed = false;
     /** Aborts in-flight background reflection completions on dispose (Bug #21). */
@@ -219,6 +224,39 @@ export class AgentSession {
         const result = await this._modelRegistry.getApiKeyAndHeaders(model);
         return result.ok ? { apiKey: result.apiKey, headers: result.headers } : {};
     }
+    /**
+     * Resolve the model used to SUMMARIZE during compaction (cost guard, #30). A compaction summary is an
+     * extraction task — it does not need the main (expensive) model. Selection:
+     *   - an explicit `compaction.model` setting wins, but only if its provider is authed (else fall back);
+     *   - `"auto"` (default) picks the CHEAPEST authed model whose context window can hold a compaction
+     *     (capability floor), and ONLY if it is strictly cheaper than the session model — so we never
+     *     downgrade to an equally-priced but weaker summarizer (agy's floor: don't degrade the checkpoint);
+     *   - otherwise the session model is used (safe default).
+     */
+    _resolveCompactionModel(sessionModel) {
+        const setting = this.settingsManager.getCompactionModel();
+        if (setting && setting !== "auto") {
+            const resolved = resolveCliModel({ cliModel: setting, modelRegistry: this._modelRegistry });
+            if (resolved.model && this._modelRegistry.hasConfiguredAuth(resolved.model))
+                return resolved.model;
+            return sessionModel; // configured but unusable → don't break compaction
+        }
+        // "auto": cheapest authed model that can summarize a large context AND is cheaper than the session
+        // model. The context-window floor keeps a tiny local model from being picked for a big summary.
+        const FLOOR_CONTEXT = 64_000;
+        const sessionInputCost = sessionModel.cost?.input ?? Number.POSITIVE_INFINITY;
+        let best;
+        for (const m of this._modelRegistry.getAvailable()) {
+            if ((m.contextWindow ?? 0) < FLOOR_CONTEXT)
+                continue;
+            const cost = m.cost?.input ?? Number.POSITIVE_INFINITY;
+            if (cost >= sessionInputCost)
+                continue; // only ever pick something cheaper than the session model
+            if (!best || cost < (best.cost?.input ?? Number.POSITIVE_INFINITY))
+                best = m;
+        }
+        return best ?? sessionModel;
+    }
     /**
      * Install tool hooks once on the Agent instance.
      *
@@ -255,9 +293,50 @@ export class AgentSession {
             if (this._extensionRunner.hasHandlers("context")) {
                 finalMessages = await this._extensionRunner.emitContext(currentMessages);
             }
-            return this._applyContextGc(finalMessages, true).messages;
+            const gcMessages = this._applyContextGc(finalMessages, true).messages;
+            this._applyCostGuard(gcMessages);
+            return gcMessages;
         };
     }
+    /**
+     * Proactive per-turn cost guard (#34): estimate the USD cost of the about-to-be-submitted turn and,
+     * when it exceeds the user's ceiling, record a warning decision (for the host UI to surface) and —
+     * if configured to `downgrade` — step reasoning effort down ONCE per over-threshold episode to curb a
+     * runaway billing spike. Disabled by default (`maxTurnUsd<=0`), so it never alters behavior unless the
+     * user opts in. Best-effort: never throws into the turn.
+     */
+    _applyCostGuard(messages) {
+        try {
+            const guard = this.settingsManager.getCostGuardSettings();
+            if (guard.maxTurnUsd <= 0 || !this.model?.cost) {
+                this._lastCostGuardDecision = undefined;
+                return;
+            }
+            const inputTokens = this._estimateCurrentContextTokens(messages);
+            const maxOutputTokens = this.model.maxTokens ?? 4096;
+            const estUsd = estimateTurnCostUsd({ inputTokens, maxOutputTokens, cost: this.model.cost });
+            const decision = evaluateCostGuard(estUsd, { maxTurnUsd: guard.maxTurnUsd, action: guard.action });
+            this._lastCostGuardDecision = decision;
+            if (!decision.over) {
+                this._costGuardDowngraded = false; // back under the ceiling — re-arm the one-shot downgrade
+                return;
+            }
+            if (guard.action === "downgrade" && !this._costGuardDowngraded && this.supportsThinking()) {
+                const next = downgradeReasoning(this.thinkingLevel);
+                if (next !== this.thinkingLevel) {
+                    this.setThinkingLevel(next);
+                    this._costGuardDowngraded = true;
+                }
+            }
+        }
+        catch {
+            // cost guard must never disrupt a turn
+        }
+    }
+    /** Latest cost-guard decision (for the host footer/UI to surface a warning). Undefined if disabled. */
+    getLastCostGuardDecision() {
+        return this._lastCostGuardDecision;
+    }
     _installAgentTurnRefresh() {
         const previousPrepareNextTurn = this.agent.prepareNextTurn?.bind(this.agent);
         this.agent.prepareNextTurn = async (signal) => {
@@ -1676,7 +1755,8 @@ export class AgentSession {
             if (!this.model) {
                 throw new Error(formatNoModelSelectedMessage());
             }
-            const { apiKey, headers } = await this._getCompactionRequestAuth(this.model);
+            const compactionModel = this._resolveCompactionModel(this.model);
+            const { apiKey, headers } = await this._getCompactionRequestAuth(compactionModel);
             const pathEntries = this.sessionManager.getBranch();
             const settings = this.settingsManager.getCompactionSettings();
             const preparation = prepareCompaction(pathEntries, settings);
@@ -1719,7 +1799,7 @@ export class AgentSession {
             }
             else {
                 // Generate compaction result
-                const result = await compact(preparation, this.model, apiKey, headers, customInstructions, this._compactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
+                const result = await compact(preparation, compactionModel, apiKey, headers, customInstructions, this._compactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
                 summary = result.summary;
                 firstKeptEntryId = result.firstKeptEntryId;
                 tokensBefore = result.tokensBefore;
@@ -1897,10 +1977,12 @@ export class AgentSession {
                 });
                 return false;
             }
+            // Summarize with the cheap auxiliary model when available (cost guard, #30).
+            const compactionModel = this._resolveCompactionModel(this.model);
             let apiKey;
             let headers;
             if (this.agent.streamFn === streamSimple) {
-                const authResult = await this._modelRegistry.getApiKeyAndHeaders(this.model);
+                const authResult = await this._modelRegistry.getApiKeyAndHeaders(compactionModel);
                 if (!authResult.ok || !authResult.apiKey) {
                     this._emit({
                         type: "compaction_end",
@@ -1915,7 +1997,7 @@ export class AgentSession {
                 headers = authResult.headers;
             }
             else {
-                ({ apiKey, headers } = await this._getCompactionRequestAuth(this.model));
+                ({ apiKey, headers } = await this._getCompactionRequestAuth(compactionModel));
             }
             const pathEntries = this.sessionManager.getBranch();
             const preparation = prepareCompaction(pathEntries, settings);
@@ -1967,7 +2049,7 @@ export class AgentSession {
             }
             else {
                 // Generate compaction result
-                const compactResult = await compact(preparation, this.model, apiKey, headers, undefined, this._autoCompactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
+                const compactResult = await compact(preparation, compactionModel, apiKey, headers, undefined, this._autoCompactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
                 summary = compactResult.summary;
                 firstKeptEntryId = compactResult.firstKeptEntryId;
                 tokensBefore = compactResult.tokensBefore;
@@ -3369,7 +3451,7 @@ export class AgentSession {
         const options = {
             maxTokens: opts.maxTokens,
             signal: opts.signal,
-            cacheRetention: "none",
+            cacheRetention: opts.cacheRetention ?? "none",
         };
         // pi-ai's `reasoning` option does not include "off" (that's the provider default already).
         if (thinkingLevel !== "off") {
@@ -3429,6 +3511,9 @@ export class AgentSession {
             thinkingLevel: input.thinkingLevel ?? "low",
             maxTokens: plan.tokenBudget,
             signal,
+            // The reflection system prompt is static (#33) — let the provider cache the prefix so
+            // repeated passes only pay for the variable tail.
+            cacheRetention: "short",
         });
         const result = await new ReflectionEngine().reflect({
             recentTurnText: input.recentTurnText,
@@ -3519,7 +3604,9 @@ export class AgentSession {
                 return; // do not overwrite an existing skill
             mkdirSync(dir, { recursive: true });
             const safeDescription = description.replace(/[\r\n]+/g, " ").trim();
-            const content = `---\nname: ${name}\ndescription: ${safeDescription}\n---\n\n<!-- Auto-generated by the reflection engine (R7 memory-to-behavior). Review and refine. -->\n\n${body.trim()}\n`;
+            // `promoted: true` marks this as reflection-generated so the curator (#32) can lifecycle-manage
+            // it (archive/consolidate) WITHOUT ever touching hand-authored user skills.
+            const content = `---\nname: ${name}\ndescription: ${safeDescription}\npromoted: true\n---\n\n<!-- Auto-generated by the reflection engine (R7 memory-to-behavior). Review and refine. -->\n\n${body.trim()}\n`;
             writeFileSync(file, content, "utf-8");
         }
         catch {