@caupulican/pi-adaptative 0.80.74 → 0.80.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/core/agent-session.d.ts +32 -1
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +96 -9
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/compaction/compaction.d.ts +22 -0
- package/dist/core/compaction/compaction.d.ts.map +1 -1
- package/dist/core/compaction/compaction.js +31 -3
- package/dist/core/compaction/compaction.js.map +1 -1
- package/dist/core/cost-guard.d.ts +55 -0
- package/dist/core/cost-guard.d.ts.map +1 -0
- package/dist/core/cost-guard.js +50 -0
- package/dist/core/cost-guard.js.map +1 -0
- package/dist/core/learning/reflection-engine.d.ts +7 -0
- package/dist/core/learning/reflection-engine.d.ts.map +1 -1
- package/dist/core/learning/reflection-engine.js +22 -13
- package/dist/core/learning/reflection-engine.js.map +1 -1
- package/dist/core/memory/providers/file-store.d.ts.map +1 -1
- package/dist/core/memory/providers/file-store.js +33 -2
- package/dist/core/memory/providers/file-store.js.map +1 -1
- package/dist/core/resource-loader.d.ts +19 -1
- package/dist/core/resource-loader.d.ts.map +1 -1
- package/dist/core/resource-loader.js +69 -5
- package/dist/core/resource-loader.js.map +1 -1
- package/dist/core/settings-manager.d.ts +16 -0
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +15 -0
- package/dist/core/settings-manager.js.map +1 -1
- package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
- package/examples/extensions/custom-provider-anthropic/package.json +1 -1
- package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
- package/examples/extensions/sandbox/package-lock.json +2 -2
- package/examples/extensions/sandbox/package.json +1 -1
- package/examples/extensions/with-deps/package-lock.json +2 -2
- package/examples/extensions/with-deps/package.json +1 -1
- package/npm-shrinkwrap.json +12 -12
- package/package.json +4 -4
|
@@ -24,6 +24,7 @@ import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth
|
|
|
24
24
|
import { executeBashWithOperations } from "./bash-executor.js";
|
|
25
25
|
import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
|
|
26
26
|
import { applyContextGc } from "./context-gc.js";
|
|
27
|
+
import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
|
|
27
28
|
import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
|
|
28
29
|
import { exportSessionToHtml } from "./export-html/index.js";
|
|
29
30
|
import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
|
|
@@ -39,7 +40,7 @@ import { FileStoreProvider } from "./memory/providers/file-store.js";
|
|
|
39
40
|
import { TranscriptRecallProvider } from "./memory/providers/transcript-recall.js";
|
|
40
41
|
import { compactToolResultDetailsForRetention } from "./message-retention.js";
|
|
41
42
|
import { createCustomMessage } from "./messages.js";
|
|
42
|
-
import { resolveProfileModelSettings } from "./model-resolver.js";
|
|
43
|
+
import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
|
|
43
44
|
import { expandPromptTemplate } from "./prompt-templates.js";
|
|
44
45
|
import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
|
|
45
46
|
import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
|
|
@@ -130,6 +131,10 @@ export class AgentSession {
|
|
|
130
131
|
_gatewayRegistry = new GatewayRegistry();
|
|
131
132
|
/** Cache for getSpawnedUsage(), keyed by session entry count (Bug #22 — avoid O(N) per render frame). */
|
|
132
133
|
_spawnedUsageCache;
|
|
134
|
+
/** Latest proactive cost-guard decision (#34), for the host UI to surface. Undefined when disabled. */
|
|
135
|
+
_lastCostGuardDecision;
|
|
136
|
+
/** One-shot latch so the cost guard downgrades reasoning once per over-threshold episode, not every call. */
|
|
137
|
+
_costGuardDowngraded = false;
|
|
133
138
|
/** Set on dispose so in-flight background reflection bails instead of writing to a dead session (Bug #21). */
|
|
134
139
|
_disposed = false;
|
|
135
140
|
/** Aborts in-flight background reflection completions on dispose (Bug #21). */
|
|
@@ -219,6 +224,39 @@ export class AgentSession {
|
|
|
219
224
|
const result = await this._modelRegistry.getApiKeyAndHeaders(model);
|
|
220
225
|
return result.ok ? { apiKey: result.apiKey, headers: result.headers } : {};
|
|
221
226
|
}
|
|
227
|
+
/**
|
|
228
|
+
* Resolve the model used to SUMMARIZE during compaction (cost guard, #30). A compaction summary is an
|
|
229
|
+
* extraction task — it does not need the main (expensive) model. Selection:
|
|
230
|
+
* - an explicit `compaction.model` setting wins, but only if its provider is authed (else fall back);
|
|
231
|
+
* - `"auto"` (default) picks the CHEAPEST authed model whose context window can hold a compaction
|
|
232
|
+
* (capability floor), and ONLY if it is strictly cheaper than the session model — so we never
|
|
233
|
+
* downgrade to an equally-priced but weaker summarizer (agy's floor: don't degrade the checkpoint);
|
|
234
|
+
* - otherwise the session model is used (safe default).
|
|
235
|
+
*/
|
|
236
|
+
_resolveCompactionModel(sessionModel) {
|
|
237
|
+
const setting = this.settingsManager.getCompactionModel();
|
|
238
|
+
if (setting && setting !== "auto") {
|
|
239
|
+
const resolved = resolveCliModel({ cliModel: setting, modelRegistry: this._modelRegistry });
|
|
240
|
+
if (resolved.model && this._modelRegistry.hasConfiguredAuth(resolved.model))
|
|
241
|
+
return resolved.model;
|
|
242
|
+
return sessionModel; // configured but unusable → don't break compaction
|
|
243
|
+
}
|
|
244
|
+
// "auto": cheapest authed model that can summarize a large context AND is cheaper than the session
|
|
245
|
+
// model. The context-window floor keeps a tiny local model from being picked for a big summary.
|
|
246
|
+
const FLOOR_CONTEXT = 64_000;
|
|
247
|
+
const sessionInputCost = sessionModel.cost?.input ?? Number.POSITIVE_INFINITY;
|
|
248
|
+
let best;
|
|
249
|
+
for (const m of this._modelRegistry.getAvailable()) {
|
|
250
|
+
if ((m.contextWindow ?? 0) < FLOOR_CONTEXT)
|
|
251
|
+
continue;
|
|
252
|
+
const cost = m.cost?.input ?? Number.POSITIVE_INFINITY;
|
|
253
|
+
if (cost >= sessionInputCost)
|
|
254
|
+
continue; // only ever pick something cheaper than the session model
|
|
255
|
+
if (!best || cost < (best.cost?.input ?? Number.POSITIVE_INFINITY))
|
|
256
|
+
best = m;
|
|
257
|
+
}
|
|
258
|
+
return best ?? sessionModel;
|
|
259
|
+
}
|
|
222
260
|
/**
|
|
223
261
|
* Install tool hooks once on the Agent instance.
|
|
224
262
|
*
|
|
@@ -255,9 +293,50 @@ export class AgentSession {
|
|
|
255
293
|
if (this._extensionRunner.hasHandlers("context")) {
|
|
256
294
|
finalMessages = await this._extensionRunner.emitContext(currentMessages);
|
|
257
295
|
}
|
|
258
|
-
|
|
296
|
+
const gcMessages = this._applyContextGc(finalMessages, true).messages;
|
|
297
|
+
this._applyCostGuard(gcMessages);
|
|
298
|
+
return gcMessages;
|
|
259
299
|
};
|
|
260
300
|
}
|
|
301
|
+
/**
|
|
302
|
+
* Proactive per-turn cost guard (#34): estimate the USD cost of the about-to-be-submitted turn and,
|
|
303
|
+
* when it exceeds the user's ceiling, record a warning decision (for the host UI to surface) and —
|
|
304
|
+
* if configured to `downgrade` — step reasoning effort down ONCE per over-threshold episode to curb a
|
|
305
|
+
* runaway billing spike. Disabled by default (`maxTurnUsd<=0`), so it never alters behavior unless the
|
|
306
|
+
* user opts in. Best-effort: never throws into the turn.
|
|
307
|
+
*/
|
|
308
|
+
_applyCostGuard(messages) {
|
|
309
|
+
try {
|
|
310
|
+
const guard = this.settingsManager.getCostGuardSettings();
|
|
311
|
+
if (guard.maxTurnUsd <= 0 || !this.model?.cost) {
|
|
312
|
+
this._lastCostGuardDecision = undefined;
|
|
313
|
+
return;
|
|
314
|
+
}
|
|
315
|
+
const inputTokens = this._estimateCurrentContextTokens(messages);
|
|
316
|
+
const maxOutputTokens = this.model.maxTokens ?? 4096;
|
|
317
|
+
const estUsd = estimateTurnCostUsd({ inputTokens, maxOutputTokens, cost: this.model.cost });
|
|
318
|
+
const decision = evaluateCostGuard(estUsd, { maxTurnUsd: guard.maxTurnUsd, action: guard.action });
|
|
319
|
+
this._lastCostGuardDecision = decision;
|
|
320
|
+
if (!decision.over) {
|
|
321
|
+
this._costGuardDowngraded = false; // back under the ceiling — re-arm the one-shot downgrade
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
if (guard.action === "downgrade" && !this._costGuardDowngraded && this.supportsThinking()) {
|
|
325
|
+
const next = downgradeReasoning(this.thinkingLevel);
|
|
326
|
+
if (next !== this.thinkingLevel) {
|
|
327
|
+
this.setThinkingLevel(next);
|
|
328
|
+
this._costGuardDowngraded = true;
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
catch {
|
|
333
|
+
// cost guard must never disrupt a turn
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
/** Latest cost-guard decision (for the host footer/UI to surface a warning). Undefined if disabled. */
|
|
337
|
+
getLastCostGuardDecision() {
|
|
338
|
+
return this._lastCostGuardDecision;
|
|
339
|
+
}
|
|
261
340
|
_installAgentTurnRefresh() {
|
|
262
341
|
const previousPrepareNextTurn = this.agent.prepareNextTurn?.bind(this.agent);
|
|
263
342
|
this.agent.prepareNextTurn = async (signal) => {
|
|
@@ -1676,7 +1755,8 @@ export class AgentSession {
|
|
|
1676
1755
|
if (!this.model) {
|
|
1677
1756
|
throw new Error(formatNoModelSelectedMessage());
|
|
1678
1757
|
}
|
|
1679
|
-
const
|
|
1758
|
+
const compactionModel = this._resolveCompactionModel(this.model);
|
|
1759
|
+
const { apiKey, headers } = await this._getCompactionRequestAuth(compactionModel);
|
|
1680
1760
|
const pathEntries = this.sessionManager.getBranch();
|
|
1681
1761
|
const settings = this.settingsManager.getCompactionSettings();
|
|
1682
1762
|
const preparation = prepareCompaction(pathEntries, settings);
|
|
@@ -1719,7 +1799,7 @@ export class AgentSession {
|
|
|
1719
1799
|
}
|
|
1720
1800
|
else {
|
|
1721
1801
|
// Generate compaction result
|
|
1722
|
-
const result = await compact(preparation,
|
|
1802
|
+
const result = await compact(preparation, compactionModel, apiKey, headers, customInstructions, this._compactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
|
|
1723
1803
|
summary = result.summary;
|
|
1724
1804
|
firstKeptEntryId = result.firstKeptEntryId;
|
|
1725
1805
|
tokensBefore = result.tokensBefore;
|
|
@@ -1897,10 +1977,12 @@ export class AgentSession {
|
|
|
1897
1977
|
});
|
|
1898
1978
|
return false;
|
|
1899
1979
|
}
|
|
1980
|
+
// Summarize with the cheap auxiliary model when available (cost guard, #30).
|
|
1981
|
+
const compactionModel = this._resolveCompactionModel(this.model);
|
|
1900
1982
|
let apiKey;
|
|
1901
1983
|
let headers;
|
|
1902
1984
|
if (this.agent.streamFn === streamSimple) {
|
|
1903
|
-
const authResult = await this._modelRegistry.getApiKeyAndHeaders(
|
|
1985
|
+
const authResult = await this._modelRegistry.getApiKeyAndHeaders(compactionModel);
|
|
1904
1986
|
if (!authResult.ok || !authResult.apiKey) {
|
|
1905
1987
|
this._emit({
|
|
1906
1988
|
type: "compaction_end",
|
|
@@ -1915,7 +1997,7 @@ export class AgentSession {
|
|
|
1915
1997
|
headers = authResult.headers;
|
|
1916
1998
|
}
|
|
1917
1999
|
else {
|
|
1918
|
-
({ apiKey, headers } = await this._getCompactionRequestAuth(
|
|
2000
|
+
({ apiKey, headers } = await this._getCompactionRequestAuth(compactionModel));
|
|
1919
2001
|
}
|
|
1920
2002
|
const pathEntries = this.sessionManager.getBranch();
|
|
1921
2003
|
const preparation = prepareCompaction(pathEntries, settings);
|
|
@@ -1967,7 +2049,7 @@ export class AgentSession {
|
|
|
1967
2049
|
}
|
|
1968
2050
|
else {
|
|
1969
2051
|
// Generate compaction result
|
|
1970
|
-
const compactResult = await compact(preparation,
|
|
2052
|
+
const compactResult = await compact(preparation, compactionModel, apiKey, headers, undefined, this._autoCompactionAbortController.signal, this.thinkingLevel, this.agent.streamFn);
|
|
1971
2053
|
summary = compactResult.summary;
|
|
1972
2054
|
firstKeptEntryId = compactResult.firstKeptEntryId;
|
|
1973
2055
|
tokensBefore = compactResult.tokensBefore;
|
|
@@ -3369,7 +3451,7 @@ export class AgentSession {
|
|
|
3369
3451
|
const options = {
|
|
3370
3452
|
maxTokens: opts.maxTokens,
|
|
3371
3453
|
signal: opts.signal,
|
|
3372
|
-
cacheRetention: "none",
|
|
3454
|
+
cacheRetention: opts.cacheRetention ?? "none",
|
|
3373
3455
|
};
|
|
3374
3456
|
// pi-ai's `reasoning` option does not include "off" (that's the provider default already).
|
|
3375
3457
|
if (thinkingLevel !== "off") {
|
|
@@ -3429,6 +3511,9 @@ export class AgentSession {
|
|
|
3429
3511
|
thinkingLevel: input.thinkingLevel ?? "low",
|
|
3430
3512
|
maxTokens: plan.tokenBudget,
|
|
3431
3513
|
signal,
|
|
3514
|
+
// The reflection system prompt is static (#33) — let the provider cache the prefix so
|
|
3515
|
+
// repeated passes only pay for the variable tail.
|
|
3516
|
+
cacheRetention: "short",
|
|
3432
3517
|
});
|
|
3433
3518
|
const result = await new ReflectionEngine().reflect({
|
|
3434
3519
|
recentTurnText: input.recentTurnText,
|
|
@@ -3519,7 +3604,9 @@ export class AgentSession {
|
|
|
3519
3604
|
return; // do not overwrite an existing skill
|
|
3520
3605
|
mkdirSync(dir, { recursive: true });
|
|
3521
3606
|
const safeDescription = description.replace(/[\r\n]+/g, " ").trim();
|
|
3522
|
-
|
|
3607
|
+
// `promoted: true` marks this as reflection-generated so the curator (#32) can lifecycle-manage
|
|
3608
|
+
// it (archive/consolidate) WITHOUT ever touching hand-authored user skills.
|
|
3609
|
+
const content = `---\nname: ${name}\ndescription: ${safeDescription}\npromoted: true\n---\n\n<!-- Auto-generated by the reflection engine (R7 memory-to-behavior). Review and refine. -->\n\n${body.trim()}\n`;
|
|
3523
3610
|
writeFileSync(file, content, "utf-8");
|
|
3524
3611
|
}
|
|
3525
3612
|
catch {
|