@gajae-code/coding-agent 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/dist/types/async/job-manager.d.ts +6 -0
- package/dist/types/config/model-profiles.d.ts +10 -0
- package/dist/types/dap/client.d.ts +2 -1
- package/dist/types/edit/read-file.d.ts +6 -0
- package/dist/types/eval/js/context-manager.d.ts +3 -0
- package/dist/types/eval/js/executor.d.ts +1 -0
- package/dist/types/exec/bash-executor.d.ts +2 -0
- package/dist/types/gjc-runtime/tmux-sessions.d.ts +7 -1
- package/dist/types/lsp/types.d.ts +2 -0
- package/dist/types/modes/bridge/bridge-mode.d.ts +1 -0
- package/dist/types/modes/components/model-selector.d.ts +2 -0
- package/dist/types/modes/components/oauth-selector.d.ts +1 -0
- package/dist/types/modes/components/runtime-mcp-add-wizard.d.ts +1 -0
- package/dist/types/modes/components/tool-execution.d.ts +1 -0
- package/dist/types/modes/interactive-mode.d.ts +1 -0
- package/dist/types/modes/types.d.ts +1 -0
- package/dist/types/runtime/process-lifecycle.d.ts +108 -0
- package/dist/types/runtime-mcp/transports/stdio.d.ts +1 -0
- package/dist/types/runtime-mcp/types.d.ts +2 -0
- package/dist/types/session/agent-session.d.ts +29 -1
- package/dist/types/session/artifacts.d.ts +4 -1
- package/dist/types/session/streaming-output.d.ts +12 -0
- package/dist/types/slash-commands/helpers/fast-status-report.d.ts +76 -0
- package/dist/types/tools/bash.d.ts +1 -0
- package/dist/types/tools/browser/tab-supervisor.d.ts +9 -0
- package/dist/types/tools/sqlite-reader.d.ts +2 -1
- package/dist/types/web/search/providers/codex.d.ts +4 -4
- package/package.json +7 -7
- package/src/async/job-manager.ts +181 -43
- package/src/config/file-lock.ts +9 -1
- package/src/config/model-profile-activation.ts +71 -3
- package/src/config/model-profiles.ts +39 -14
- package/src/dap/client.ts +105 -64
- package/src/dap/session.ts +44 -7
- package/src/defaults/gjc/skills/deep-interview/SKILL.md +11 -2
- package/src/defaults/gjc/skills/ralplan/SKILL.md +2 -2
- package/src/defaults/gjc/skills/ultragoal/SKILL.md +2 -2
- package/src/edit/read-file.ts +19 -1
- package/src/eval/js/context-manager.ts +228 -65
- package/src/eval/js/executor.ts +2 -0
- package/src/eval/js/index.ts +1 -0
- package/src/eval/js/worker-core.ts +10 -6
- package/src/eval/py/executor.ts +68 -19
- package/src/eval/py/kernel.ts +46 -22
- package/src/eval/py/runner.py +68 -14
- package/src/exec/bash-executor.ts +49 -13
- package/src/gjc-runtime/deep-interview-runtime.ts +14 -13
- package/src/gjc-runtime/ralplan-runtime.ts +10 -0
- package/src/gjc-runtime/state-runtime.ts +73 -0
- package/src/gjc-runtime/tmux-gc.ts +86 -37
- package/src/gjc-runtime/tmux-sessions.ts +44 -6
- package/src/gjc-runtime/ultragoal-runtime.ts +8 -4
- package/src/internal-urls/artifact-protocol.ts +10 -1
- package/src/internal-urls/docs-index.generated.ts +2 -2
- package/src/lsp/client.ts +64 -26
- package/src/lsp/index.ts +2 -1
- package/src/lsp/lspmux.ts +33 -9
- package/src/lsp/types.ts +2 -0
- package/src/modes/bridge/bridge-mode.ts +21 -0
- package/src/modes/components/assistant-message.ts +10 -2
- package/src/modes/components/bash-execution.ts +5 -1
- package/src/modes/components/eval-execution.ts +5 -1
- package/src/modes/components/model-selector.ts +34 -2
- package/src/modes/components/oauth-selector.ts +5 -0
- package/src/modes/components/runtime-mcp-add-wizard.ts +58 -7
- package/src/modes/components/skill-message.ts +24 -16
- package/src/modes/components/tool-execution.ts +6 -0
- package/src/modes/controllers/extension-ui-controller.ts +33 -6
- package/src/modes/controllers/input-controller.ts +19 -0
- package/src/modes/controllers/selector-controller.ts +6 -1
- package/src/modes/interactive-mode.ts +13 -0
- package/src/modes/types.ts +1 -0
- package/src/modes/utils/ui-helpers.ts +5 -2
- package/src/prompts/agents/executor.md +1 -1
- package/src/runtime/process-lifecycle.ts +400 -0
- package/src/runtime-mcp/manager.ts +164 -50
- package/src/runtime-mcp/transports/http.ts +12 -11
- package/src/runtime-mcp/transports/stdio.ts +64 -38
- package/src/runtime-mcp/types.ts +3 -0
- package/src/sdk.ts +27 -0
- package/src/session/agent-session.ts +271 -25
- package/src/session/artifacts.ts +17 -2
- package/src/session/blob-store.ts +36 -2
- package/src/session/session-manager.ts +29 -13
- package/src/session/streaming-output.ts +95 -3
- package/src/setup/model-onboarding-guidance.ts +10 -3
- package/src/skill-state/active-state.ts +79 -7
- package/src/slash-commands/builtin-registry.ts +30 -3
- package/src/slash-commands/helpers/fast-status-report.ts +111 -0
- package/src/tools/archive-reader.ts +10 -1
- package/src/tools/bash.ts +11 -4
- package/src/tools/browser/registry.ts +17 -1
- package/src/tools/browser/tab-supervisor.ts +22 -0
- package/src/tools/browser.ts +38 -4
- package/src/tools/cron.ts +2 -6
- package/src/tools/read.ts +11 -12
- package/src/tools/sqlite-reader.ts +19 -5
- package/src/web/search/providers/codex.ts +6 -5
package/src/session/agent-session.ts
CHANGED
|
@@ -41,6 +41,8 @@ import {
|
|
|
41
41
|
calculatePromptTokens,
|
|
42
42
|
collectEntriesForBranchSummary,
|
|
43
43
|
compact,
|
|
44
|
+
type EmergencyCompactionSample,
|
|
45
|
+
emergencyCompactionReason,
|
|
44
46
|
estimateMessageTokensHeuristic,
|
|
45
47
|
estimateTokens,
|
|
46
48
|
generateBranchSummary,
|
|
@@ -142,6 +144,7 @@ import { onAppendOnlyModeChanged } from "../config/settings";
|
|
|
142
144
|
import { RawSseDebugBuffer } from "../debug/raw-sse-buffer";
|
|
143
145
|
import { loadCapability } from "../discovery";
|
|
144
146
|
import { expandApplyPatchToEntries, normalizeDiff, normalizeToLF, ParseError, previewPatch, stripBom } from "../edit";
|
|
147
|
+
import { disposeVmContextsByOwner } from "../eval/js/context-manager";
|
|
145
148
|
import {
|
|
146
149
|
disposeKernelSessionsByOwner,
|
|
147
150
|
executePython as executePythonCommand,
|
|
@@ -234,6 +237,7 @@ import {
|
|
|
234
237
|
import type { ToolSession } from "../tools";
|
|
235
238
|
import { AskTool } from "../tools/ask";
|
|
236
239
|
import { assertEditableFile } from "../tools/auto-generated-guard";
|
|
240
|
+
import { releaseTabsForOwner } from "../tools/browser/tab-supervisor";
|
|
237
241
|
import type { CheckpointState } from "../tools/checkpoint";
|
|
238
242
|
import { outputMeta, wrapToolWithMetaNotice } from "../tools/output-meta";
|
|
239
243
|
import { normalizeLocalScheme, resolveToCwd } from "../tools/path-utils";
|
|
@@ -545,6 +549,13 @@ function formatRetryFallbackBaseSelector(selector: RetryFallbackSelector): strin
|
|
|
545
549
|
|
|
546
550
|
const IRC_REPLY_MAX_BYTES = 4096;
|
|
547
551
|
|
|
552
|
+
/**
|
|
553
|
+
* Hard cap for {@link AgentSession.disposeChildSubprocesses}. A `SIGINT`/`SIGTERM` handler
|
|
554
|
+
* awaits this teardown before exiting, so it must never block longer than this even if a
|
|
555
|
+
* subprocess (wedged Chrome renderer, stuck Python cell) refuses to settle.
|
|
556
|
+
*/
|
|
557
|
+
const SIGNAL_TEARDOWN_TIMEOUT_MS = 5_000;
|
|
558
|
+
|
|
548
559
|
/**
|
|
549
560
|
* Collapse degenerate IRC ephemeral replies before they hit the relay.
|
|
550
561
|
* Models occasionally loop on a single line (~16 reports of N-times-repeated
|
|
@@ -907,6 +918,7 @@ export class AgentSession {
|
|
|
907
918
|
// Compaction state
|
|
908
919
|
#compactionAbortController: AbortController | undefined = undefined;
|
|
909
920
|
#autoCompactionAbortController: AbortController | undefined = undefined;
|
|
921
|
+
#resourceSampler: () => EmergencyCompactionSample = () => this.#defaultResourceSample();
|
|
910
922
|
#prePromptContextCheckPromise: Promise<void> | undefined = undefined;
|
|
911
923
|
|
|
912
924
|
// Branch summarization state
|
|
@@ -3187,6 +3199,13 @@ export class AgentSession {
|
|
|
3187
3199
|
}
|
|
3188
3200
|
}
|
|
3189
3201
|
await shutdownAllLspClients();
|
|
3202
|
+
// F13: release only THIS session's browser tabs on dispose (kill:false → remote
|
|
3203
|
+
// browsers disconnect, headless close gracefully). Scoped by the session id the
|
|
3204
|
+
// browser tool tagged tabs with, so other live sessions' tabs are untouched.
|
|
3205
|
+
// No-op when this session opened no tabs. Failure is logged, not thrown.
|
|
3206
|
+
await releaseTabsForOwner(this.sessionManager.getSessionId()).catch((error: unknown) =>
|
|
3207
|
+
logger.warn("session dispose: releaseTabsForOwner failed", { error }),
|
|
3208
|
+
);
|
|
3190
3209
|
const pythonExecutionsSettled = await this.#prepareEvalExecutionsForDispose();
|
|
3191
3210
|
if (!pythonExecutionsSettled) {
|
|
3192
3211
|
logger.warn(
|
|
@@ -3194,6 +3213,7 @@ export class AgentSession {
|
|
|
3194
3213
|
);
|
|
3195
3214
|
}
|
|
3196
3215
|
await disposeKernelSessionsByOwner(this.#evalKernelOwnerId);
|
|
3216
|
+
await disposeVmContextsByOwner(this.#evalKernelOwnerId);
|
|
3197
3217
|
this.#releasePowerAssertion();
|
|
3198
3218
|
await this.sessionManager.close();
|
|
3199
3219
|
this.#closeAllProviderSessions("dispose");
|
|
@@ -3208,6 +3228,36 @@ export class AgentSession {
|
|
|
3208
3228
|
this.#eventListeners = [];
|
|
3209
3229
|
}
|
|
3210
3230
|
|
|
3231
|
+
/**
|
|
3232
|
+
* Bounded, best-effort teardown of the subprocess-spawning resources this session
|
|
3233
|
+
* owns: the browser tool's headless/spawned Chrome and the Python eval kernel + JS VM
|
|
3234
|
+
* contexts. Unlike {@link dispose}, this touches only child processes and is time-boxed,
|
|
3235
|
+
* so a top-level `SIGINT`/`SIGTERM`/`SIGHUP` handler can run it without hanging — without
|
|
3236
|
+
* it, an external kill bypasses `dispose()` and orphans Chrome/Python to PID 1 (#698).
|
|
3237
|
+
*
|
|
3238
|
+
* Idempotent: every step is a no-op once the graceful {@link dispose} path has released
|
|
3239
|
+
* the resources. Never throws; per-step failures are logged and the whole run is capped
|
|
3240
|
+
* at `timeoutMs` so a wedged subprocess can't stall process exit.
|
|
3241
|
+
*/
|
|
3242
|
+
async disposeChildSubprocesses(timeoutMs = SIGNAL_TEARDOWN_TIMEOUT_MS): Promise<void> {
|
|
3243
|
+
const sessionId = this.sessionManager.getSessionId();
|
|
3244
|
+
const kernelOwnerId = this.#evalKernelOwnerId;
|
|
3245
|
+
const work = Promise.allSettled([
|
|
3246
|
+
// kill:true so a forced exit also reaps spawned-app Chrome we own (headless
|
|
3247
|
+
// always closes; connected/attached browsers only disconnect — never killed).
|
|
3248
|
+
releaseTabsForOwner(sessionId, { kill: true }).catch((error: unknown) =>
|
|
3249
|
+
logger.warn("signal teardown: releaseTabsForOwner failed", { error }),
|
|
3250
|
+
),
|
|
3251
|
+
disposeKernelSessionsByOwner(kernelOwnerId).catch((error: unknown) =>
|
|
3252
|
+
logger.warn("signal teardown: disposeKernelSessionsByOwner failed", { error }),
|
|
3253
|
+
),
|
|
3254
|
+
disposeVmContextsByOwner(kernelOwnerId).catch((error: unknown) =>
|
|
3255
|
+
logger.warn("signal teardown: disposeVmContextsByOwner failed", { error }),
|
|
3256
|
+
),
|
|
3257
|
+
]);
|
|
3258
|
+
await Promise.race([work, Bun.sleep(timeoutMs)]);
|
|
3259
|
+
}
|
|
3260
|
+
|
|
3211
3261
|
#closeAllProviderSessions(reason: string): void {
|
|
3212
3262
|
for (const [providerKey, state] of this.#providerSessionState) {
|
|
3213
3263
|
try {
|
|
@@ -6016,6 +6066,44 @@ export class AgentSession {
|
|
|
6016
6066
|
);
|
|
6017
6067
|
}
|
|
6018
6068
|
|
|
6069
|
+
/**
|
|
6070
|
+
* True when the configured `serviceTier` resolves to `"priority"` for the
|
|
6071
|
+
* given model `provider`. Returns false for scoped tiers that don't match
|
|
6072
|
+
* (e.g. `"openai-only"` on an anthropic provider) and when `provider` is
|
|
6073
|
+
* undefined. This is the canonical provider-aware fast-mode predicate.
|
|
6074
|
+
*/
|
|
6075
|
+
isFastForProvider(provider?: string): boolean {
|
|
6076
|
+
// Fast mode applies to a concrete model's provider. With no provider
|
|
6077
|
+
// (no model selected) it cannot apply, even under an unscoped `priority`
|
|
6078
|
+
// tier that `resolveServiceTier` would otherwise pass through.
|
|
6079
|
+
if (provider === undefined) return false;
|
|
6080
|
+
return resolveServiceTier(this.serviceTier, provider) === "priority";
|
|
6081
|
+
}
|
|
6082
|
+
|
|
6083
|
+
/**
|
|
6084
|
+
* Effective service tier applied to task-tool subagent sessions
|
|
6085
|
+
* (executor/architect/planner/critic). They run under `task.serviceTier`
|
|
6086
|
+
* unless it is `"inherit"`, in which case they inherit the main session
|
|
6087
|
+
* tier — mirroring `createSubagentSettings`.
|
|
6088
|
+
*/
|
|
6089
|
+
#subagentServiceTier(): ServiceTier | undefined {
|
|
6090
|
+
const configured = this.settings.get("task.serviceTier");
|
|
6091
|
+
if (configured === "inherit") return this.serviceTier;
|
|
6092
|
+
if (configured === "none") return undefined;
|
|
6093
|
+
return configured;
|
|
6094
|
+
}
|
|
6095
|
+
|
|
6096
|
+
/**
|
|
6097
|
+
* Provider-aware fast-mode predicate for task-tool subagent roles, evaluated
|
|
6098
|
+
* against the effective subagent tier (`task.serviceTier`) rather than the
|
|
6099
|
+
* main session tier. Use this for `task.agentModelOverrides` role rows so the
|
|
6100
|
+
* ⚡ glyph reflects the tier the subagent actually runs under.
|
|
6101
|
+
*/
|
|
6102
|
+
isFastForSubagentProvider(provider?: string): boolean {
|
|
6103
|
+
if (provider === undefined) return false;
|
|
6104
|
+
return resolveServiceTier(this.#subagentServiceTier(), provider) === "priority";
|
|
6105
|
+
}
|
|
6106
|
+
|
|
6019
6107
|
/**
|
|
6020
6108
|
* True when the configured `serviceTier` resolves to `"priority"` for the
|
|
6021
6109
|
* *currently selected model's provider*. Returns false for scoped tiers
|
|
@@ -6023,7 +6111,7 @@ export class AgentSession {
|
|
|
6023
6111
|
* no model is selected.
|
|
6024
6112
|
*/
|
|
6025
6113
|
isFastModeActive(): boolean {
|
|
6026
|
-
return
|
|
6114
|
+
return this.isFastForProvider(this.model?.provider);
|
|
6027
6115
|
}
|
|
6028
6116
|
|
|
6029
6117
|
setServiceTier(serviceTier: ServiceTier | undefined): void {
|
|
@@ -6394,6 +6482,7 @@ export class AgentSession {
|
|
|
6394
6482
|
model,
|
|
6395
6483
|
apiKey,
|
|
6396
6484
|
{
|
|
6485
|
+
...this.#maintenanceProviderTransport(),
|
|
6397
6486
|
systemPrompt: this.#baseSystemPrompt,
|
|
6398
6487
|
tools: this.agent.state.tools,
|
|
6399
6488
|
customInstructions,
|
|
@@ -6587,11 +6676,55 @@ export class AgentSession {
|
|
|
6587
6676
|
}
|
|
6588
6677
|
}
|
|
6589
6678
|
|
|
6679
|
+
/** Test seam: override the emergency-compaction resource sampler so tests never read real RSS. */
|
|
6680
|
+
setResourceSampler(sampler: () => EmergencyCompactionSample): void {
|
|
6681
|
+
this.#resourceSampler = sampler;
|
|
6682
|
+
}
|
|
6683
|
+
|
|
6684
|
+
#defaultResourceSample(): EmergencyCompactionSample {
|
|
6685
|
+
let providerBytes = 0;
|
|
6686
|
+
let imageBytes = 0;
|
|
6687
|
+
for (const message of this.state.messages) {
|
|
6688
|
+
const content = (message as { content?: unknown }).content;
|
|
6689
|
+
if (typeof content === "string") {
|
|
6690
|
+
providerBytes += content.length;
|
|
6691
|
+
} else if (Array.isArray(content)) {
|
|
6692
|
+
for (const block of content) {
|
|
6693
|
+
if (!block || typeof block !== "object") continue;
|
|
6694
|
+
const typed = block as { text?: unknown; data?: unknown };
|
|
6695
|
+
if (typeof typed.text === "string") providerBytes += typed.text.length;
|
|
6696
|
+
if (typeof typed.data === "string") {
|
|
6697
|
+
imageBytes += typed.data.length;
|
|
6698
|
+
providerBytes += typed.data.length;
|
|
6699
|
+
}
|
|
6700
|
+
}
|
|
6701
|
+
}
|
|
6702
|
+
}
|
|
6703
|
+
return {
|
|
6704
|
+
heapUsedBytes: process.memoryUsage().heapUsed,
|
|
6705
|
+
providerBytes,
|
|
6706
|
+
messageCount: this.state.messages.length,
|
|
6707
|
+
imageBytes,
|
|
6708
|
+
};
|
|
6709
|
+
}
|
|
6710
|
+
|
|
6590
6711
|
async #checkEstimatedContextBeforePromptOnce(pendingMessages: readonly AgentMessage[]): Promise<void> {
|
|
6591
6712
|
const model = this.model;
|
|
6592
6713
|
if (!model) return;
|
|
6593
6714
|
const contextWindow = model.contextWindow ?? 0;
|
|
6594
6715
|
if (contextWindow <= 0) return;
|
|
6716
|
+
// F6: non-disableable emergency floor — compact before OOM even when token-based
|
|
6717
|
+
// compaction is disabled or its threshold is set too high (weak-hardware protection).
|
|
6718
|
+
const emergencyReason = emergencyCompactionReason(this.#resourceSampler());
|
|
6719
|
+
if (emergencyReason) {
|
|
6720
|
+
logger.warn("Emergency compaction triggered (resource floor exceeded)", { reason: emergencyReason });
|
|
6721
|
+
await this.#runAutoCompaction("overflow", false, false, {
|
|
6722
|
+
continueAfterMaintenance: false,
|
|
6723
|
+
deferHandoffMaintenance: false,
|
|
6724
|
+
force: true,
|
|
6725
|
+
});
|
|
6726
|
+
return;
|
|
6727
|
+
}
|
|
6595
6728
|
const compactionSettings = this.settings.getGroup("compaction");
|
|
6596
6729
|
if (!compactionSettings.enabled || compactionSettings.strategy === "off") return;
|
|
6597
6730
|
|
|
@@ -7243,7 +7376,17 @@ export class AgentSession {
|
|
|
7243
7376
|
addCandidate(this.#resolveRoleModelFull(role, availableModels, currentModel).model);
|
|
7244
7377
|
}
|
|
7245
7378
|
|
|
7246
|
-
|
|
7379
|
+
// Last-resort fallback: the largest-context model that shares the ACTIVE
|
|
7380
|
+
// model's provider. Scoping this to the current provider keeps auto-
|
|
7381
|
+
// compaction on the user's configured/custom route instead of silently
|
|
7382
|
+
// defaulting to an unrelated provider (e.g. a stray OpenAI credential
|
|
7383
|
+
// with no remaining credit) just because it happens to be in the bundled
|
|
7384
|
+
// catalog. Cross-provider compaction stays possible, but only when the
|
|
7385
|
+
// user opts in explicitly via modelRoles (handled by the loop above).
|
|
7386
|
+
const fallbackProvider = currentModel?.provider;
|
|
7387
|
+
const sortedByContext = [...availableModels]
|
|
7388
|
+
.filter(model => fallbackProvider === undefined || model.provider === fallbackProvider)
|
|
7389
|
+
.sort((a, b) => b.contextWindow - a.contextWindow);
|
|
7247
7390
|
for (const model of sortedByContext) {
|
|
7248
7391
|
if (!seen.has(this.#getModelKey(model))) {
|
|
7249
7392
|
addCandidate(model);
|
|
@@ -7271,6 +7414,25 @@ export class AgentSession {
|
|
|
7271
7414
|
);
|
|
7272
7415
|
}
|
|
7273
7416
|
|
|
7417
|
+
/**
|
|
7418
|
+
* Transport-affinity fields forwarded into local maintenance one-shot LLM
|
|
7419
|
+
* calls (compaction, handoff, branch summary) so they reuse the live turn's
|
|
7420
|
+
* provider session state and configured WebSocket transport preference
|
|
7421
|
+
* instead of falling back to a fresh HTTP/SSE session. Mirrors the
|
|
7422
|
+
* `providerSessionId ?? sessionId` affinity the agent loop sends per turn.
|
|
7423
|
+
*/
|
|
7424
|
+
#maintenanceProviderTransport(): {
|
|
7425
|
+
sessionId: string | undefined;
|
|
7426
|
+
providerSessionState: Map<string, ProviderSessionState>;
|
|
7427
|
+
preferWebsockets: boolean | undefined;
|
|
7428
|
+
} {
|
|
7429
|
+
return {
|
|
7430
|
+
sessionId: this.agent.providerSessionId ?? this.agent.sessionId,
|
|
7431
|
+
providerSessionState: this.#providerSessionState,
|
|
7432
|
+
preferWebsockets: this.agent.preferWebsockets,
|
|
7433
|
+
};
|
|
7434
|
+
}
|
|
7435
|
+
|
|
7274
7436
|
async #compactWithFallbackModel(
|
|
7275
7437
|
preparation: CompactionPreparation,
|
|
7276
7438
|
customInstructions: string | undefined,
|
|
@@ -7287,6 +7449,7 @@ export class AgentSession {
|
|
|
7287
7449
|
try {
|
|
7288
7450
|
return await compact(preparation, candidate, apiKey, customInstructions, signal, {
|
|
7289
7451
|
...options,
|
|
7452
|
+
...this.#maintenanceProviderTransport(),
|
|
7290
7453
|
metadata: this.agent.metadataForProvider(candidate.provider),
|
|
7291
7454
|
convertToLlm,
|
|
7292
7455
|
telemetry,
|
|
@@ -7367,11 +7530,13 @@ export class AgentSession {
|
|
|
7367
7530
|
reason: "overflow" | "threshold" | "idle",
|
|
7368
7531
|
willRetry: boolean,
|
|
7369
7532
|
deferred = false,
|
|
7370
|
-
options?: { continueAfterMaintenance?: boolean; deferHandoffMaintenance?: boolean },
|
|
7533
|
+
options?: { continueAfterMaintenance?: boolean; deferHandoffMaintenance?: boolean; force?: boolean },
|
|
7371
7534
|
): Promise<void> {
|
|
7372
7535
|
const compactionSettings = this.settings.getGroup("compaction");
|
|
7373
|
-
|
|
7374
|
-
|
|
7536
|
+
// `force` is the non-disableable emergency floor (F6): it bypasses the user's
|
|
7537
|
+
// disabled/off settings so a resource-floor breach still compacts before OOM.
|
|
7538
|
+
if (!options?.force && compactionSettings.strategy === "off") return;
|
|
7539
|
+
if (!options?.force && reason !== "idle" && !compactionSettings.enabled) return;
|
|
7375
7540
|
const generation = this.#promptGeneration;
|
|
7376
7541
|
if (
|
|
7377
7542
|
options?.deferHandoffMaintenance !== false &&
|
|
@@ -7574,6 +7739,7 @@ export class AgentSession {
|
|
|
7574
7739
|
while (true) {
|
|
7575
7740
|
try {
|
|
7576
7741
|
compactResult = await compact(preparation, candidate, apiKey, undefined, autoCompactionSignal, {
|
|
7742
|
+
...this.#maintenanceProviderTransport(),
|
|
7577
7743
|
promptOverride: compactionPrep.hookPrompt,
|
|
7578
7744
|
extraContext: compactionPrep.hookContext,
|
|
7579
7745
|
remoteInstructions: this.#baseSystemPrompt.join("\n\n"),
|
|
@@ -7799,7 +7965,12 @@ export class AgentSession {
|
|
|
7799
7965
|
*/
|
|
7800
7966
|
#isRetryableError(message: AssistantMessage): boolean {
|
|
7801
7967
|
const classification = this.#classifyErrorForRetry(message);
|
|
7802
|
-
return
|
|
7968
|
+
return (
|
|
7969
|
+
classification === "usage_limit" ||
|
|
7970
|
+
classification === "transient" ||
|
|
7971
|
+
classification === "unknown" ||
|
|
7972
|
+
classification === "first_event_timeout"
|
|
7973
|
+
);
|
|
7803
7974
|
}
|
|
7804
7975
|
|
|
7805
7976
|
#isTransientErrorMessage(errorMessage: string): boolean {
|
|
@@ -7825,6 +7996,33 @@ export class AgentSession {
|
|
|
7825
7996
|
);
|
|
7826
7997
|
}
|
|
7827
7998
|
|
|
7999
|
+
#isFirstEventTimeoutErrorMessage(errorMessage: string): boolean {
|
|
8000
|
+
// First-event timeout: the stream watchdog aborted because no event
|
|
8001
|
+
// arrived within the first-event window. Matches the shared lazy-stream
|
|
8002
|
+
// message and the per-provider variants
|
|
8003
|
+
// ("<Provider> stream timed out while waiting for the first event").
|
|
8004
|
+
return /timed?\s*out while waiting for the first event|timeout waiting for first/i.test(errorMessage);
|
|
8005
|
+
}
|
|
8006
|
+
|
|
8007
|
+
/**
|
|
8008
|
+
* Whether a first-event timeout on the error's provider should fail closed —
|
|
8009
|
+
* i.e. retry a bounded number of times (capped at retry.maxRetries) and then
|
|
8010
|
+
* surface, instead of joining the unbounded transient-retry class.
|
|
8011
|
+
*
|
|
8012
|
+
* Targets the ollama-chat API, which is exclusively ollama-cloud (local
|
|
8013
|
+
* Ollama uses the openai-responses API). That remote, queued backend can
|
|
8014
|
+
* stall before its first token even for tiny prompts; an unbounded
|
|
8015
|
+
* continuation retry re-issues the full request on every attempt and can
|
|
8016
|
+
* silently spike upstream usage (#713). First-party providers keep their
|
|
8017
|
+
* existing unbounded first-event-timeout retry behavior.
|
|
8018
|
+
*/
|
|
8019
|
+
#shouldFailClosedOnFirstEventTimeout(message: AssistantMessage): boolean {
|
|
8020
|
+
// Prefer the active model's API (the model that produced the error);
|
|
8021
|
+
// the errored message's API is a fallback for the rare case where the
|
|
8022
|
+
// session model has already moved on.
|
|
8023
|
+
return this.model?.api === "ollama-chat" || message.api === "ollama-chat";
|
|
8024
|
+
}
|
|
8025
|
+
|
|
7828
8026
|
#isTerminalErrorMessage(errorMessage: string): boolean {
|
|
7829
8027
|
// Errors that will never succeed on retry (auth/permission, malformed
|
|
7830
8028
|
// request, unknown/unsupported model). These surface immediately rather
|
|
@@ -7846,11 +8044,12 @@ export class AgentSession {
|
|
|
7846
8044
|
|
|
7847
8045
|
/**
|
|
7848
8046
|
* Ordered retry classification: overflow (compaction) -> terminal (surface)
|
|
7849
|
-
* -> usage_limit (rotation) -> transient (retry) -> unknown (retry).
|
|
8047
|
+
* -> usage_limit (rotation) -> first_event_timeout (bounded retry) ->
|
|
8048
|
+
* transient (retry) -> unknown (retry).
|
|
7850
8049
|
*/
|
|
7851
8050
|
#classifyErrorForRetry(
|
|
7852
8051
|
message: AssistantMessage,
|
|
7853
|
-
): "none" | "overflow" | "terminal" | "usage_limit" | "transient" | "unknown" {
|
|
8052
|
+
): "none" | "overflow" | "terminal" | "usage_limit" | "first_event_timeout" | "transient" | "unknown" {
|
|
7854
8053
|
if (message.stopReason !== "error" || !message.errorMessage) return "none";
|
|
7855
8054
|
const contextWindow = this.model?.contextWindow ?? 0;
|
|
7856
8055
|
if (isContextOverflow(message, contextWindow)) return "overflow";
|
|
@@ -7878,6 +8077,13 @@ export class AgentSession {
|
|
|
7878
8077
|
if (isTerminalHttp4xx && (explicitStatus !== undefined || !/rate.?limit|too many requests/i.test(err))) {
|
|
7879
8078
|
return "terminal";
|
|
7880
8079
|
}
|
|
8080
|
+
// A first-event timeout on ollama-cloud (the ollama-chat API) must not
|
|
8081
|
+
// join the unbounded transient class: each continuation retry re-issues
|
|
8082
|
+
// the full request to a remote, billable backend, so an unbounded loop
|
|
8083
|
+
// can silently spike usage (#713). Bound it to retry.maxRetries instead.
|
|
8084
|
+
if (this.#isFirstEventTimeoutErrorMessage(err) && this.#shouldFailClosedOnFirstEventTimeout(message)) {
|
|
8085
|
+
return "first_event_timeout";
|
|
8086
|
+
}
|
|
7881
8087
|
if (this.#isTransientErrorMessage(err)) return "transient";
|
|
7882
8088
|
return "unknown";
|
|
7883
8089
|
}
|
|
@@ -9381,6 +9587,7 @@ export class AgentSession {
|
|
|
9381
9587
|
}
|
|
9382
9588
|
const branchSummarySettings = this.settings.getGroup("branchSummary");
|
|
9383
9589
|
const result = await generateBranchSummary(entriesToSummarize, {
|
|
9590
|
+
...this.#maintenanceProviderTransport(),
|
|
9384
9591
|
model,
|
|
9385
9592
|
apiKey,
|
|
9386
9593
|
signal: this.#branchSummaryAbortController.signal,
|
|
@@ -9508,17 +9715,15 @@ export class AgentSession {
|
|
|
9508
9715
|
*/
|
|
9509
9716
|
getSessionStats(): SessionStats {
|
|
9510
9717
|
const state = this.state;
|
|
9511
|
-
|
|
9512
|
-
|
|
9513
|
-
|
|
9514
|
-
|
|
9718
|
+
let userMessages = 0;
|
|
9719
|
+
let assistantMessages = 0;
|
|
9720
|
+
let toolResults = 0;
|
|
9515
9721
|
let toolCalls = 0;
|
|
9516
9722
|
let totalInput = 0;
|
|
9517
9723
|
let totalOutput = 0;
|
|
9518
9724
|
let totalCacheRead = 0;
|
|
9519
9725
|
let totalCacheWrite = 0;
|
|
9520
9726
|
let totalCost = 0;
|
|
9521
|
-
|
|
9522
9727
|
let totalPremiumRequests = 0;
|
|
9523
9728
|
const getTaskToolUsage = (details: unknown): Usage | undefined => {
|
|
9524
9729
|
if (!details || typeof details !== "object") return undefined;
|
|
@@ -9528,8 +9733,13 @@ export class AgentSession {
|
|
|
9528
9733
|
return usage as Usage;
|
|
9529
9734
|
};
|
|
9530
9735
|
|
|
9736
|
+
// Single pass over messages (replaces three role filters plus a separate usage
|
|
9737
|
+
// loop) so per-turn stats stay O(messages + assistant content blocks), not O(4N).
|
|
9531
9738
|
for (const message of state.messages) {
|
|
9532
|
-
if (message.role === "assistant") {
|
|
9739
|
+
if (message.role === "user") {
|
|
9740
|
+
userMessages += 1;
|
|
9741
|
+
} else if (message.role === "assistant") {
|
|
9742
|
+
assistantMessages += 1;
|
|
9533
9743
|
const assistantMsg = message as AssistantMessage;
|
|
9534
9744
|
toolCalls += assistantMsg.content.filter(c => c.type === "toolCall").length;
|
|
9535
9745
|
totalInput += assistantMsg.usage.input;
|
|
@@ -9538,17 +9748,18 @@ export class AgentSession {
|
|
|
9538
9748
|
totalCacheWrite += assistantMsg.usage.cacheWrite;
|
|
9539
9749
|
totalPremiumRequests += assistantMsg.usage.premiumRequests ?? 0;
|
|
9540
9750
|
totalCost += assistantMsg.usage.cost.total;
|
|
9541
|
-
}
|
|
9542
|
-
|
|
9543
|
-
|
|
9544
|
-
|
|
9545
|
-
|
|
9546
|
-
|
|
9547
|
-
|
|
9548
|
-
|
|
9549
|
-
|
|
9550
|
-
|
|
9551
|
-
|
|
9751
|
+
} else if (message.role === "toolResult") {
|
|
9752
|
+
toolResults += 1;
|
|
9753
|
+
if (message.toolName === "task") {
|
|
9754
|
+
const usage = getTaskToolUsage(message.details);
|
|
9755
|
+
if (usage) {
|
|
9756
|
+
totalInput += usage.input;
|
|
9757
|
+
totalOutput += usage.output;
|
|
9758
|
+
totalCacheRead += usage.cacheRead;
|
|
9759
|
+
totalCacheWrite += usage.cacheWrite;
|
|
9760
|
+
totalPremiumRequests += usage.premiumRequests ?? 0;
|
|
9761
|
+
totalCost += usage.cost.total;
|
|
9762
|
+
}
|
|
9552
9763
|
}
|
|
9553
9764
|
}
|
|
9554
9765
|
}
|
|
@@ -9709,11 +9920,46 @@ export class AgentSession {
|
|
|
9709
9920
|
return tokens;
|
|
9710
9921
|
}
|
|
9711
9922
|
|
|
9923
|
+
#nativeTokenCache = new WeakMap<AgentMessage, { len: number; tokens: number }>();
|
|
9924
|
+
|
|
9925
|
+
/** Cheap content-size signal to invalidate the native token cache on mutation (growth). */
|
|
9926
|
+
/**
|
|
9927
|
+
* Cheap content-size signal to invalidate the native token cache on mutation. Recursively
|
|
9928
|
+
* sums string lengths across the whole message (depth-bounded), so it covers every
|
|
9929
|
+
* provider-visible shape (text/thinking/tool args, toolResult output, tool names, etc.)
|
|
9930
|
+
* without allocating a serialized copy. A size-preserving in-place edit yields only a
|
|
9931
|
+
* benign estimate drift.
|
|
9932
|
+
*/
|
|
9933
|
+
#messageTokenSize(value: unknown, depth = 0): number {
|
|
9934
|
+
if (depth > 6) return 0;
|
|
9935
|
+
if (typeof value === "string") return value.length;
|
|
9936
|
+
if (typeof value === "number" || typeof value === "boolean") return 8;
|
|
9937
|
+
if (Array.isArray(value)) {
|
|
9938
|
+
let size = 0;
|
|
9939
|
+
for (const item of value) size += this.#messageTokenSize(item, depth + 1);
|
|
9940
|
+
return size;
|
|
9941
|
+
}
|
|
9942
|
+
if (value && typeof value === "object") {
|
|
9943
|
+
let size = 0;
|
|
9944
|
+
for (const item of Object.values(value)) size += this.#messageTokenSize(item, depth + 1);
|
|
9945
|
+
return size;
|
|
9946
|
+
}
|
|
9947
|
+
return 0;
|
|
9948
|
+
}
|
|
9949
|
+
|
|
9712
9950
|
#estimateMessageNativeContextTokens(message: AgentMessage): number {
|
|
9951
|
+
// F10/F22: cache the expensive native token count per message object, invalidated by a
|
|
9952
|
+
// cheap content-size signal, so unchanged (stable-size) messages are not re-tokenized on
|
|
9953
|
+
// every pre-prompt estimate. A rare size-preserving in-place edit yields only a benign
|
|
9954
|
+
// token-estimate drift, never wrong output.
|
|
9955
|
+
const len = this.#messageTokenSize(message);
|
|
9956
|
+
const cached = this.#nativeTokenCache.get(message);
|
|
9957
|
+
if (cached && cached.len === len) return cached.tokens;
|
|
9713
9958
|
let tokens = 0;
|
|
9714
9959
|
for (const llmMessage of convertToLlm([message])) {
|
|
9715
9960
|
tokens += estimateTokens(llmMessage);
|
|
9716
9961
|
}
|
|
9962
|
+
this.#nativeTokenCache.set(message, { len, tokens });
|
|
9717
9963
|
return tokens;
|
|
9718
9964
|
}
|
|
9719
9965
|
|
package/src/session/artifacts.ts
CHANGED
|
@@ -7,6 +7,11 @@
|
|
|
7
7
|
import * as fs from "node:fs/promises";
|
|
8
8
|
import * as path from "node:path";
|
|
9
9
|
|
|
10
|
+
import { DEFAULT_ARTIFACT_MAX_BYTES, truncateHeadBytes } from "./streaming-output";
|
|
11
|
+
export interface ArtifactSaveOptions {
|
|
12
|
+
maxBytes?: number;
|
|
13
|
+
}
|
|
14
|
+
|
|
10
15
|
/**
|
|
11
16
|
* Manages artifact storage for a session.
|
|
12
17
|
*
|
|
@@ -94,9 +99,19 @@ export class ArtifactManager {
|
|
|
94
99
|
* @param toolType Tool name for file extension (e.g., "bash", "read")
|
|
95
100
|
* @returns Artifact ID (numeric string)
|
|
96
101
|
*/
|
|
97
|
-
async save(content: string, toolType: string): Promise<string> {
|
|
102
|
+
async save(content: string, toolType: string, options: ArtifactSaveOptions = {}): Promise<string> {
	// The destination is renamed locally so it does not shadow the `node:path` import.
	const { id, path: artifactPath } = await this.allocatePath(toolType);

	// Negative limits are clamped to zero; a missing limit uses the shared default.
	const byteLimit = Math.max(0, options.maxBytes ?? DEFAULT_ARTIFACT_MAX_BYTES);
	const totalBytes = Buffer.byteLength(content, "utf-8");

	// Content within the limit is stored verbatim. Oversized content keeps only its head
	// and gets a trailing marker reporting how much was kept and how much was dropped.
	let payload = content;
	if (totalBytes > byteLimit) {
		const head = truncateHeadBytes(content, byteLimit);
		payload = `${head.text}\n[artifact truncated after ${head.bytes} bytes; omitted at least ${totalBytes - head.bytes} bytes]\n`;
	}

	await Bun.write(artifactPath, payload);
	return id;
}
|
|
102
117
|
|
|
@@ -167,19 +167,49 @@ export class EphemeralBlobStore extends BlobStore {
|
|
|
167
167
|
}
|
|
168
168
|
|
|
169
169
|
export class MemoryBlobStore extends BlobStore {
|
|
170
|
+
/**
|
|
171
|
+
* Generous byte/count LRU bound (F8). Content-addressed resident blobs are fail-closed
|
|
172
|
+
* on miss (callers raise/handle {@link ResidentBlobMissingError}), so evicting the
|
|
173
|
+
* least-recently-used entry on an extremely large session is preferable to unbounded
|
|
174
|
+
* RAM growth. The caps sit well above normal usage and only trip on pathological sizes.
|
|
175
|
+
*/
|
|
176
|
+
static readonly #MAX_BYTES = 64 * 1024 * 1024;
|
|
177
|
+
static readonly #MAX_COUNT = 4096;
|
|
178
|
+
|
|
170
179
|
#blobs = new Map<string, Buffer>();
|
|
180
|
+
#bytes = 0;
|
|
171
181
|
|
|
172
182
|
constructor() {
|
|
173
183
|
super(":memory:");
|
|
174
184
|
}
|
|
175
185
|
|
|
186
|
+
/**
 * Insert (or re-insert) a blob as the most recently used entry, then evict from the
 * least recently used end until both the byte and count caps are satisfied.
 * The entry just stored is never evicted: eviction stops once a single blob remains.
 */
#store(hash: string, data: Buffer): void {
	// Drop any previous entry for this hash first so the byte total stays accurate and
	// the re-inserted key moves to the end of the Map's insertion order.
	const previous = this.#blobs.get(hash);
	if (previous) {
		this.#bytes -= previous.byteLength;
		this.#blobs.delete(hash);
	}

	this.#blobs.set(hash, data);
	this.#bytes += data.byteLength;

	const overBudget = (): boolean =>
		this.#bytes > MemoryBlobStore.#MAX_BYTES || this.#blobs.size > MemoryBlobStore.#MAX_COUNT;

	while (this.#blobs.size > 1 && overBudget()) {
		// Map iterates in insertion order, so the first entry is the least recently used.
		const head = this.#blobs.entries().next();
		if (head.done) break;
		const [victimHash, victim] = head.value;
		this.#blobs.delete(victimHash);
		this.#bytes -= victim.byteLength;
	}
}
|
|
205
|
+
|
|
176
206
|
/** Async entry point; the store is in-memory, so this simply defers to {@link putSync}. */
async put(data: Buffer): Promise<BlobPutResult> {
	const stored = this.putSync(data);
	return stored;
}
|
|
179
209
|
|
|
180
210
|
putSync(data: Buffer): BlobPutResult {
|
|
181
211
|
const hash = new Bun.SHA256().update(data).digest("hex");
|
|
182
|
-
this.#
|
|
212
|
+
this.#store(hash, Buffer.from(data));
|
|
183
213
|
return {
|
|
184
214
|
hash,
|
|
185
215
|
path: `memory:${hash}`,
|
|
@@ -195,7 +225,11 @@ export class MemoryBlobStore extends BlobStore {
|
|
|
195
225
|
|
|
196
226
|
/**
 * Look up a blob by hash.
 *
 * @returns A copy of the stored bytes, or `null` when the hash is not resident.
 */
getSync(hash: string): Buffer | null {
	const resident = this.#blobs.get(hash);
	if (!resident) {
		return null;
	}

	// Touch the entry: delete + set moves it to the end of the Map's insertion order,
	// so frequently read blobs are the last candidates for LRU eviction.
	this.#blobs.delete(hash);
	this.#blobs.set(hash, resident);

	// Hand out a copy so callers cannot mutate the stored buffer.
	return Buffer.from(resident);
}
|
|
200
234
|
|
|
201
235
|
async has(hash: string): Promise<boolean> {
|
|
@@ -889,8 +889,27 @@ async function resolvePersistedBlobRefs(value: unknown, blobStore: BlobStore, ke
|
|
|
889
889
|
);
|
|
890
890
|
}
|
|
891
891
|
|
|
892
|
+
/** Maximum number of blob-resolution tasks allowed to be in flight at once. */
const BLOB_RESOLVE_CONCURRENCY = 8;

/**
 * Run async tasks with bounded concurrency so an image-heavy resume never materializes
 * every blob's base64 simultaneously (F8: avoids the transient OOM spike of an unbounded
 * Promise.all over all historical images).
 *
 * Tasks are started in array order by a fixed pool of workers that share one cursor.
 * Once any task rejects, no further tasks are started; tasks already in flight are left
 * to settle on their own.
 *
 * @param tasks Thunks to execute; each is invoked at most once.
 * @param limit Maximum number of tasks in flight. Values below 1 are treated as 1,
 *   fractional values are floored, and NaN falls back to 1 (sequential execution).
 * @returns A promise that resolves once every task has completed, or rejects with the
 *   first task error.
 */
async function runWithConcurrency(tasks: Array<() => Promise<void>>, limit: number): Promise<void> {
	if (tasks.length === 0) return;

	// NaN would otherwise propagate through Math.min/Math.max and yield zero workers,
	// making the call resolve without running anything.
	const requested = Number.isNaN(limit) ? 1 : Math.floor(limit);
	const workerCount = Math.max(1, Math.min(requested, tasks.length));

	let next = 0;
	let failed = false;

	const worker = async (): Promise<void> => {
		// Claiming an index is synchronous, so workers never pick the same task.
		while (!failed && next < tasks.length) {
			const index = next;
			next += 1;
			try {
				await tasks[index]!();
			} catch (error) {
				// Stop the other workers from pulling more work after a failure.
				failed = true;
				throw error;
			}
		}
	};

	await Promise.all(Array.from({ length: workerCount }, () => worker()));
}
|
|
910
|
+
|
|
892
911
|
async function resolveBlobRefsInEntries(entries: FileEntry[], blobStore: BlobStore): Promise<void> {
|
|
893
|
-
const
|
|
912
|
+
const tasks: Array<() => Promise<void>> = [];
|
|
894
913
|
|
|
895
914
|
for (const entry of entries) {
|
|
896
915
|
if (entry.type === "session") continue;
|
|
@@ -902,22 +921,19 @@ async function resolveBlobRefsInEntries(entries: FileEntry[], blobStore: BlobSto
|
|
|
902
921
|
contentArray = entry.content;
|
|
903
922
|
}
|
|
904
923
|
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
resolveImageData(blobStore, block.data)
|
|
910
|
-
|
|
911
|
-
}),
|
|
912
|
-
);
|
|
924
|
+
tasks.push(async () => {
|
|
925
|
+
if (contentArray) {
|
|
926
|
+
for (const block of contentArray) {
|
|
927
|
+
if (isImageBlock(block) && isBlobRef(block.data)) {
|
|
928
|
+
block.data = await resolveImageData(blobStore, block.data);
|
|
929
|
+
}
|
|
913
930
|
}
|
|
914
931
|
}
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
promises.push(resolvePersistedBlobRefs(entry, blobStore));
|
|
932
|
+
await resolvePersistedBlobRefs(entry, blobStore);
|
|
933
|
+
});
|
|
918
934
|
}
|
|
919
935
|
|
|
920
|
-
await
|
|
936
|
+
await runWithConcurrency(tasks, BLOB_RESOLVE_CONCURRENCY);
|
|
921
937
|
}
|
|
922
938
|
|
|
923
939
|
/**
|