@poolzin/pool-bot 2026.2.7 → 2026.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ ## v2026.2.8 (2026-02-15)
2
+
3
+ ### Features
4
+ - NVIDIA dynamic model discovery — users with `NVIDIA_API_KEY` now see 50+ free-tier NIM models (Llama 3.3/3.1, DeepSeek R1, Mistral Large 2, Qwen 2.5, vision models, and more) instead of a single hardcoded Nemotron Ultra 253B
5
+ - Follows the Venice discovery pattern: static fallback catalog + live `/v1/models` fetch with 5s timeout
6
+
7
+ ### Fixes
8
+ - Use the provider's default vision model (not the active chat model) for image understanding, fixing media pipeline failures on text-only models
9
+
10
+ ### Improvements
11
+ - Upstream port: security hardening, graceful restart, Ollama native streaming, tool mutation, compaction safety
12
+
13
+ ---
14
+
1
15
  ## v2026.1.40 (2026-02-09)
2
16
 
3
17
  ### Rebrand
@@ -5,6 +5,7 @@ import { discoverBedrockModels } from "./bedrock-discovery.js";
5
5
  import { buildCloudflareAiGatewayModelDefinition, resolveCloudflareAiGatewayBaseUrl, } from "./cloudflare-ai-gateway.js";
6
6
  import { buildSyntheticModelDefinition, SYNTHETIC_BASE_URL, SYNTHETIC_MODEL_CATALOG, } from "./synthetic-models.js";
7
7
  import { TOGETHER_BASE_URL, TOGETHER_MODEL_CATALOG, buildTogetherModelDefinition, } from "./together-models.js";
8
+ import { discoverNvidiaModels, NVIDIA_BASE_URL } from "./nvidia-models.js";
8
9
  import { discoverVeniceModels, VENICE_BASE_URL } from "./venice-models.js";
9
10
  const MINIMAX_API_BASE_URL = "https://api.minimax.chat/v1";
10
11
  const MINIMAX_PORTAL_BASE_URL = "https://api.minimax.io/anthropic";
@@ -345,6 +346,14 @@ function buildTogetherProvider() {
345
346
  models: TOGETHER_MODEL_CATALOG.map(buildTogetherModelDefinition),
346
347
  };
347
348
  }
/**
 * Build the implicit NVIDIA provider entry.
 * The model list comes from live discovery against the NIM API, with a
 * static-catalog fallback handled inside discoverNvidiaModels.
 */
async function buildNvidiaProvider(apiKey) {
    return {
        baseUrl: NVIDIA_BASE_URL,
        api: "openai-completions",
        models: await discoverNvidiaModels(apiKey),
    };
}
348
357
  export function buildQianfanProvider() {
349
358
  return {
350
359
  baseUrl: QIANFAN_BASE_URL,
@@ -457,6 +466,11 @@ export async function resolveImplicitProviders(params) {
457
466
  if (qianfanKey) {
458
467
  providers.qianfan = { ...buildQianfanProvider(), apiKey: qianfanKey };
459
468
  }
469
+ const nvidiaKey = resolveEnvApiKeyVarName("nvidia") ??
470
+ resolveApiKeyFromProfiles({ provider: "nvidia", store: authStore });
471
+ if (nvidiaKey) {
472
+ providers.nvidia = { ...(await buildNvidiaProvider(nvidiaKey)), apiKey: nvidiaKey };
473
+ }
460
474
  return providers;
461
475
  }
462
476
  export async function resolveImplicitCopilotProvider(params) {
@@ -0,0 +1,228 @@
// Base endpoint for NVIDIA's OpenAI-compatible NIM API.
export const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";

// Model used when the user has not explicitly picked one.
export const NVIDIA_DEFAULT_MODEL_ID = "llama-3.1-nemotron-ultra-253b-v1";

// NVIDIA free-tier models are billed at zero per-token cost.
export const NVIDIA_DEFAULT_COST = {
  input: 0,
  output: 0,
  cacheRead: 0,
  cacheWrite: 0,
};
/**
 * Static catalog of popular NVIDIA NIM free-tier models.
 *
 * NVIDIA's `integrate.api.nvidia.com` hosts 50+ models via OpenAI-compatible
 * `/v1/chat/completions`. This catalog acts as a fallback when the `/v1/models`
 * endpoint is unreachable or the API key isn't valid for discovery.
 *
 * Model IDs match what NVIDIA's API returns — some use `org/model` format,
 * others use bare IDs.
 */
export const NVIDIA_MODEL_CATALOG = [
  // Flagship
  { id: "llama-3.1-nemotron-ultra-253b-v1", name: "Nemotron Ultra 253B", reasoning: true, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
  // Meta Llama
  { id: "meta/llama-3.3-70b-instruct", name: "Llama 3.3 70B Instruct", reasoning: false, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
  { id: "meta/llama-3.1-405b-instruct", name: "Llama 3.1 405B Instruct", reasoning: false, input: ["text"], contextWindow: 131072, maxTokens: 4096 },
  { id: "meta/llama-3.1-8b-instruct", name: "Llama 3.1 8B Instruct", reasoning: false, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
  // DeepSeek
  { id: "deepseek-ai/deepseek-r1", name: "DeepSeek R1", reasoning: true, input: ["text"], contextWindow: 65536, maxTokens: 8192 },
  // Mistral
  { id: "mistralai/mistral-large-2-instruct", name: "Mistral Large 2 Instruct", reasoning: false, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
  // Qwen
  { id: "qwen/qwen2.5-72b-instruct", name: "Qwen 2.5 72B Instruct", reasoning: false, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
  { id: "qwen/qwq-32b", name: "QwQ 32B", reasoning: true, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
  // Vision models
  { id: "microsoft/phi-3.5-vision-instruct", name: "Phi 3.5 Vision Instruct", reasoning: false, input: ["text", "image"], contextWindow: 131072, maxTokens: 4096 },
  { id: "nvidia/llama-3.2-nv-vision-instruct-v1", name: "Llama 3.2 NV Vision Instruct", reasoning: false, input: ["text", "image"], contextWindow: 131072, maxTokens: 4096 },
  // Code
  { id: "nvidia/llama-3.1-nemotron-70b-instruct", name: "Nemotron 70B Instruct", reasoning: false, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
  { id: "ibm/granite-3.1-8b-instruct", name: "Granite 3.1 8B Instruct", reasoning: false, input: ["text"], contextWindow: 131072, maxTokens: 8192 },
];
/**
 * Build a ModelDefinitionConfig from a static catalog entry.
 *
 * @param {{id: string, name: string, reasoning: boolean, input: string[],
 *          contextWindow: number, maxTokens: number}} entry catalog entry
 * @returns an independent model definition with zero-cost pricing
 */
export function buildNvidiaModelDefinition(entry) {
  return {
    id: entry.id,
    name: entry.name,
    reasoning: entry.reasoning,
    input: [...entry.input],
    // Copy the cost object: previously every definition shared the single
    // mutable NVIDIA_DEFAULT_COST instance, so mutating one model's cost
    // would silently leak into all other NVIDIA models.
    cost: { ...NVIDIA_DEFAULT_COST },
    contextWindow: entry.contextWindow,
    maxTokens: entry.maxTokens,
  };
}
// Substrings marking embedding / reranking / image-gen / speech model IDs —
// none of these are chat-completion compatible.
const NON_CHAT_PATTERNS = [
  "embed",
  "rerank",
  "nv-rerankqa",
  "sdxl",
  "stable-diffusion",
  "consistory",
  "shutterstock",
  "image",
  "usearch",
  "parakeet",
  "canary",
  "whisper",
  "speech",
  "audio",
  "tts",
  "asr",
];

/** Heuristic: true unless the lowercased model ID contains a non-chat pattern. */
function isLikelyChatModel(id) {
  const lowered = id.toLowerCase();
  for (const pattern of NON_CHAT_PATTERNS) {
    if (lowered.includes(pattern)) {
      return false;
    }
  }
  return true;
}
/**
 * Discover models from NVIDIA's OpenAI-compatible `/v1/models` endpoint.
 *
 * Requires a valid API key (unlike Venice which is public).
 * Falls back to the static catalog on any failure, so the result is never empty.
 *
 * @param {string} apiKey bearer token for integrate.api.nvidia.com
 * @returns {Promise<object[]>} model definitions
 */
export async function discoverNvidiaModels(apiKey) {
  // Single fallback path (previously duplicated four times inline).
  const staticFallback = () => NVIDIA_MODEL_CATALOG.map(buildNvidiaModelDefinition);
  // Skip API discovery in test environments.
  if (process.env.NODE_ENV === "test" || process.env.VITEST) {
    return staticFallback();
  }
  // Without a key the endpoint can only reject the request — skip the round trip.
  if (!apiKey) {
    return staticFallback();
  }
  try {
    const response = await fetch(`${NVIDIA_BASE_URL}/models`, {
      headers: { Authorization: `Bearer ${apiKey}` },
      signal: AbortSignal.timeout(5000),
    });
    if (!response.ok) {
      console.warn(`[nvidia-models] Discovery failed: HTTP ${response.status}, using static catalog`);
      return staticFallback();
    }
    const data = await response.json();
    if (!Array.isArray(data.data) || data.data.length === 0) {
      console.warn("[nvidia-models] No models returned from API, using static catalog");
      return staticFallback();
    }
    // Curated metadata wins for known IDs; unknown IDs get heuristic defaults.
    const catalogById = new Map(NVIDIA_MODEL_CATALOG.map((m) => [m.id, m]));
    const models = [];
    for (const apiModel of data.data) {
      if (!isLikelyChatModel(apiModel.id)) {
        continue;
      }
      const catalogEntry = catalogById.get(apiModel.id);
      models.push(catalogEntry
        ? buildNvidiaModelDefinition(catalogEntry)
        : buildDiscoveredNvidiaModelDefinition(apiModel.id));
    }
    return models.length > 0 ? models : staticFallback();
  }
  catch (error) {
    console.warn(`[nvidia-models] Discovery failed: ${String(error)}, using static catalog`);
    return staticFallback();
  }
}

/**
 * Best-effort definition for a discovered model ID absent from the static
 * catalog: infer reasoning/vision support from ID substrings and derive a
 * display name from the ID.
 */
function buildDiscoveredNvidiaModelDefinition(id) {
  const lower = id.toLowerCase();
  const isReasoning = lower.includes("r1") ||
    lower.includes("reasoning") ||
    lower.includes("thinking") ||
    lower.includes("nemotron-ultra");
  const hasVision = lower.includes("vision") || lower.includes("-vl-") || lower.includes("nvlm");
  // Derive a human-friendly name: strip the org/ prefix, then title-case.
  const name = id
    .replace(/^[^/]+\//, "")
    .replace(/-/g, " ")
    .replace(/\b\w/g, (c) => c.toUpperCase());
  return {
    id,
    name,
    reasoning: isReasoning,
    input: hasVision ? ["text", "image"] : ["text"],
    // Copy so each definition owns its cost object.
    cost: { ...NVIDIA_DEFAULT_COST },
    contextWindow: 131072, // safe default for NIM models
    maxTokens: 8192,
  };
}
@@ -0,0 +1,294 @@
1
+ import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
2
+ import { randomUUID } from "node:crypto";
// Default address of a locally running Ollama daemon.
export const OLLAMA_NATIVE_BASE_URL = "http://127.0.0.1:11434";
/**
 * Collapse SDK message content into a single plain-text string.
 * Strings pass through; arrays contribute only their `text` parts; anything
 * else yields "".
 */
function extractTextContent(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const texts = [];
  for (const part of content) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }
  return texts.join("");
}
/** Collect base64 image payloads (`data` of image parts) from SDK content. */
function extractOllamaImages(content) {
  const images = [];
  if (Array.isArray(content)) {
    for (const part of content) {
      if (part.type === "image") {
        images.push(part.data);
      }
    }
  }
  return images;
}
/**
 * Extract tool invocations from SDK assistant content into Ollama's
 * `tool_calls` shape. Accepts both the SDK's "toolCall" parts (arguments in
 * `arguments`) and Anthropic-style "tool_use" parts (arguments in `input`).
 */
function extractToolCalls(content) {
  if (!Array.isArray(content)) {
    return [];
  }
  const calls = [];
  for (const part of content) {
    if (part.type !== "toolCall" && part.type !== "tool_use") {
      continue;
    }
    const args = part.type === "toolCall" ? part.arguments : part.input;
    calls.push({ function: { name: part.name, arguments: args } });
  }
  return calls;
}
/**
 * Convert SDK chat messages (plus an optional system prompt) into Ollama's
 * native /api/chat message format. Unknown roles are dropped.
 */
export function convertToOllamaMessages(messages, system) {
  const out = [];
  if (system) {
    out.push({ role: "system", content: system });
  }
  for (const msg of messages) {
    switch (msg.role) {
      case "user": {
        const images = extractOllamaImages(msg.content);
        const entry = { role: "user", content: extractTextContent(msg.content) };
        if (images.length > 0) {
          entry.images = images;
        }
        out.push(entry);
        break;
      }
      case "assistant": {
        const toolCalls = extractToolCalls(msg.content);
        const entry = { role: "assistant", content: extractTextContent(msg.content) };
        if (toolCalls.length > 0) {
          entry.tool_calls = toolCalls;
        }
        out.push(entry);
        break;
      }
      case "tool":
      case "toolResult": {
        // SDK uses "toolResult" (camelCase) for tool result messages;
        // Ollama's native spec expects a "tool" role with tool_name.
        const entry = { role: "tool", content: extractTextContent(msg.content) };
        // Only attach tool_name for a non-empty string (empty stays omitted).
        if (typeof msg.toolName === "string" && msg.toolName) {
          entry.tool_name = msg.toolName;
        }
        out.push(entry);
        break;
      }
      default:
        break;
    }
  }
  return out;
}
// ── Tool extraction ─────────────────────────────────────────────────────────

/**
 * Convert SDK tool definitions into Ollama's function-tool format.
 * Entries without a non-empty string name are skipped; missing descriptions
 * and parameter schemas get empty defaults.
 */
function extractOllamaTools(tools) {
  if (!Array.isArray(tools)) {
    return [];
  }
  const converted = [];
  for (const tool of tools) {
    const { name } = tool;
    if (typeof name !== "string" || name === "") {
      continue;
    }
    converted.push({
      type: "function",
      function: {
        name,
        description: typeof tool.description === "string" ? tool.description : "",
        parameters: tool.parameters ?? {},
      },
    });
  }
  return converted;
}
// ── Response conversion ─────────────────────────────────────────────────────

/**
 * Convert a final (done:true) Ollama chat response into an SDK assistant
 * message: text becomes a text part, each tool_call a toolCall part with a
 * fresh id; usage is taken from prompt_eval_count/eval_count at zero cost.
 */
export function buildAssistantMessage(response, modelInfo) {
  const { message } = response;
  const content = [];
  if (message.content) {
    content.push({ type: "text", text: message.content });
  }
  const toolCalls = message.tool_calls ?? [];
  for (const call of toolCalls) {
    content.push({
      type: "toolCall",
      id: `ollama_call_${randomUUID()}`,
      name: call.function.name,
      arguments: call.function.arguments,
    });
  }
  const inputTokens = response.prompt_eval_count ?? 0;
  const outputTokens = response.eval_count ?? 0;
  return {
    role: "assistant",
    content,
    stopReason: toolCalls.length > 0 ? "toolUse" : "stop",
    api: modelInfo.api,
    provider: modelInfo.provider,
    model: modelInfo.id,
    usage: {
      input: inputTokens,
      output: outputTokens,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: inputTokens + outputTokens,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    },
    timestamp: Date.now(),
  };
}
// ── NDJSON streaming parser ─────────────────────────────────────────────────

/**
 * Incrementally parse a newline-delimited JSON byte stream, yielding each
 * parsed object. Malformed lines are logged and skipped rather than aborting
 * the stream. A trailing object without a final newline is still yielded.
 *
 * @param reader a ReadableStreamDefaultReader yielding Uint8Array chunks
 */
export async function* parseNdjsonStream(reader) {
  const decoder = new TextDecoder();
  let buffer = "";
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // Keep the last (possibly partial) line in the buffer.
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed) {
          continue;
        }
        try {
          yield JSON.parse(trimmed);
        }
        catch {
          console.warn("[ollama-stream] Skipping malformed NDJSON line:", trimmed.slice(0, 120));
        }
      }
    }
    // Flush the decoder: previously missing, so a stream ending mid
    // multi-byte UTF-8 sequence silently dropped the buffered partial bytes.
    buffer += decoder.decode();
    if (buffer.trim()) {
      try {
        yield JSON.parse(buffer.trim());
      }
      catch {
        console.warn("[ollama-stream] Skipping malformed trailing data:", buffer.trim().slice(0, 120));
      }
    }
  }
  finally {
    // Release the lock so the underlying body can be cancelled/collected
    // even when the consumer stops iterating early (generator return path).
    reader.releaseLock?.();
  }
}
// ── Main StreamFn factory ───────────────────────────────────────────────────

/**
 * Normalize a configured base URL into Ollama's native chat endpoint.
 * Trailing slashes and an OpenAI-compat `/v1` suffix are stripped; an empty
 * base falls back to the local default daemon address.
 */
function resolveOllamaChatUrl(baseUrl) {
  let base = baseUrl.trim().replace(/\/+$/, "");
  base = base.replace(/\/v1$/i, "");
  return `${base || OLLAMA_NATIVE_BASE_URL}/api/chat`;
}
/**
 * Build a StreamFn targeting Ollama's native `/api/chat` endpoint.
 *
 * The returned function kicks off the request on a microtask, accumulates
 * NDJSON chunks into a final assistant message, and pushes a single "done"
 * (or "error") event onto an assistant-message event stream.
 */
export function createOllamaStreamFn(baseUrl) {
  const chatUrl = resolveOllamaChatUrl(baseUrl);
  return (model, context, options) => {
    const stream = createAssistantMessageEventStream();
    const run = async () => {
      try {
        const messages = convertToOllamaMessages(context.messages ?? [], context.systemPrompt);
        const tools = extractOllamaTools(context.tools);
        // Ollama defaults to num_ctx=4096, which is too small for large
        // system prompts + many tool definitions. Use the model's window.
        const modelOptions = { num_ctx: model.contextWindow ?? 65536 };
        if (typeof options?.temperature === "number") {
          modelOptions.temperature = options.temperature;
        }
        if (typeof options?.maxTokens === "number") {
          modelOptions.num_predict = options.maxTokens;
        }
        const requestHeaders = {
          "Content-Type": "application/json",
          ...options?.headers,
        };
        if (options?.apiKey) {
          requestHeaders.Authorization = `Bearer ${options.apiKey}`;
        }
        const response = await fetch(chatUrl, {
          method: "POST",
          headers: requestHeaders,
          body: JSON.stringify({
            model: model.id,
            messages,
            stream: true,
            ...(tools.length > 0 ? { tools } : {}),
            options: modelOptions,
          }),
          signal: options?.signal,
        });
        if (!response.ok) {
          const errorText = await response.text().catch(() => "unknown error");
          throw new Error(`Ollama API error ${response.status}: ${errorText}`);
        }
        if (!response.body) {
          throw new Error("Ollama API returned empty response body");
        }
        let textSoFar = "";
        const toolCallsSoFar = [];
        let finalChunk;
        for await (const chunk of parseNdjsonStream(response.body.getReader())) {
          if (chunk.message?.content) {
            textSoFar += chunk.message.content;
          }
          // Ollama sends tool_calls in intermediate (done:false) chunks,
          // NOT in the final done:true chunk — collect from every chunk.
          if (chunk.message?.tool_calls) {
            toolCallsSoFar.push(...chunk.message.tool_calls);
          }
          if (chunk.done) {
            finalChunk = chunk;
            break;
          }
        }
        if (!finalChunk) {
          throw new Error("Ollama API stream ended without a final response");
        }
        finalChunk.message.content = textSoFar;
        if (toolCallsSoFar.length > 0) {
          finalChunk.message.tool_calls = toolCallsSoFar;
        }
        const assistantMessage = buildAssistantMessage(finalChunk, {
          api: model.api,
          provider: model.provider,
          id: model.id,
        });
        stream.push({
          type: "done",
          reason: assistantMessage.stopReason === "toolUse" ? "toolUse" : "stop",
          message: assistantMessage,
        });
      }
      catch (err) {
        // Surface the failure as an error event carrying an empty assistant
        // message with zeroed usage.
        stream.push({
          type: "error",
          reason: "error",
          error: {
            role: "assistant",
            content: [],
            stopReason: "error",
            errorMessage: err instanceof Error ? err.message : String(err),
            api: model.api,
            provider: model.provider,
            model: model.id,
            usage: {
              input: 0,
              output: 0,
              cacheRead: 0,
              cacheWrite: 0,
              totalTokens: 0,
              cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
            },
            timestamp: Date.now(),
          },
        });
      }
      finally {
        stream.end();
      }
    };
    queueMicrotask(() => void run());
    return stream;
  };
}
@@ -0,0 +1,5 @@
1
+ import { withTimeout } from "../../node-host/with-timeout.js";
// Hard ceiling (5 min) on a single compaction pass so a hung provider call
// cannot stall the session indefinitely.
export const EMBEDDED_COMPACTION_TIMEOUT_MS = 300_000;

/**
 * Run `compact` under a safety timeout; timeout semantics are delegated to
 * the shared `withTimeout` helper (label "Compaction").
 */
export async function compactWithSafetyTimeout(compact, timeoutMs = EMBEDDED_COMPACTION_TIMEOUT_MS) {
    return withTimeout(() => compact(), timeoutMs, "Compaction");
}
@@ -0,0 +1,27 @@
/**
 * Decide whether a run timeout should be attributed to compaction.
 * Non-timeout signals are never flagged; timeouts are flagged only while
 * compaction is pending/retrying or in flight.
 */
export function shouldFlagCompactionTimeout(signal) {
  return signal.isTimeout
    ? signal.isCompactionPendingOrRetrying || signal.isCompactionInFlight
    : false;
}
/**
 * Pick which message snapshot to persist after a run.
 *
 * The current snapshot is used normally; when the run timed out during
 * compaction, the pre-compaction snapshot (if captured) is preferred because
 * the current one may reflect a half-finished compaction.
 */
export function selectCompactionTimeoutSnapshot(params) {
  const usePreCompaction = params.timedOutDuringCompaction && params.preCompactionSnapshot;
  if (usePreCompaction) {
    return {
      messagesSnapshot: params.preCompactionSnapshot,
      sessionIdUsed: params.preCompactionSessionId,
      source: "pre-compaction",
    };
  }
  return {
    messagesSnapshot: params.currentSnapshot,
    sessionIdUsed: params.currentSessionId,
    source: "current",
  };
}
@@ -0,0 +1,29 @@
1
+ export const DEFAULT_WAIT_FOR_IDLE_TIMEOUT_MS = 30_000;
2
+ async function waitForAgentIdleBestEffort(agent, timeoutMs) {
3
+ const waitForIdle = agent?.waitForIdle;
4
+ if (typeof waitForIdle !== "function") {
5
+ return;
6
+ }
7
+ let timeoutHandle;
8
+ try {
9
+ await Promise.race([
10
+ waitForIdle.call(agent),
11
+ new Promise((resolve) => {
12
+ timeoutHandle = setTimeout(resolve, timeoutMs);
13
+ timeoutHandle.unref?.();
14
+ }),
15
+ ]);
16
+ }
17
+ catch {
18
+ // Best-effort during cleanup.
19
+ }
20
+ finally {
21
+ if (timeoutHandle) {
22
+ clearTimeout(timeoutHandle);
23
+ }
24
+ }
25
+ }
26
+ export async function flushPendingToolResultsAfterIdle(opts) {
27
+ await waitForAgentIdleBestEffort(opts.agent, opts.timeoutMs ?? DEFAULT_WAIT_FOR_IDLE_TIMEOUT_MS);
28
+ opts.sessionManager?.flushPendingToolResults?.();
29
+ }