@link-assistant/agent 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/auth/plugins.ts +4 -1
- package/src/cli/argv.ts +11 -0
- package/src/cli/defaults.ts +24 -1
- package/src/cli/model-config.js +165 -54
- package/src/cli/run-options.js +10 -1
- package/src/index.js +6 -1
- package/src/provider/provider.ts +23 -20
- package/src/provider/retry-fetch.ts +98 -12
- package/src/session/compaction.ts +20 -0
- package/src/session/message-v2.ts +9 -0
- package/src/session/prompt.ts +111 -20
- package/src/storage/storage.ts +13 -2
- package/src/util/verbose-fetch.ts +34 -1
package/package.json
CHANGED
package/src/auth/plugins.ts
CHANGED
|
@@ -1542,9 +1542,12 @@ const GooglePlugin: AuthPlugin = {
|
|
|
1542
1542
|
|
|
1543
1543
|
/**
|
|
1544
1544
|
* Check if a response status is retryable (transient error).
|
|
1545
|
+
* Includes 500/502 for intermittent server errors (#231).
|
|
1545
1546
|
*/
|
|
1546
1547
|
const isRetryableStatus = (status: number): boolean => {
|
|
1547
|
-
return
|
|
1548
|
+
return (
|
|
1549
|
+
status === 429 || status === 500 || status === 502 || status === 503
|
|
1550
|
+
);
|
|
1548
1551
|
};
|
|
1549
1552
|
|
|
1550
1553
|
/**
|
package/src/cli/argv.ts
CHANGED
|
@@ -69,3 +69,14 @@ export function getCompactionModelFromProcessArgv(): string | null {
|
|
|
69
69
|
export function getCompactionSafetyMarginFromProcessArgv(): string | null {
|
|
70
70
|
return getArgFromProcessArgv('compaction-safety-margin');
|
|
71
71
|
}
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Extract --compaction-models argument directly from process.argv
|
|
75
|
+
* The value is a links notation references sequence, e.g.:
|
|
76
|
+
* "(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)"
|
|
77
|
+
* @returns The compaction models argument from CLI or null if not found
|
|
78
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
79
|
+
*/
|
|
80
|
+
export function getCompactionModelsFromProcessArgv(): string | null {
|
|
81
|
+
return getArgFromProcessArgv('compaction-models');
|
|
82
|
+
}
|
package/src/cli/defaults.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
/** Default model used when no `--model` CLI argument is provided. */
|
|
9
|
-
export const DEFAULT_MODEL = 'opencode/
|
|
9
|
+
export const DEFAULT_MODEL = 'opencode/qwen3.6-plus-free';
|
|
10
10
|
|
|
11
11
|
/** Default provider ID extracted from DEFAULT_MODEL. */
|
|
12
12
|
export const DEFAULT_PROVIDER_ID = DEFAULT_MODEL.split('/')[0];
|
|
@@ -23,6 +23,29 @@ export const DEFAULT_MODEL_ID = DEFAULT_MODEL.split('/').slice(1).join('/');
|
|
|
23
23
|
*/
|
|
24
24
|
export const DEFAULT_COMPACTION_MODEL = 'opencode/gpt-5-nano';
|
|
25
25
|
|
|
26
|
+
/**
|
|
27
|
+
* Default compaction models cascade, ordered from smallest/cheapest context to largest.
|
|
28
|
+
* During compaction, the system tries each model in order. If the used context exceeds
|
|
29
|
+
* a model's context limit, it skips to the next larger model. If a model's rate limit
|
|
30
|
+
* is reached, it also skips to the next model.
|
|
31
|
+
* The special value "same" means use the same model as `--model`.
|
|
32
|
+
*
|
|
33
|
+
* Parsed as links notation references sequence (single anonymous link):
|
|
34
|
+
* "(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)"
|
|
35
|
+
*
|
|
36
|
+
* Context limits (approximate):
|
|
37
|
+
* big-pickle: ~200K
|
|
38
|
+
* nemotron-3-super-free: ~262K
|
|
39
|
+
* minimax-m2.5-free: ~200K
|
|
40
|
+
* gpt-5-nano: ~400K
|
|
41
|
+
* qwen3.6-plus-free: ~1M
|
|
42
|
+
* same: (base model's context)
|
|
43
|
+
*
|
|
44
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
45
|
+
*/
|
|
46
|
+
export const DEFAULT_COMPACTION_MODELS =
|
|
47
|
+
'(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)';
|
|
48
|
+
|
|
26
49
|
/**
|
|
27
50
|
* Default compaction safety margin as a percentage of usable context window.
|
|
28
51
|
* Applied only when the compaction model has a context window equal to or smaller
|
package/src/cli/model-config.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
getModelFromProcessArgv,
|
|
3
3
|
getCompactionModelFromProcessArgv,
|
|
4
|
+
getCompactionModelsFromProcessArgv,
|
|
4
5
|
getCompactionSafetyMarginFromProcessArgv,
|
|
5
6
|
} from './argv.ts';
|
|
6
7
|
import { Log } from '../util/log.ts';
|
|
@@ -8,6 +9,7 @@ import {
|
|
|
8
9
|
DEFAULT_PROVIDER_ID,
|
|
9
10
|
DEFAULT_MODEL_ID,
|
|
10
11
|
DEFAULT_COMPACTION_MODEL,
|
|
12
|
+
DEFAULT_COMPACTION_MODELS,
|
|
11
13
|
DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT,
|
|
12
14
|
} from './defaults.ts';
|
|
13
15
|
|
|
@@ -68,29 +70,39 @@ export async function parseModelConfig(argv, outputError, outputStatus) {
|
|
|
68
70
|
modelID,
|
|
69
71
|
}));
|
|
70
72
|
|
|
71
|
-
// Validate that the model exists in the provider (#196)
|
|
72
|
-
//
|
|
73
|
-
//
|
|
73
|
+
// Validate that the model exists in the provider (#196, #231)
|
|
74
|
+
// If user explicitly specified provider/model and the model is not found,
|
|
75
|
+
// fail immediately instead of silently falling back to a different model.
|
|
74
76
|
try {
|
|
75
77
|
const { Provider } = await import('../provider/provider.ts');
|
|
76
78
|
const s = await Provider.state();
|
|
77
79
|
const provider = s.providers[providerID];
|
|
78
80
|
if (provider && !provider.info.models[modelID]) {
|
|
79
|
-
// Provider exists but model doesn't
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
// Provider exists but model doesn't — fail with a clear error (#231)
|
|
82
|
+
// Silent fallback caused kimi-k2.5-free to be routed to minimax-m2.5-free
|
|
83
|
+
const availableModels = Object.keys(provider.info.models).slice(0, 10);
|
|
84
|
+
Log.Default.error(() => ({
|
|
82
85
|
message:
|
|
83
|
-
'model not found in provider
|
|
86
|
+
'model not found in provider — refusing to proceed with explicit provider/model',
|
|
84
87
|
providerID,
|
|
85
88
|
modelID,
|
|
86
89
|
availableModels,
|
|
87
90
|
}));
|
|
91
|
+
throw new Error(
|
|
92
|
+
`Model "${modelID}" not found in provider "${providerID}". ` +
|
|
93
|
+
`Available models include: ${availableModels.join(', ')}. ` +
|
|
94
|
+
`Use --model ${providerID}/<model-id> with a valid model, or omit the provider prefix for auto-resolution.`
|
|
95
|
+
);
|
|
88
96
|
}
|
|
89
97
|
} catch (validationError) {
|
|
90
|
-
//
|
|
91
|
-
|
|
98
|
+
// Re-throw if this is our own validation error (not an infrastructure issue)
|
|
99
|
+
if (validationError?.message?.includes('not found in provider')) {
|
|
100
|
+
throw validationError;
|
|
101
|
+
}
|
|
102
|
+
// For infrastructure errors (e.g. can't load provider state), log and continue
|
|
92
103
|
Log.Default.info(() => ({
|
|
93
|
-
message:
|
|
104
|
+
message:
|
|
105
|
+
'skipping model existence validation due to infrastructure error',
|
|
94
106
|
reason: validationError?.message,
|
|
95
107
|
}));
|
|
96
108
|
}
|
|
@@ -163,20 +175,71 @@ export async function parseModelConfig(argv, outputError, outputStatus) {
|
|
|
163
175
|
return { providerID, modelID, compactionModel: compactionModelResult };
|
|
164
176
|
}
|
|
165
177
|
|
|
178
|
+
/**
|
|
179
|
+
* Parse a links notation references sequence string into an array of model names.
|
|
180
|
+
* Format: "(model1 model2 model3)" — parenthesized space-separated list.
|
|
181
|
+
* @param {string} notation - Links notation sequence string
|
|
182
|
+
* @returns {string[]} Array of model name strings
|
|
183
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
184
|
+
*/
|
|
185
|
+
function parseLinksNotationSequence(notation) {
|
|
186
|
+
const trimmed = notation.trim();
|
|
187
|
+
// Remove surrounding parentheses if present
|
|
188
|
+
const inner =
|
|
189
|
+
trimmed.startsWith('(') && trimmed.endsWith(')')
|
|
190
|
+
? trimmed.slice(1, -1)
|
|
191
|
+
: trimmed;
|
|
192
|
+
// Split on whitespace and filter empty strings
|
|
193
|
+
return inner.split(/\s+/).filter((s) => s.length > 0);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Resolve a single compaction model entry (short name, provider/model, or "same").
|
|
198
|
+
* @returns {{ providerID: string, modelID: string, useSameModel: boolean }}
|
|
199
|
+
*/
|
|
200
|
+
async function resolveCompactionModelEntry(
|
|
201
|
+
modelArg,
|
|
202
|
+
baseProviderID,
|
|
203
|
+
baseModelID
|
|
204
|
+
) {
|
|
205
|
+
const useSameModel = modelArg.toLowerCase() === 'same';
|
|
206
|
+
|
|
207
|
+
if (useSameModel) {
|
|
208
|
+
return {
|
|
209
|
+
providerID: baseProviderID,
|
|
210
|
+
modelID: baseModelID,
|
|
211
|
+
useSameModel: true,
|
|
212
|
+
};
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
if (modelArg.includes('/')) {
|
|
216
|
+
const parts = modelArg.split('/');
|
|
217
|
+
return {
|
|
218
|
+
providerID: parts[0],
|
|
219
|
+
modelID: parts.slice(1).join('/'),
|
|
220
|
+
useSameModel: false,
|
|
221
|
+
};
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Short name resolution
|
|
225
|
+
const { Provider } = await import('../provider/provider.ts');
|
|
226
|
+
const resolved = await Provider.parseModelWithResolution(modelArg);
|
|
227
|
+
return {
|
|
228
|
+
providerID: resolved.providerID,
|
|
229
|
+
modelID: resolved.modelID,
|
|
230
|
+
useSameModel: false,
|
|
231
|
+
};
|
|
232
|
+
}
|
|
233
|
+
|
|
166
234
|
/**
|
|
167
235
|
* Parse compaction model config from argv.
|
|
168
|
-
*
|
|
236
|
+
* Supports both --compaction-model (single) and --compaction-models (cascade).
|
|
237
|
+
* When --compaction-models is specified, it overrides --compaction-model.
|
|
169
238
|
* The special value "same" means use the base model for compaction.
|
|
170
239
|
* @see https://github.com/link-assistant/agent/issues/219
|
|
240
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
171
241
|
*/
|
|
172
242
|
async function parseCompactionModelConfig(argv, baseProviderID, baseModelID) {
|
|
173
|
-
// Get compaction model from CLI (safeguard against yargs caching)
|
|
174
|
-
const cliCompactionModelArg = getCompactionModelFromProcessArgv();
|
|
175
|
-
const compactionModelArg =
|
|
176
|
-
cliCompactionModelArg ??
|
|
177
|
-
argv['compaction-model'] ??
|
|
178
|
-
DEFAULT_COMPACTION_MODEL;
|
|
179
|
-
|
|
180
243
|
// Get safety margin from CLI
|
|
181
244
|
const cliSafetyMarginArg = getCompactionSafetyMarginFromProcessArgv();
|
|
182
245
|
const compactionSafetyMarginPercent = cliSafetyMarginArg
|
|
@@ -184,49 +247,97 @@ async function parseCompactionModelConfig(argv, baseProviderID, baseModelID) {
|
|
|
184
247
|
: (argv['compaction-safety-margin'] ??
|
|
185
248
|
DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT);
|
|
186
249
|
|
|
187
|
-
//
|
|
188
|
-
const
|
|
250
|
+
// Check for --compaction-models (cascade) first — it overrides --compaction-model
|
|
251
|
+
const cliCompactionModelsArg = getCompactionModelsFromProcessArgv();
|
|
252
|
+
const compactionModelsArg =
|
|
253
|
+
cliCompactionModelsArg ??
|
|
254
|
+
argv['compaction-models'] ??
|
|
255
|
+
DEFAULT_COMPACTION_MODELS;
|
|
189
256
|
|
|
190
|
-
|
|
191
|
-
|
|
257
|
+
// Parse the links notation sequence into an array of model names
|
|
258
|
+
const modelNames = parseLinksNotationSequence(compactionModelsArg);
|
|
259
|
+
|
|
260
|
+
if (modelNames.length > 0) {
|
|
261
|
+
// Resolve each model in the cascade
|
|
262
|
+
const compactionModels = [];
|
|
263
|
+
for (const name of modelNames) {
|
|
264
|
+
try {
|
|
265
|
+
const resolved = await resolveCompactionModelEntry(
|
|
266
|
+
name,
|
|
267
|
+
baseProviderID,
|
|
268
|
+
baseModelID
|
|
269
|
+
);
|
|
270
|
+
compactionModels.push({
|
|
271
|
+
providerID: resolved.providerID,
|
|
272
|
+
modelID: resolved.modelID,
|
|
273
|
+
useSameModel: resolved.useSameModel,
|
|
274
|
+
});
|
|
275
|
+
} catch (err) {
|
|
276
|
+
// If a model can't be resolved, log and skip it
|
|
277
|
+
Log.Default.warn(() => ({
|
|
278
|
+
message: 'skipping unresolvable compaction model in cascade',
|
|
279
|
+
model: name,
|
|
280
|
+
error: err?.message,
|
|
281
|
+
}));
|
|
282
|
+
}
|
|
283
|
+
}
|
|
192
284
|
|
|
193
|
-
if (useSameModel) {
|
|
194
|
-
compactionProviderID = baseProviderID;
|
|
195
|
-
compactionModelID = baseModelID;
|
|
196
|
-
Log.Default.info(() => ({
|
|
197
|
-
message:
|
|
198
|
-
'compaction model set to "same" — using base model for compaction',
|
|
199
|
-
compactionProviderID,
|
|
200
|
-
compactionModelID,
|
|
201
|
-
}));
|
|
202
|
-
} else if (compactionModelArg.includes('/')) {
|
|
203
|
-
const parts = compactionModelArg.split('/');
|
|
204
|
-
compactionProviderID = parts[0];
|
|
205
|
-
compactionModelID = parts.slice(1).join('/');
|
|
206
|
-
Log.Default.info(() => ({
|
|
207
|
-
message: 'using explicit compaction model',
|
|
208
|
-
compactionProviderID,
|
|
209
|
-
compactionModelID,
|
|
210
|
-
}));
|
|
211
|
-
} else {
|
|
212
|
-
// Short name resolution
|
|
213
|
-
const { Provider } = await import('../provider/provider.ts');
|
|
214
|
-
const resolved =
|
|
215
|
-
await Provider.parseModelWithResolution(compactionModelArg);
|
|
216
|
-
compactionProviderID = resolved.providerID;
|
|
217
|
-
compactionModelID = resolved.modelID;
|
|
218
285
|
Log.Default.info(() => ({
|
|
219
|
-
message: '
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
286
|
+
message: 'compaction models cascade configured',
|
|
287
|
+
models: compactionModels.map((m) =>
|
|
288
|
+
m.useSameModel ? 'same' : `${m.providerID}/${m.modelID}`
|
|
289
|
+
),
|
|
290
|
+
source: cliCompactionModelsArg ? 'cli' : 'default',
|
|
223
291
|
}));
|
|
292
|
+
|
|
293
|
+
// Use the first model as the primary compaction model (for backward compatibility)
|
|
294
|
+
// The full cascade is stored in compactionModels array
|
|
295
|
+
const primary = compactionModels[0] || {
|
|
296
|
+
providerID: baseProviderID,
|
|
297
|
+
modelID: baseModelID,
|
|
298
|
+
useSameModel: true,
|
|
299
|
+
};
|
|
300
|
+
|
|
301
|
+
return {
|
|
302
|
+
providerID: primary.providerID,
|
|
303
|
+
modelID: primary.modelID,
|
|
304
|
+
useSameModel: primary.useSameModel,
|
|
305
|
+
compactionSafetyMarginPercent,
|
|
306
|
+
compactionModels,
|
|
307
|
+
};
|
|
224
308
|
}
|
|
225
309
|
|
|
310
|
+
// Fallback to single --compaction-model
|
|
311
|
+
const cliCompactionModelArg = getCompactionModelFromProcessArgv();
|
|
312
|
+
const compactionModelArg =
|
|
313
|
+
cliCompactionModelArg ??
|
|
314
|
+
argv['compaction-model'] ??
|
|
315
|
+
DEFAULT_COMPACTION_MODEL;
|
|
316
|
+
|
|
317
|
+
const resolved = await resolveCompactionModelEntry(
|
|
318
|
+
compactionModelArg,
|
|
319
|
+
baseProviderID,
|
|
320
|
+
baseModelID
|
|
321
|
+
);
|
|
322
|
+
|
|
323
|
+
Log.Default.info(() => ({
|
|
324
|
+
message: 'using single compaction model',
|
|
325
|
+
compactionProviderID: resolved.providerID,
|
|
326
|
+
compactionModelID: resolved.modelID,
|
|
327
|
+
useSameModel: resolved.useSameModel,
|
|
328
|
+
}));
|
|
329
|
+
|
|
226
330
|
return {
|
|
227
|
-
providerID:
|
|
228
|
-
modelID:
|
|
229
|
-
useSameModel,
|
|
331
|
+
providerID: resolved.providerID,
|
|
332
|
+
modelID: resolved.modelID,
|
|
333
|
+
useSameModel: resolved.useSameModel,
|
|
230
334
|
compactionSafetyMarginPercent,
|
|
335
|
+
compactionModels: [
|
|
336
|
+
{
|
|
337
|
+
providerID: resolved.providerID,
|
|
338
|
+
modelID: resolved.modelID,
|
|
339
|
+
useSameModel: resolved.useSameModel,
|
|
340
|
+
},
|
|
341
|
+
],
|
|
231
342
|
};
|
|
232
343
|
}
|
package/src/cli/run-options.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
DEFAULT_MODEL,
|
|
3
3
|
DEFAULT_COMPACTION_MODEL,
|
|
4
|
+
DEFAULT_COMPACTION_MODELS,
|
|
4
5
|
DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT,
|
|
5
6
|
} from './defaults.ts';
|
|
6
7
|
|
|
@@ -151,9 +152,17 @@ export function buildRunOptions(yargs) {
|
|
|
151
152
|
.option('compaction-model', {
|
|
152
153
|
type: 'string',
|
|
153
154
|
description:
|
|
154
|
-
'Model to use for context compaction in format providerID/modelID. Use "same" to use the base model. Default: opencode/gpt-5-nano (free, 400K context).',
|
|
155
|
+
'Model to use for context compaction in format providerID/modelID. Use "same" to use the base model. Default: opencode/gpt-5-nano (free, 400K context). Overridden by --compaction-models if both are specified.',
|
|
155
156
|
default: DEFAULT_COMPACTION_MODEL,
|
|
156
157
|
})
|
|
158
|
+
.option('compaction-models', {
|
|
159
|
+
type: 'string',
|
|
160
|
+
description:
|
|
161
|
+
'Ordered cascade of compaction models in links notation sequence format: "(model1 model2 ... same)". ' +
|
|
162
|
+
"Models are tried from smallest/cheapest context to largest. If used context exceeds a model's limit or its rate limit is reached, the next model is tried. " +
|
|
163
|
+
'The special value "same" uses the base model. Overrides --compaction-model when specified.',
|
|
164
|
+
default: DEFAULT_COMPACTION_MODELS,
|
|
165
|
+
})
|
|
157
166
|
.option('compaction-safety-margin', {
|
|
158
167
|
type: 'number',
|
|
159
168
|
description:
|
package/src/index.js
CHANGED
|
@@ -27,7 +27,10 @@ import { McpCommand } from './cli/cmd/mcp.ts';
|
|
|
27
27
|
import { AuthCommand } from './cli/cmd/auth.ts';
|
|
28
28
|
import { FormatError } from './cli/error.ts';
|
|
29
29
|
import { UI } from './cli/ui.ts';
|
|
30
|
-
import {
|
|
30
|
+
import {
|
|
31
|
+
createVerboseFetch,
|
|
32
|
+
registerPendingStreamLogExitHandler,
|
|
33
|
+
} from './util/verbose-fetch.ts';
|
|
31
34
|
import {
|
|
32
35
|
runContinuousServerMode,
|
|
33
36
|
runContinuousDirectMode,
|
|
@@ -822,6 +825,8 @@ async function main() {
|
|
|
822
825
|
caller: 'global',
|
|
823
826
|
});
|
|
824
827
|
globalThis.__agentVerboseFetchInstalled = true;
|
|
828
|
+
// Register handler to warn about pending stream logs at process exit (#231)
|
|
829
|
+
registerPendingStreamLogExitHandler();
|
|
825
830
|
}
|
|
826
831
|
})
|
|
827
832
|
.fail((msg, err, yargs) => {
|
package/src/provider/provider.ts
CHANGED
|
@@ -1623,35 +1623,30 @@ export namespace Provider {
|
|
|
1623
1623
|
}
|
|
1624
1624
|
|
|
1625
1625
|
if (!isSyntheticProvider && !info) {
|
|
1626
|
-
//
|
|
1627
|
-
//
|
|
1626
|
+
// Model not found even after cache refresh — fail with a clear error (#231)
|
|
1627
|
+
// Previously this created synthetic fallback info, which allowed the API call
|
|
1628
|
+
// to proceed with the wrong model (e.g., kimi-k2.5-free routed to minimax-m2.5-free)
|
|
1628
1629
|
const availableInProvider = Object.keys(provider.info.models).slice(
|
|
1629
1630
|
0,
|
|
1630
1631
|
10
|
|
1631
1632
|
);
|
|
1632
|
-
log.
|
|
1633
|
+
log.error(() => ({
|
|
1633
1634
|
message:
|
|
1634
|
-
'model not in provider catalog after refresh
|
|
1635
|
+
'model not found in provider catalog after refresh — refusing to proceed',
|
|
1635
1636
|
providerID,
|
|
1636
1637
|
modelID,
|
|
1637
1638
|
availableModels: availableInProvider,
|
|
1638
1639
|
totalModels: Object.keys(provider.info.models).length,
|
|
1639
1640
|
}));
|
|
1640
1641
|
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
temperature: true,
|
|
1650
|
-
tool_call: true,
|
|
1651
|
-
cost: { input: 0, output: 0 },
|
|
1652
|
-
limit: { context: 128000, output: 16384 },
|
|
1653
|
-
options: {},
|
|
1654
|
-
} as ModelsDev.Model;
|
|
1642
|
+
throw new ModelNotFoundError({
|
|
1643
|
+
providerID,
|
|
1644
|
+
modelID,
|
|
1645
|
+
suggestion:
|
|
1646
|
+
`Model "${modelID}" not found in provider "${providerID}" (checked ${Object.keys(provider.info.models).length} models). ` +
|
|
1647
|
+
`Available models include: ${availableInProvider.join(', ')}. ` +
|
|
1648
|
+
`Use --model ${providerID}/<model-id> with a valid model.`,
|
|
1649
|
+
});
|
|
1655
1650
|
}
|
|
1656
1651
|
|
|
1657
1652
|
try {
|
|
@@ -1736,7 +1731,13 @@ export namespace Provider {
|
|
|
1736
1731
|
priority = priority.filter((m) => m !== 'claude-haiku-4.5');
|
|
1737
1732
|
}
|
|
1738
1733
|
if (providerID === 'opencode' || providerID === 'local') {
|
|
1739
|
-
priority = [
|
|
1734
|
+
priority = [
|
|
1735
|
+
'qwen3.6-plus-free',
|
|
1736
|
+
'minimax-m2.5-free',
|
|
1737
|
+
'gpt-5-nano',
|
|
1738
|
+
'nemotron-3-super-free',
|
|
1739
|
+
'big-pickle',
|
|
1740
|
+
];
|
|
1740
1741
|
}
|
|
1741
1742
|
if (providerID === 'kilo') {
|
|
1742
1743
|
priority = [
|
|
@@ -1763,7 +1764,9 @@ export namespace Provider {
|
|
|
1763
1764
|
}
|
|
1764
1765
|
|
|
1765
1766
|
const priority = [
|
|
1767
|
+
'qwen3.6-plus-free',
|
|
1766
1768
|
'glm-5-free',
|
|
1769
|
+
'nemotron-3-super-free',
|
|
1767
1770
|
'minimax-m2.5-free',
|
|
1768
1771
|
'gpt-5-nano',
|
|
1769
1772
|
'big-pickle',
|
|
@@ -1846,7 +1849,7 @@ export namespace Provider {
|
|
|
1846
1849
|
* 1. If model is uniquely available in one provider, use that provider
|
|
1847
1850
|
* 2. If model is available in multiple providers, prioritize based on free model availability:
|
|
1848
1851
|
* - kilo: glm-5-free, glm-4.5-air-free, minimax-m2.5-free, giga-potato-free, deepseek-r1-free (unique to Kilo)
|
|
1849
|
-
* - opencode: big-pickle, gpt-5-nano (unique to OpenCode)
|
|
1852
|
+
* - opencode: big-pickle, gpt-5-nano, qwen3.6-plus-free, nemotron-3-super-free (unique to OpenCode)
|
|
1850
1853
|
* 3. For shared models, prefer OpenCode first, then fall back to Kilo on rate limit
|
|
1851
1854
|
*
|
|
1852
1855
|
* @param modelID - Short model name without provider prefix
|
package/src/provider/retry-fetch.ts
CHANGED
|
@@ -2,10 +2,11 @@ import { Log } from '../util/log';
|
|
|
2
2
|
import { config } from '../config/config';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
|
-
* Custom fetch wrapper that handles rate limits (HTTP 429)
|
|
5
|
+
* Custom fetch wrapper that handles rate limits (HTTP 429) and server errors (HTTP 5xx)
|
|
6
|
+
* using time-based retry logic.
|
|
6
7
|
*
|
|
7
|
-
* This wrapper intercepts 429 responses at the HTTP level before the AI SDK's
|
|
8
|
-
* retry mechanism can interfere. It respects:
|
|
8
|
+
* This wrapper intercepts 429 and 5xx responses at the HTTP level before the AI SDK's
|
|
9
|
+
* internal retry mechanism can interfere. It respects:
|
|
9
10
|
* - retry-after headers (both seconds and HTTP date formats)
|
|
10
11
|
* - retry-after-ms header for millisecond precision
|
|
11
12
|
* - LINK_ASSISTANT_AGENT_RETRY_TIMEOUT for global time-based retry limit
|
|
@@ -15,10 +16,12 @@ import { config } from '../config/config';
|
|
|
15
16
|
* The AI SDK's internal retry uses a fixed count (default 3 attempts) and ignores
|
|
16
17
|
* retry-after headers. When providers return long retry-after values (e.g., 64 minutes),
|
|
17
18
|
* the SDK exhausts its retries before the agent can properly wait.
|
|
19
|
+
* Additionally, server errors (500, 502, 503) from providers like OpenCode API were not
|
|
20
|
+
* retried, causing compaction cycles to be lost silently.
|
|
18
21
|
*
|
|
19
22
|
* Solution:
|
|
20
|
-
* By wrapping fetch, we handle rate limits at the HTTP layer with
|
|
21
|
-
* ensuring the agent's 7-week global timeout is respected.
|
|
23
|
+
* By wrapping fetch, we handle rate limits and server errors at the HTTP layer with
|
|
24
|
+
* time-based retries, ensuring the agent's 7-week global timeout is respected.
|
|
22
25
|
*
|
|
23
26
|
* Important: Rate limit waits use ISOLATED AbortControllers that are NOT subject to
|
|
24
27
|
* provider/stream timeouts. This prevents long rate limit waits (e.g., 15 hours) from
|
|
@@ -26,6 +29,7 @@ import { config } from '../config/config';
|
|
|
26
29
|
*
|
|
27
30
|
* @see https://github.com/link-assistant/agent/issues/167
|
|
28
31
|
* @see https://github.com/link-assistant/agent/issues/183
|
|
32
|
+
* @see https://github.com/link-assistant/agent/issues/231
|
|
29
33
|
* @see https://github.com/vercel/ai/issues/12585
|
|
30
34
|
*/
|
|
31
35
|
|
|
@@ -37,6 +41,20 @@ export namespace RetryFetch {
|
|
|
37
41
|
const RETRY_BACKOFF_FACTOR = 2;
|
|
38
42
|
const RETRY_MAX_DELAY_NO_HEADERS = 30_000;
|
|
39
43
|
|
|
44
|
+
// Maximum number of retries for server errors (5xx) — unlike rate limits (429)
|
|
45
|
+
// which retry indefinitely within the global timeout, server errors use a fixed
|
|
46
|
+
// retry count to avoid retrying permanently broken endpoints (#231)
|
|
47
|
+
const SERVER_ERROR_MAX_RETRIES = 3;
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Check if an HTTP status code is a retryable server error.
|
|
51
|
+
* Retries on 500 (Internal Server Error), 502 (Bad Gateway), and 503 (Service Unavailable).
|
|
52
|
+
* @see https://github.com/link-assistant/agent/issues/231
|
|
53
|
+
*/
|
|
54
|
+
function isRetryableServerError(status: number): boolean {
|
|
55
|
+
return status === 500 || status === 502 || status === 503;
|
|
56
|
+
}
|
|
57
|
+
|
|
40
58
|
// Minimum retry interval to prevent rapid retries (default: 30 seconds)
|
|
41
59
|
// Can be configured via AGENT_MIN_RETRY_INTERVAL env var
|
|
42
60
|
function getMinRetryInterval(): number {
|
|
@@ -298,19 +316,20 @@ export namespace RetryFetch {
|
|
|
298
316
|
};
|
|
299
317
|
|
|
300
318
|
/**
|
|
301
|
-
* Create a fetch function that handles rate limits with
|
|
319
|
+
* Create a fetch function that handles rate limits and server errors with retry logic.
|
|
302
320
|
*
|
|
303
321
|
* This wrapper:
|
|
304
|
-
* 1. Intercepts HTTP 429 responses
|
|
305
|
-
* 2.
|
|
306
|
-
* 3.
|
|
307
|
-
* 4.
|
|
322
|
+
* 1. Intercepts HTTP 429 (rate limit) responses — retries with retry-after headers
|
|
323
|
+
* 2. Intercepts HTTP 500/502/503 (server error) responses — retries up to SERVER_ERROR_MAX_RETRIES
|
|
324
|
+
* 3. Parses retry-after headers for 429 responses
|
|
325
|
+
* 4. Uses exponential backoff for server errors and network errors
|
|
326
|
+
* 5. Respects global LINK_ASSISTANT_AGENT_RETRY_TIMEOUT for all retries
|
|
308
327
|
*
|
|
309
328
|
* If retry-after exceeds LINK_ASSISTANT_AGENT_RETRY_TIMEOUT, the original 429 response is returned
|
|
310
329
|
* to let higher-level error handling take over.
|
|
311
330
|
*
|
|
312
331
|
* @param options Configuration options
|
|
313
|
-
* @returns A fetch function with rate limit retry handling
|
|
332
|
+
* @returns A fetch function with rate limit and server error retry handling
|
|
314
333
|
*/
|
|
315
334
|
export function create(options: RetryFetchOptions = {}): typeof fetch {
|
|
316
335
|
const baseFetch = options.baseFetch ?? fetch;
|
|
@@ -365,7 +384,74 @@ export namespace RetryFetch {
|
|
|
365
384
|
throw error;
|
|
366
385
|
}
|
|
367
386
|
|
|
368
|
-
//
|
|
387
|
+
// Handle retryable server errors (500, 502, 503) with limited retries (#231)
|
|
388
|
+
// Unlike rate limits (429) which retry indefinitely within timeout,
|
|
389
|
+
// server errors use a fixed count to avoid retrying broken endpoints.
|
|
390
|
+
if (isRetryableServerError(response.status)) {
|
|
391
|
+
if (attempt > SERVER_ERROR_MAX_RETRIES) {
|
|
392
|
+
// Read response body for diagnostics before returning (#231)
|
|
393
|
+
// This ensures the actual server error is visible in logs,
|
|
394
|
+
// preventing misleading downstream errors like "input_tokens undefined"
|
|
395
|
+
let errorBody = '';
|
|
396
|
+
try {
|
|
397
|
+
errorBody = await response.clone().text();
|
|
398
|
+
} catch {
|
|
399
|
+
errorBody = '<failed to read response body>';
|
|
400
|
+
}
|
|
401
|
+
log.warn(() => ({
|
|
402
|
+
message:
|
|
403
|
+
'server error max retries exceeded, returning error response',
|
|
404
|
+
sessionID,
|
|
405
|
+
status: response.status,
|
|
406
|
+
attempt,
|
|
407
|
+
maxRetries: SERVER_ERROR_MAX_RETRIES,
|
|
408
|
+
responseBody: errorBody.slice(0, 500),
|
|
409
|
+
}));
|
|
410
|
+
return response;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
const elapsed = Date.now() - startTime;
|
|
414
|
+
if (elapsed >= maxRetryTimeout) {
|
|
415
|
+
let errorBody = '';
|
|
416
|
+
try {
|
|
417
|
+
errorBody = await response.clone().text();
|
|
418
|
+
} catch {
|
|
419
|
+
errorBody = '<failed to read response body>';
|
|
420
|
+
}
|
|
421
|
+
log.warn(() => ({
|
|
422
|
+
message:
|
|
423
|
+
'retry timeout exceeded for server error, returning error response',
|
|
424
|
+
sessionID,
|
|
425
|
+
status: response.status,
|
|
426
|
+
elapsedMs: elapsed,
|
|
427
|
+
maxRetryTimeoutMs: maxRetryTimeout,
|
|
428
|
+
responseBody: errorBody.slice(0, 500),
|
|
429
|
+
}));
|
|
430
|
+
return response;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// Use exponential backoff for server errors (no retry-after expected)
|
|
434
|
+
const delay = addJitter(
|
|
435
|
+
Math.min(
|
|
436
|
+
RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1),
|
|
437
|
+
Math.min(maxBackoffDelay, RETRY_MAX_DELAY_NO_HEADERS)
|
|
438
|
+
)
|
|
439
|
+
);
|
|
440
|
+
|
|
441
|
+
log.info(() => ({
|
|
442
|
+
message: 'server error, will retry',
|
|
443
|
+
sessionID,
|
|
444
|
+
status: response.status,
|
|
445
|
+
attempt,
|
|
446
|
+
maxRetries: SERVER_ERROR_MAX_RETRIES,
|
|
447
|
+
delayMs: delay,
|
|
448
|
+
}));
|
|
449
|
+
|
|
450
|
+
await sleep(delay, init?.signal ?? undefined);
|
|
451
|
+
continue;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// Only handle rate limit errors (429) beyond this point
|
|
369
455
|
if (response.status !== 429) {
|
|
370
456
|
return response;
|
|
371
457
|
}
|
package/src/session/compaction.ts
CHANGED
|
@@ -36,15 +36,35 @@ export namespace SessionCompaction {
|
|
|
36
36
|
*/
|
|
37
37
|
export const OVERFLOW_SAFETY_MARGIN = 0.85;
|
|
38
38
|
|
|
39
|
+
/**
|
|
40
|
+
* A single compaction model entry in the cascade.
|
|
41
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
42
|
+
*/
|
|
43
|
+
export interface CompactionModelEntry {
|
|
44
|
+
providerID: string;
|
|
45
|
+
modelID: string;
|
|
46
|
+
useSameModel: boolean;
|
|
47
|
+
}
|
|
48
|
+
|
|
39
49
|
/**
|
|
40
50
|
* Compaction model configuration passed from CLI.
|
|
51
|
+
* Supports both single model (backward compat) and cascade of models (#232).
|
|
41
52
|
* @see https://github.com/link-assistant/agent/issues/219
|
|
53
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
42
54
|
*/
|
|
43
55
|
export interface CompactionModelConfig {
|
|
44
56
|
providerID: string;
|
|
45
57
|
modelID: string;
|
|
46
58
|
useSameModel: boolean;
|
|
47
59
|
compactionSafetyMarginPercent: number;
|
|
60
|
+
/**
|
|
61
|
+
* Ordered cascade of compaction models from smallest/cheapest to largest.
|
|
62
|
+
* When present, the system tries each model in order during compaction.
|
|
63
|
+
* If used context exceeds a model's limit or its rate limit is reached,
|
|
64
|
+
* the next model is tried.
|
|
65
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
66
|
+
*/
|
|
67
|
+
compactionModels?: CompactionModelEntry[];
|
|
48
68
|
}
|
|
49
69
|
|
|
50
70
|
/**
|
package/src/session/message-v2.ts
CHANGED
|
@@ -398,6 +398,15 @@ export namespace MessageV2 {
|
|
|
398
398
|
modelID: z.string(),
|
|
399
399
|
useSameModel: z.boolean(),
|
|
400
400
|
compactionSafetyMarginPercent: z.number(),
|
|
401
|
+
compactionModels: z
|
|
402
|
+
.array(
|
|
403
|
+
z.object({
|
|
404
|
+
providerID: z.string(),
|
|
405
|
+
modelID: z.string(),
|
|
406
|
+
useSameModel: z.boolean(),
|
|
407
|
+
})
|
|
408
|
+
)
|
|
409
|
+
.optional(),
|
|
401
410
|
})
|
|
402
411
|
.optional(),
|
|
403
412
|
system: z.string().optional(),
|
package/src/session/prompt.ts
CHANGED
|
@@ -95,6 +95,15 @@ export namespace SessionPrompt {
|
|
|
95
95
|
modelID: z.string(),
|
|
96
96
|
useSameModel: z.boolean(),
|
|
97
97
|
compactionSafetyMarginPercent: z.number(),
|
|
98
|
+
compactionModels: z
|
|
99
|
+
.array(
|
|
100
|
+
z.object({
|
|
101
|
+
providerID: z.string(),
|
|
102
|
+
modelID: z.string(),
|
|
103
|
+
useSameModel: z.boolean(),
|
|
104
|
+
})
|
|
105
|
+
)
|
|
106
|
+
.optional(),
|
|
98
107
|
})
|
|
99
108
|
.optional(),
|
|
100
109
|
agent: z.string().optional(),
|
|
@@ -542,27 +551,109 @@ export namespace SessionPrompt {
|
|
|
542
551
|
|
|
543
552
|
// pending compaction
|
|
544
553
|
if (task?.type === 'compaction') {
|
|
545
|
-
// Use compaction model if configured
|
|
554
|
+
// Use compaction model cascade if configured (#232)
|
|
546
555
|
const compactionModelConfig = lastUser.compactionModel;
|
|
547
|
-
const
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
556
|
+
const cascade = compactionModelConfig?.compactionModels;
|
|
557
|
+
|
|
558
|
+
if (cascade && cascade.length > 0) {
|
|
559
|
+
// Cascade logic: try each model in order (smallest to largest context)
|
|
560
|
+
// Skip models whose context limit is smaller than current used tokens
|
|
561
|
+
// Skip models that hit rate limits (try next)
|
|
562
|
+
const currentTokens = lastFinished
|
|
563
|
+
? lastFinished.tokens.input +
|
|
564
|
+
lastFinished.tokens.cache.read +
|
|
565
|
+
lastFinished.tokens.output
|
|
566
|
+
: 0;
|
|
567
|
+
|
|
568
|
+
let compactionResult = 'stop';
|
|
569
|
+
for (const entry of cascade) {
|
|
570
|
+
const entryProviderID = entry.useSameModel
|
|
571
|
+
? model.providerID
|
|
572
|
+
: entry.providerID;
|
|
573
|
+
const entryModelID = entry.useSameModel
|
|
574
|
+
? model.modelID
|
|
575
|
+
: entry.modelID;
|
|
576
|
+
|
|
577
|
+
// Check if this model's context is large enough for the current tokens
|
|
578
|
+
if (!entry.useSameModel) {
|
|
579
|
+
try {
|
|
580
|
+
const entryModel = await Provider.getModel(
|
|
581
|
+
entryProviderID,
|
|
582
|
+
entryModelID
|
|
583
|
+
);
|
|
584
|
+
const entryContextLimit = entryModel.info?.limit?.context ?? 0;
|
|
585
|
+
if (
|
|
586
|
+
entryContextLimit > 0 &&
|
|
587
|
+
currentTokens > entryContextLimit
|
|
588
|
+
) {
|
|
589
|
+
log.info(() => ({
|
|
590
|
+
message:
|
|
591
|
+
'skipping compaction model — context too small for current tokens',
|
|
592
|
+
modelID: entryModelID,
|
|
593
|
+
providerID: entryProviderID,
|
|
594
|
+
contextLimit: entryContextLimit,
|
|
595
|
+
currentTokens,
|
|
596
|
+
}));
|
|
597
|
+
continue;
|
|
598
|
+
}
|
|
599
|
+
} catch {
|
|
600
|
+
log.info(() => ({
|
|
601
|
+
message:
|
|
602
|
+
'could not resolve compaction cascade model — skipping',
|
|
603
|
+
modelID: entryModelID,
|
|
604
|
+
providerID: entryProviderID,
|
|
605
|
+
}));
|
|
606
|
+
continue;
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
try {
|
|
611
|
+
compactionResult = await SessionCompaction.process({
|
|
612
|
+
messages: msgs,
|
|
613
|
+
parentID: lastUser.id,
|
|
614
|
+
abort,
|
|
615
|
+
model: {
|
|
616
|
+
providerID: entryProviderID,
|
|
617
|
+
modelID: entryModelID,
|
|
618
|
+
},
|
|
619
|
+
sessionID,
|
|
620
|
+
});
|
|
621
|
+
// If compaction succeeded, break the cascade
|
|
622
|
+
break;
|
|
623
|
+
} catch (err) {
|
|
624
|
+
// If rate limited or error, try next model in cascade
|
|
625
|
+
log.warn(() => ({
|
|
626
|
+
message: 'compaction model failed — trying next in cascade',
|
|
627
|
+
modelID: entryModelID,
|
|
628
|
+
providerID: entryProviderID,
|
|
629
|
+
error: err?.message,
|
|
630
|
+
}));
|
|
631
|
+
continue;
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
if (compactionResult === 'stop') break;
|
|
635
|
+
} else {
|
|
636
|
+
// Single model fallback (backward compatibility)
|
|
637
|
+
const compactionProviderID =
|
|
638
|
+
compactionModelConfig && !compactionModelConfig.useSameModel
|
|
639
|
+
? compactionModelConfig.providerID
|
|
640
|
+
: model.providerID;
|
|
641
|
+
const compactionModelID =
|
|
642
|
+
compactionModelConfig && !compactionModelConfig.useSameModel
|
|
643
|
+
? compactionModelConfig.modelID
|
|
644
|
+
: model.modelID;
|
|
645
|
+
const result = await SessionCompaction.process({
|
|
646
|
+
messages: msgs,
|
|
647
|
+
parentID: lastUser.id,
|
|
648
|
+
abort,
|
|
649
|
+
model: {
|
|
650
|
+
providerID: compactionProviderID,
|
|
651
|
+
modelID: compactionModelID,
|
|
652
|
+
},
|
|
653
|
+
sessionID,
|
|
654
|
+
});
|
|
655
|
+
if (result === 'stop') break;
|
|
656
|
+
}
|
|
566
657
|
continue;
|
|
567
658
|
}
|
|
568
659
|
|
package/src/storage/storage.ts
CHANGED
|
@@ -180,8 +180,19 @@ export namespace Storage {
|
|
|
180
180
|
for (let index = migration; index < MIGRATIONS.length; index++) {
|
|
181
181
|
log.info(() => ({ message: 'running migration', index }));
|
|
182
182
|
const migration = MIGRATIONS[index];
|
|
183
|
-
await migration(dir).catch(() =>
|
|
184
|
-
log.error(() => ({
|
|
183
|
+
await migration(dir).catch((migrationError) =>
|
|
184
|
+
log.error(() => ({
|
|
185
|
+
message: 'failed to run migration',
|
|
186
|
+
index,
|
|
187
|
+
error:
|
|
188
|
+
migrationError instanceof Error
|
|
189
|
+
? {
|
|
190
|
+
name: migrationError.name,
|
|
191
|
+
message: migrationError.message,
|
|
192
|
+
stack: migrationError.stack,
|
|
193
|
+
}
|
|
194
|
+
: String(migrationError),
|
|
195
|
+
}))
|
|
185
196
|
);
|
|
186
197
|
await Bun.write(path.join(dir, 'migration'), (index + 1).toString());
|
|
187
198
|
}
|
|
@@ -24,6 +24,21 @@ const log = Log.create({ service: 'http' });
|
|
|
24
24
|
/** Global call counter shared across all verbose fetch wrappers */
|
|
25
25
|
let globalHttpCallCount = 0;
|
|
26
26
|
|
|
27
|
+
/**
|
|
28
|
+
* Track pending async stream log operations (#231).
|
|
29
|
+
* When the process exits while stream logging is in progress, we log a warning
|
|
30
|
+
* so missing HTTP response bodies are visible in the logs rather than silently lost.
|
|
31
|
+
*/
|
|
32
|
+
let pendingStreamLogs = 0;
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Get the current count of pending stream log operations.
|
|
36
|
+
* Useful for diagnostics and testing.
|
|
37
|
+
*/
|
|
38
|
+
export function getPendingStreamLogCount(): number {
|
|
39
|
+
return pendingStreamLogs;
|
|
40
|
+
}
|
|
41
|
+
|
|
27
42
|
/**
|
|
28
43
|
* Sanitize HTTP headers by masking sensitive values.
|
|
29
44
|
* Masks authorization, x-api-key, and api-key headers.
|
|
@@ -196,7 +211,8 @@ export function createVerboseFetch(
|
|
|
196
211
|
if (isStreaming) {
|
|
197
212
|
const [sdkStream, logStream] = response.body.tee();
|
|
198
213
|
|
|
199
|
-
// Consume log stream asynchronously
|
|
214
|
+
// Consume log stream asynchronously, tracking pending operations (#231)
|
|
215
|
+
pendingStreamLogs++;
|
|
200
216
|
(async () => {
|
|
201
217
|
try {
|
|
202
218
|
const reader = logStream.getReader();
|
|
@@ -225,6 +241,8 @@ export function createVerboseFetch(
|
|
|
225
241
|
});
|
|
226
242
|
} catch {
|
|
227
243
|
// Ignore logging errors
|
|
244
|
+
} finally {
|
|
245
|
+
pendingStreamLogs--;
|
|
228
246
|
}
|
|
229
247
|
})();
|
|
230
248
|
|
|
@@ -304,3 +322,18 @@ export function getHttpCallCount(): number {
|
|
|
304
322
|
export function resetHttpCallCount(): void {
|
|
305
323
|
globalHttpCallCount = 0;
|
|
306
324
|
}
|
|
325
|
+
|
|
326
|
+
/**
|
|
327
|
+
* Register a process exit handler that warns about pending stream log operations.
|
|
328
|
+
* Call this once at startup when verbose mode is enabled (#231).
|
|
329
|
+
*/
|
|
330
|
+
export function registerPendingStreamLogExitHandler(): void {
|
|
331
|
+
process.once('exit', () => {
|
|
332
|
+
if (pendingStreamLogs > 0) {
|
|
333
|
+
// Use stderr directly since the process is exiting and log infrastructure may be unavailable
|
|
334
|
+
process.stderr.write(
|
|
335
|
+
`[verbose] warning: ${pendingStreamLogs} HTTP stream response log(s) were still pending at process exit — response bodies may be missing from logs\n`
|
|
336
|
+
);
|
|
337
|
+
}
|
|
338
|
+
});
|
|
339
|
+
}
|