@link-assistant/agent 0.19.2 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/argv.ts +11 -0
- package/src/cli/defaults.ts +24 -1
- package/src/cli/model-config.js +145 -44
- package/src/cli/run-options.js +10 -1
- package/src/provider/provider.ts +10 -2
- package/src/session/compaction.ts +20 -0
- package/src/session/message-v2.ts +9 -0
- package/src/session/prompt.ts +111 -20
package/package.json
CHANGED
package/src/cli/argv.ts
CHANGED
|
@@ -69,3 +69,14 @@ export function getCompactionModelFromProcessArgv(): string | null {
|
|
|
69
69
|
export function getCompactionSafetyMarginFromProcessArgv(): string | null {
|
|
70
70
|
return getArgFromProcessArgv('compaction-safety-margin');
|
|
71
71
|
}
|
|
72
|
+
|
|
73
|
+
/**
 * Read the raw `--compaction-models` value directly from process.argv.
 *
 * The value is expected to be a links notation references sequence, e.g.:
 *   "(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)"
 *
 * @returns The compaction models argument as given on the CLI, or null if not found
 * @see https://github.com/link-assistant/agent/issues/232
 */
export function getCompactionModelsFromProcessArgv(): string | null {
  const rawCascade = getArgFromProcessArgv('compaction-models');
  return rawCascade;
}
|
package/src/cli/defaults.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
/** Default model used when no `--model` CLI argument is provided. */
|
|
9
|
-
export const DEFAULT_MODEL = 'opencode/
|
|
9
|
+
export const DEFAULT_MODEL = 'opencode/qwen3.6-plus-free';
|
|
10
10
|
|
|
11
11
|
/** Default provider ID extracted from DEFAULT_MODEL. */
|
|
12
12
|
export const DEFAULT_PROVIDER_ID = DEFAULT_MODEL.split('/')[0];
|
|
@@ -23,6 +23,29 @@ export const DEFAULT_MODEL_ID = DEFAULT_MODEL.split('/').slice(1).join('/');
|
|
|
23
23
|
*/
|
|
24
24
|
export const DEFAULT_COMPACTION_MODEL = 'opencode/gpt-5-nano';
|
|
25
25
|
|
|
26
|
+
/**
 * Default compaction models cascade, tried in the listed order during compaction.
 * If the used context exceeds a model's context limit, or that model's rate limit
 * is reached, the system skips to the next entry in the list.
 * The special value "same" means use the same model as `--model`.
 *
 * Ordering note: the list runs roughly from smaller/cheaper context windows to
 * larger ones, but it is NOT strictly sorted by the approximate limits below —
 * minimax-m2.5-free (~200K) is listed after nemotron-3-super-free (~262K).
 * Do not rely on the list being monotonic in context size.
 *
 * Parsed as links notation references sequence (single anonymous link):
 *   "(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)"
 *
 * Context limits (approximate), in cascade order:
 *   big-pickle:            ~200K
 *   nemotron-3-super-free: ~262K
 *   minimax-m2.5-free:     ~200K
 *   gpt-5-nano:            ~400K
 *   qwen3.6-plus-free:     ~1M
 *   same:                  (base model's context)
 *
 * @see https://github.com/link-assistant/agent/issues/232
 */
export const DEFAULT_COMPACTION_MODELS =
  '(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)';
|
|
48
|
+
|
|
26
49
|
/**
|
|
27
50
|
* Default compaction safety margin as a percentage of usable context window.
|
|
28
51
|
* Applied only when the compaction model has a context window equal to or smaller
|
package/src/cli/model-config.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
getModelFromProcessArgv,
|
|
3
3
|
getCompactionModelFromProcessArgv,
|
|
4
|
+
getCompactionModelsFromProcessArgv,
|
|
4
5
|
getCompactionSafetyMarginFromProcessArgv,
|
|
5
6
|
} from './argv.ts';
|
|
6
7
|
import { Log } from '../util/log.ts';
|
|
@@ -8,6 +9,7 @@ import {
|
|
|
8
9
|
DEFAULT_PROVIDER_ID,
|
|
9
10
|
DEFAULT_MODEL_ID,
|
|
10
11
|
DEFAULT_COMPACTION_MODEL,
|
|
12
|
+
DEFAULT_COMPACTION_MODELS,
|
|
11
13
|
DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT,
|
|
12
14
|
} from './defaults.ts';
|
|
13
15
|
|
|
@@ -173,20 +175,71 @@ export async function parseModelConfig(argv, outputError, outputStatus) {
|
|
|
173
175
|
return { providerID, modelID, compactionModel: compactionModelResult };
|
|
174
176
|
}
|
|
175
177
|
|
|
178
|
+
/**
 * Parse a links notation references sequence string into an array of model names.
 * Format: "(model1 model2 model3)" — parenthesized, whitespace-separated list.
 * The surrounding parentheses are optional; a bare "model1 model2" is accepted too.
 *
 * Defensive input handling:
 *   - null / undefined yields an empty array instead of throwing;
 *   - an array (e.g. what an argument parser may hand over when the flag is
 *     repeated) is parsed element by element and the results are concatenated;
 *   - any other non-string value is converted with String() first.
 *
 * @param {string | string[] | null | undefined} notation - Links notation sequence
 * @returns {string[]} Array of model name strings (possibly empty)
 * @see https://github.com/link-assistant/agent/issues/232
 */
function parseLinksNotationSequence(notation) {
  if (notation == null) {
    return [];
  }

  if (Array.isArray(notation)) {
    // Parse each element on its own so every element may carry its own parentheses.
    return notation.flatMap((item) => parseLinksNotationSequence(item));
  }

  const trimmed = String(notation).trim();

  // Remove surrounding parentheses if present
  const inner =
    trimmed.startsWith('(') && trimmed.endsWith(')')
      ? trimmed.slice(1, -1)
      : trimmed;

  // Split on whitespace and filter empty strings
  return inner.split(/\s+/).filter((s) => s.length > 0);
}
|
|
195
|
+
|
|
196
|
+
/**
 * Resolve a single compaction model entry (short name, provider/model, or "same").
 *
 * Resolution rules, applied to the whitespace-trimmed argument:
 *   1. "same" (case-insensitive) — returns the base provider/model with useSameModel = true.
 *   2. "provider/model" — the text before the first "/" is the provider ID, everything
 *      after it (which may itself contain "/") is the model ID.
 *   3. anything else — treated as a short name and passed to
 *      Provider.parseModelWithResolution().
 *
 * @param {string} modelArg - Model entry as written by the user
 * @param {string} baseProviderID - Provider ID of the base (`--model`) model
 * @param {string} baseModelID - Model ID of the base (`--model`) model
 * @returns {Promise<{ providerID: string, modelID: string, useSameModel: boolean }>}
 * @throws {Error} When the entry is empty, or a "provider/model" entry has an empty
 *   provider or model part (e.g. "/model" or "provider/").
 */
async function resolveCompactionModelEntry(
  modelArg,
  baseProviderID,
  baseModelID
) {
  // Trim so values such as " same" or "opencode/gpt-5-nano " are still recognised.
  const entry = modelArg.trim();

  if (entry.length === 0) {
    throw new Error('compaction model entry must not be empty');
  }

  if (entry.toLowerCase() === 'same') {
    return {
      providerID: baseProviderID,
      modelID: baseModelID,
      useSameModel: true,
    };
  }

  const slashIndex = entry.indexOf('/');
  if (slashIndex !== -1) {
    const providerID = entry.slice(0, slashIndex);
    // Keep any further "/" inside the model ID (e.g. "openrouter/vendor/model").
    const modelID = entry.slice(slashIndex + 1);
    if (providerID.length === 0 || modelID.length === 0) {
      throw new Error(
        `invalid compaction model "${entry}": expected providerID/modelID`
      );
    }
    return { providerID, modelID, useSameModel: false };
  }

  // Short name resolution (imported lazily, only when this path is taken)
  const { Provider } = await import('../provider/provider.ts');
  const resolved = await Provider.parseModelWithResolution(entry);
  return {
    providerID: resolved.providerID,
    modelID: resolved.modelID,
    useSameModel: false,
  };
}
|
|
233
|
+
|
|
176
234
|
/**
|
|
177
235
|
* Parse compaction model config from argv.
|
|
178
|
-
*
|
|
236
|
+
* Supports both --compaction-model (single) and --compaction-models (cascade).
|
|
237
|
+
* When --compaction-models is specified, it overrides --compaction-model.
|
|
179
238
|
* The special value "same" means use the base model for compaction.
|
|
180
239
|
* @see https://github.com/link-assistant/agent/issues/219
|
|
240
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
181
241
|
*/
|
|
182
242
|
async function parseCompactionModelConfig(argv, baseProviderID, baseModelID) {
|
|
183
|
-
// Get compaction model from CLI (safeguard against yargs caching)
|
|
184
|
-
const cliCompactionModelArg = getCompactionModelFromProcessArgv();
|
|
185
|
-
const compactionModelArg =
|
|
186
|
-
cliCompactionModelArg ??
|
|
187
|
-
argv['compaction-model'] ??
|
|
188
|
-
DEFAULT_COMPACTION_MODEL;
|
|
189
|
-
|
|
190
243
|
// Get safety margin from CLI
|
|
191
244
|
const cliSafetyMarginArg = getCompactionSafetyMarginFromProcessArgv();
|
|
192
245
|
const compactionSafetyMarginPercent = cliSafetyMarginArg
|
|
@@ -194,49 +247,97 @@ async function parseCompactionModelConfig(argv, baseProviderID, baseModelID) {
|
|
|
194
247
|
: (argv['compaction-safety-margin'] ??
|
|
195
248
|
DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT);
|
|
196
249
|
|
|
197
|
-
//
|
|
198
|
-
const
|
|
250
|
+
// Check for --compaction-models (cascade) first — it overrides --compaction-model
|
|
251
|
+
const cliCompactionModelsArg = getCompactionModelsFromProcessArgv();
|
|
252
|
+
const compactionModelsArg =
|
|
253
|
+
cliCompactionModelsArg ??
|
|
254
|
+
argv['compaction-models'] ??
|
|
255
|
+
DEFAULT_COMPACTION_MODELS;
|
|
199
256
|
|
|
200
|
-
|
|
201
|
-
|
|
257
|
+
// Parse the links notation sequence into an array of model names
|
|
258
|
+
const modelNames = parseLinksNotationSequence(compactionModelsArg);
|
|
259
|
+
|
|
260
|
+
if (modelNames.length > 0) {
|
|
261
|
+
// Resolve each model in the cascade
|
|
262
|
+
const compactionModels = [];
|
|
263
|
+
for (const name of modelNames) {
|
|
264
|
+
try {
|
|
265
|
+
const resolved = await resolveCompactionModelEntry(
|
|
266
|
+
name,
|
|
267
|
+
baseProviderID,
|
|
268
|
+
baseModelID
|
|
269
|
+
);
|
|
270
|
+
compactionModels.push({
|
|
271
|
+
providerID: resolved.providerID,
|
|
272
|
+
modelID: resolved.modelID,
|
|
273
|
+
useSameModel: resolved.useSameModel,
|
|
274
|
+
});
|
|
275
|
+
} catch (err) {
|
|
276
|
+
// If a model can't be resolved, log and skip it
|
|
277
|
+
Log.Default.warn(() => ({
|
|
278
|
+
message: 'skipping unresolvable compaction model in cascade',
|
|
279
|
+
model: name,
|
|
280
|
+
error: err?.message,
|
|
281
|
+
}));
|
|
282
|
+
}
|
|
283
|
+
}
|
|
202
284
|
|
|
203
|
-
if (useSameModel) {
|
|
204
|
-
compactionProviderID = baseProviderID;
|
|
205
|
-
compactionModelID = baseModelID;
|
|
206
|
-
Log.Default.info(() => ({
|
|
207
|
-
message:
|
|
208
|
-
'compaction model set to "same" — using base model for compaction',
|
|
209
|
-
compactionProviderID,
|
|
210
|
-
compactionModelID,
|
|
211
|
-
}));
|
|
212
|
-
} else if (compactionModelArg.includes('/')) {
|
|
213
|
-
const parts = compactionModelArg.split('/');
|
|
214
|
-
compactionProviderID = parts[0];
|
|
215
|
-
compactionModelID = parts.slice(1).join('/');
|
|
216
|
-
Log.Default.info(() => ({
|
|
217
|
-
message: 'using explicit compaction model',
|
|
218
|
-
compactionProviderID,
|
|
219
|
-
compactionModelID,
|
|
220
|
-
}));
|
|
221
|
-
} else {
|
|
222
|
-
// Short name resolution
|
|
223
|
-
const { Provider } = await import('../provider/provider.ts');
|
|
224
|
-
const resolved =
|
|
225
|
-
await Provider.parseModelWithResolution(compactionModelArg);
|
|
226
|
-
compactionProviderID = resolved.providerID;
|
|
227
|
-
compactionModelID = resolved.modelID;
|
|
228
285
|
Log.Default.info(() => ({
|
|
229
|
-
message: '
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
286
|
+
message: 'compaction models cascade configured',
|
|
287
|
+
models: compactionModels.map((m) =>
|
|
288
|
+
m.useSameModel ? 'same' : `${m.providerID}/${m.modelID}`
|
|
289
|
+
),
|
|
290
|
+
source: cliCompactionModelsArg ? 'cli' : 'default',
|
|
233
291
|
}));
|
|
292
|
+
|
|
293
|
+
// Use the first model as the primary compaction model (for backward compatibility)
|
|
294
|
+
// The full cascade is stored in compactionModels array
|
|
295
|
+
const primary = compactionModels[0] || {
|
|
296
|
+
providerID: baseProviderID,
|
|
297
|
+
modelID: baseModelID,
|
|
298
|
+
useSameModel: true,
|
|
299
|
+
};
|
|
300
|
+
|
|
301
|
+
return {
|
|
302
|
+
providerID: primary.providerID,
|
|
303
|
+
modelID: primary.modelID,
|
|
304
|
+
useSameModel: primary.useSameModel,
|
|
305
|
+
compactionSafetyMarginPercent,
|
|
306
|
+
compactionModels,
|
|
307
|
+
};
|
|
234
308
|
}
|
|
235
309
|
|
|
310
|
+
// Fallback to single --compaction-model
|
|
311
|
+
const cliCompactionModelArg = getCompactionModelFromProcessArgv();
|
|
312
|
+
const compactionModelArg =
|
|
313
|
+
cliCompactionModelArg ??
|
|
314
|
+
argv['compaction-model'] ??
|
|
315
|
+
DEFAULT_COMPACTION_MODEL;
|
|
316
|
+
|
|
317
|
+
const resolved = await resolveCompactionModelEntry(
|
|
318
|
+
compactionModelArg,
|
|
319
|
+
baseProviderID,
|
|
320
|
+
baseModelID
|
|
321
|
+
);
|
|
322
|
+
|
|
323
|
+
Log.Default.info(() => ({
|
|
324
|
+
message: 'using single compaction model',
|
|
325
|
+
compactionProviderID: resolved.providerID,
|
|
326
|
+
compactionModelID: resolved.modelID,
|
|
327
|
+
useSameModel: resolved.useSameModel,
|
|
328
|
+
}));
|
|
329
|
+
|
|
236
330
|
return {
|
|
237
|
-
providerID:
|
|
238
|
-
modelID:
|
|
239
|
-
useSameModel,
|
|
331
|
+
providerID: resolved.providerID,
|
|
332
|
+
modelID: resolved.modelID,
|
|
333
|
+
useSameModel: resolved.useSameModel,
|
|
240
334
|
compactionSafetyMarginPercent,
|
|
335
|
+
compactionModels: [
|
|
336
|
+
{
|
|
337
|
+
providerID: resolved.providerID,
|
|
338
|
+
modelID: resolved.modelID,
|
|
339
|
+
useSameModel: resolved.useSameModel,
|
|
340
|
+
},
|
|
341
|
+
],
|
|
241
342
|
};
|
|
242
343
|
}
|
package/src/cli/run-options.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
DEFAULT_MODEL,
|
|
3
3
|
DEFAULT_COMPACTION_MODEL,
|
|
4
|
+
DEFAULT_COMPACTION_MODELS,
|
|
4
5
|
DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT,
|
|
5
6
|
} from './defaults.ts';
|
|
6
7
|
|
|
@@ -151,9 +152,17 @@ export function buildRunOptions(yargs) {
|
|
|
151
152
|
.option('compaction-model', {
|
|
152
153
|
type: 'string',
|
|
153
154
|
description:
|
|
154
|
-
'Model to use for context compaction in format providerID/modelID. Use "same" to use the base model. Default: opencode/gpt-5-nano (free, 400K context).',
|
|
155
|
+
'Model to use for context compaction in format providerID/modelID. Use "same" to use the base model. Default: opencode/gpt-5-nano (free, 400K context). Overridden by --compaction-models if both are specified.',
|
|
155
156
|
default: DEFAULT_COMPACTION_MODEL,
|
|
156
157
|
})
|
|
158
|
+
.option('compaction-models', {
|
|
159
|
+
type: 'string',
|
|
160
|
+
description:
|
|
161
|
+
'Ordered cascade of compaction models in links notation sequence format: "(model1 model2 ... same)". ' +
|
|
162
|
+
"Models are tried from smallest/cheapest context to largest. If used context exceeds a model's limit or its rate limit is reached, the next model is tried. " +
|
|
163
|
+
'The special value "same" uses the base model. Overrides --compaction-model when specified.',
|
|
164
|
+
default: DEFAULT_COMPACTION_MODELS,
|
|
165
|
+
})
|
|
157
166
|
.option('compaction-safety-margin', {
|
|
158
167
|
type: 'number',
|
|
159
168
|
description:
|
package/src/provider/provider.ts
CHANGED
|
@@ -1731,7 +1731,13 @@ export namespace Provider {
|
|
|
1731
1731
|
priority = priority.filter((m) => m !== 'claude-haiku-4.5');
|
|
1732
1732
|
}
|
|
1733
1733
|
if (providerID === 'opencode' || providerID === 'local') {
|
|
1734
|
-
priority = [
|
|
1734
|
+
priority = [
|
|
1735
|
+
'qwen3.6-plus-free',
|
|
1736
|
+
'minimax-m2.5-free',
|
|
1737
|
+
'gpt-5-nano',
|
|
1738
|
+
'nemotron-3-super-free',
|
|
1739
|
+
'big-pickle',
|
|
1740
|
+
];
|
|
1735
1741
|
}
|
|
1736
1742
|
if (providerID === 'kilo') {
|
|
1737
1743
|
priority = [
|
|
@@ -1758,7 +1764,9 @@ export namespace Provider {
|
|
|
1758
1764
|
}
|
|
1759
1765
|
|
|
1760
1766
|
const priority = [
|
|
1767
|
+
'qwen3.6-plus-free',
|
|
1761
1768
|
'glm-5-free',
|
|
1769
|
+
'nemotron-3-super-free',
|
|
1762
1770
|
'minimax-m2.5-free',
|
|
1763
1771
|
'gpt-5-nano',
|
|
1764
1772
|
'big-pickle',
|
|
@@ -1841,7 +1849,7 @@ export namespace Provider {
|
|
|
1841
1849
|
* 1. If model is uniquely available in one provider, use that provider
|
|
1842
1850
|
* 2. If model is available in multiple providers, prioritize based on free model availability:
|
|
1843
1851
|
* - kilo: glm-5-free, glm-4.5-air-free, minimax-m2.5-free, giga-potato-free, deepseek-r1-free (unique to Kilo)
|
|
1844
|
-
* - opencode: big-pickle, gpt-5-nano (unique to OpenCode)
|
|
1852
|
+
* - opencode: big-pickle, gpt-5-nano, qwen3.6-plus-free, nemotron-3-super-free (unique to OpenCode)
|
|
1845
1853
|
* 3. For shared models, prefer OpenCode first, then fall back to Kilo on rate limit
|
|
1846
1854
|
*
|
|
1847
1855
|
* @param modelID - Short model name without provider prefix
|
|
@@ -36,15 +36,35 @@ export namespace SessionCompaction {
|
|
|
36
36
|
*/
|
|
37
37
|
export const OVERFLOW_SAFETY_MARGIN = 0.85;
|
|
38
38
|
|
|
39
|
+
/**
|
|
40
|
+
* A single compaction model entry in the cascade.
|
|
41
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
42
|
+
*/
|
|
43
|
+
export interface CompactionModelEntry {
|
|
44
|
+
providerID: string;
|
|
45
|
+
modelID: string;
|
|
46
|
+
useSameModel: boolean;
|
|
47
|
+
}
|
|
48
|
+
|
|
39
49
|
/**
|
|
40
50
|
* Compaction model configuration passed from CLI.
|
|
51
|
+
* Supports both single model (backward compat) and cascade of models (#232).
|
|
41
52
|
* @see https://github.com/link-assistant/agent/issues/219
|
|
53
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
42
54
|
*/
|
|
43
55
|
export interface CompactionModelConfig {
|
|
44
56
|
providerID: string;
|
|
45
57
|
modelID: string;
|
|
46
58
|
useSameModel: boolean;
|
|
47
59
|
compactionSafetyMarginPercent: number;
|
|
60
|
+
/**
|
|
61
|
+
* Ordered cascade of compaction models from smallest/cheapest to largest.
|
|
62
|
+
* When present, the system tries each model in order during compaction.
|
|
63
|
+
* If used context exceeds a model's limit or its rate limit is reached,
|
|
64
|
+
* the next model is tried.
|
|
65
|
+
* @see https://github.com/link-assistant/agent/issues/232
|
|
66
|
+
*/
|
|
67
|
+
compactionModels?: CompactionModelEntry[];
|
|
48
68
|
}
|
|
49
69
|
|
|
50
70
|
/**
|
|
@@ -398,6 +398,15 @@ export namespace MessageV2 {
|
|
|
398
398
|
modelID: z.string(),
|
|
399
399
|
useSameModel: z.boolean(),
|
|
400
400
|
compactionSafetyMarginPercent: z.number(),
|
|
401
|
+
compactionModels: z
|
|
402
|
+
.array(
|
|
403
|
+
z.object({
|
|
404
|
+
providerID: z.string(),
|
|
405
|
+
modelID: z.string(),
|
|
406
|
+
useSameModel: z.boolean(),
|
|
407
|
+
})
|
|
408
|
+
)
|
|
409
|
+
.optional(),
|
|
401
410
|
})
|
|
402
411
|
.optional(),
|
|
403
412
|
system: z.string().optional(),
|
package/src/session/prompt.ts
CHANGED
|
@@ -95,6 +95,15 @@ export namespace SessionPrompt {
|
|
|
95
95
|
modelID: z.string(),
|
|
96
96
|
useSameModel: z.boolean(),
|
|
97
97
|
compactionSafetyMarginPercent: z.number(),
|
|
98
|
+
compactionModels: z
|
|
99
|
+
.array(
|
|
100
|
+
z.object({
|
|
101
|
+
providerID: z.string(),
|
|
102
|
+
modelID: z.string(),
|
|
103
|
+
useSameModel: z.boolean(),
|
|
104
|
+
})
|
|
105
|
+
)
|
|
106
|
+
.optional(),
|
|
98
107
|
})
|
|
99
108
|
.optional(),
|
|
100
109
|
agent: z.string().optional(),
|
|
@@ -542,27 +551,109 @@ export namespace SessionPrompt {
|
|
|
542
551
|
|
|
543
552
|
// pending compaction
|
|
544
553
|
if (task?.type === 'compaction') {
|
|
545
|
-
// Use compaction model if configured
|
|
554
|
+
// Use compaction model cascade if configured (#232)
|
|
546
555
|
const compactionModelConfig = lastUser.compactionModel;
|
|
547
|
-
const
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
556
|
+
const cascade = compactionModelConfig?.compactionModels;
|
|
557
|
+
|
|
558
|
+
if (cascade && cascade.length > 0) {
|
|
559
|
+
// Cascade logic: try each model in order (smallest to largest context)
|
|
560
|
+
// Skip models whose context limit is smaller than current used tokens
|
|
561
|
+
// Skip models that hit rate limits (try next)
|
|
562
|
+
const currentTokens = lastFinished
|
|
563
|
+
? lastFinished.tokens.input +
|
|
564
|
+
lastFinished.tokens.cache.read +
|
|
565
|
+
lastFinished.tokens.output
|
|
566
|
+
: 0;
|
|
567
|
+
|
|
568
|
+
let compactionResult = 'stop';
|
|
569
|
+
for (const entry of cascade) {
|
|
570
|
+
const entryProviderID = entry.useSameModel
|
|
571
|
+
? model.providerID
|
|
572
|
+
: entry.providerID;
|
|
573
|
+
const entryModelID = entry.useSameModel
|
|
574
|
+
? model.modelID
|
|
575
|
+
: entry.modelID;
|
|
576
|
+
|
|
577
|
+
// Check if this model's context is large enough for the current tokens
|
|
578
|
+
if (!entry.useSameModel) {
|
|
579
|
+
try {
|
|
580
|
+
const entryModel = await Provider.getModel(
|
|
581
|
+
entryProviderID,
|
|
582
|
+
entryModelID
|
|
583
|
+
);
|
|
584
|
+
const entryContextLimit = entryModel.info?.limit?.context ?? 0;
|
|
585
|
+
if (
|
|
586
|
+
entryContextLimit > 0 &&
|
|
587
|
+
currentTokens > entryContextLimit
|
|
588
|
+
) {
|
|
589
|
+
log.info(() => ({
|
|
590
|
+
message:
|
|
591
|
+
'skipping compaction model — context too small for current tokens',
|
|
592
|
+
modelID: entryModelID,
|
|
593
|
+
providerID: entryProviderID,
|
|
594
|
+
contextLimit: entryContextLimit,
|
|
595
|
+
currentTokens,
|
|
596
|
+
}));
|
|
597
|
+
continue;
|
|
598
|
+
}
|
|
599
|
+
} catch {
|
|
600
|
+
log.info(() => ({
|
|
601
|
+
message:
|
|
602
|
+
'could not resolve compaction cascade model — skipping',
|
|
603
|
+
modelID: entryModelID,
|
|
604
|
+
providerID: entryProviderID,
|
|
605
|
+
}));
|
|
606
|
+
continue;
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
try {
|
|
611
|
+
compactionResult = await SessionCompaction.process({
|
|
612
|
+
messages: msgs,
|
|
613
|
+
parentID: lastUser.id,
|
|
614
|
+
abort,
|
|
615
|
+
model: {
|
|
616
|
+
providerID: entryProviderID,
|
|
617
|
+
modelID: entryModelID,
|
|
618
|
+
},
|
|
619
|
+
sessionID,
|
|
620
|
+
});
|
|
621
|
+
// If compaction succeeded, break the cascade
|
|
622
|
+
break;
|
|
623
|
+
} catch (err) {
|
|
624
|
+
// If rate limited or error, try next model in cascade
|
|
625
|
+
log.warn(() => ({
|
|
626
|
+
message: 'compaction model failed — trying next in cascade',
|
|
627
|
+
modelID: entryModelID,
|
|
628
|
+
providerID: entryProviderID,
|
|
629
|
+
error: err?.message,
|
|
630
|
+
}));
|
|
631
|
+
continue;
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
if (compactionResult === 'stop') break;
|
|
635
|
+
} else {
|
|
636
|
+
// Single model fallback (backward compatibility)
|
|
637
|
+
const compactionProviderID =
|
|
638
|
+
compactionModelConfig && !compactionModelConfig.useSameModel
|
|
639
|
+
? compactionModelConfig.providerID
|
|
640
|
+
: model.providerID;
|
|
641
|
+
const compactionModelID =
|
|
642
|
+
compactionModelConfig && !compactionModelConfig.useSameModel
|
|
643
|
+
? compactionModelConfig.modelID
|
|
644
|
+
: model.modelID;
|
|
645
|
+
const result = await SessionCompaction.process({
|
|
646
|
+
messages: msgs,
|
|
647
|
+
parentID: lastUser.id,
|
|
648
|
+
abort,
|
|
649
|
+
model: {
|
|
650
|
+
providerID: compactionProviderID,
|
|
651
|
+
modelID: compactionModelID,
|
|
652
|
+
},
|
|
653
|
+
sessionID,
|
|
654
|
+
});
|
|
655
|
+
if (result === 'stop') break;
|
|
656
|
+
}
|
|
566
657
|
continue;
|
|
567
658
|
}
|
|
568
659
|
|