@link-assistant/agent 0.19.2 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/agent",
3
- "version": "0.19.2",
3
+ "version": "0.20.0",
4
4
  "description": "A minimal, public domain AI CLI agent compatible with OpenCode's JSON interface. Bun-only runtime.",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/cli/argv.ts CHANGED
@@ -69,3 +69,14 @@ export function getCompactionModelFromProcessArgv(): string | null {
69
69
  export function getCompactionSafetyMarginFromProcessArgv(): string | null {
70
70
  return getArgFromProcessArgv('compaction-safety-margin');
71
71
  }
72
+
/**
 * Read the raw `--compaction-models` value straight from `process.argv`.
 *
 * The value is expected to be a links notation references sequence, e.g.:
 *   "(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)"
 * No parsing or validation happens here; the lookup is delegated to
 * `getArgFromProcessArgv` and its result is returned unchanged.
 *
 * @returns The compaction models argument from the CLI, or null if not found
 * @see https://github.com/link-assistant/agent/issues/232
 */
export function getCompactionModelsFromProcessArgv(): string | null {
  const flagName = 'compaction-models';
  return getArgFromProcessArgv(flagName);
}
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  /** Default model used when no `--model` CLI argument is provided. */
9
- export const DEFAULT_MODEL = 'opencode/minimax-m2.5-free';
9
+ export const DEFAULT_MODEL = 'opencode/qwen3.6-plus-free';
10
10
 
11
11
  /** Default provider ID extracted from DEFAULT_MODEL. */
12
12
  export const DEFAULT_PROVIDER_ID = DEFAULT_MODEL.split('/')[0];
@@ -23,6 +23,29 @@ export const DEFAULT_MODEL_ID = DEFAULT_MODEL.split('/').slice(1).join('/');
23
23
  */
24
24
  export const DEFAULT_COMPACTION_MODEL = 'opencode/gpt-5-nano';
25
25
 
/**
 * Default compaction models cascade, as a links notation references sequence
 * (single anonymous link):
 *   "(big-pickle nemotron-3-super-free minimax-m2.5-free gpt-5-nano qwen3.6-plus-free same)"
 *
 * The cascade is meant to go from smallest/cheapest context to largest. During
 * compaction the system tries each model in order: a model whose context limit
 * is below the currently used context is skipped, and a model that fails
 * (e.g. its rate limit is reached) is skipped in favour of the next one.
 * The special value "same" means use the same model as `--model`.
 *
 * Context limits (approximate):
 *   big-pickle:            ~200K
 *   nemotron-3-super-free: ~262K
 *   minimax-m2.5-free:     ~200K
 *   gpt-5-nano:            ~400K
 *   qwen3.6-plus-free:     ~1M
 *   same:                  (base model's context)
 *
 * Note that, going by the approximate figures above, the order is not strictly
 * increasing: minimax-m2.5-free (~200K) comes after nemotron-3-super-free (~262K).
 *
 * @see https://github.com/link-assistant/agent/issues/232
 */
export const DEFAULT_COMPACTION_MODELS = `(${[
  'big-pickle',
  'nemotron-3-super-free',
  'minimax-m2.5-free',
  'gpt-5-nano',
  'qwen3.6-plus-free',
  'same',
].join(' ')})`;
48
+
26
49
  /**
27
50
  * Default compaction safety margin as a percentage of usable context window.
28
51
  * Applied only when the compaction model has a context window equal to or smaller
@@ -1,6 +1,7 @@
1
1
  import {
2
2
  getModelFromProcessArgv,
3
3
  getCompactionModelFromProcessArgv,
4
+ getCompactionModelsFromProcessArgv,
4
5
  getCompactionSafetyMarginFromProcessArgv,
5
6
  } from './argv.ts';
6
7
  import { Log } from '../util/log.ts';
@@ -8,6 +9,7 @@ import {
8
9
  DEFAULT_PROVIDER_ID,
9
10
  DEFAULT_MODEL_ID,
10
11
  DEFAULT_COMPACTION_MODEL,
12
+ DEFAULT_COMPACTION_MODELS,
11
13
  DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT,
12
14
  } from './defaults.ts';
13
15
 
@@ -173,20 +175,71 @@ export async function parseModelConfig(argv, outputError, outputStatus) {
173
175
  return { providerID, modelID, compactionModel: compactionModelResult };
174
176
  }
175
177
 
/**
 * Parse a links notation references sequence string into an array of model names.
 * Format: "(model1 model2 model3)" — parenthesized space-separated list.
 *
 * Parentheses are treated as separators rather than being stripped only when
 * both are present, so unbalanced or repeated groups ("(a b", "(a) (b)") never
 * leak "(" or ")" into a model name. Order of names is preserved.
 *
 * Non-string input is tolerated instead of throwing: null/undefined yields an
 * empty list, and an array (e.g. if the option parser collected a repeated
 * flag) is parsed as the concatenation of its elements.
 *
 * @param {string | string[] | null | undefined} notation - Links notation sequence
 * @returns {string[]} Model names in the order they appear (possibly empty)
 * @see https://github.com/link-assistant/agent/issues/232
 */
function parseLinksNotationSequence(notation) {
  if (notation == null) {
    return [];
  }
  const text = Array.isArray(notation) ? notation.join(' ') : String(notation);
  // Whitespace and parentheses both delimit names; drop the empty pieces that
  // leading/trailing delimiters produce.
  return text.split(/[\s()]+/).filter((name) => name.length > 0);
}
195
+
/**
 * Resolve a single compaction model entry (short name, provider/model, or "same").
 *
 * Resolution rules, applied to the trimmed input:
 * - "same" (case-insensitive): the base provider/model is returned with
 *   `useSameModel: true`.
 * - "providerID/modelID": split on the first "/"; everything after it is the
 *   model ID, so model IDs may themselves contain "/".
 * - anything else: treated as a short name and resolved through
 *   `Provider.parseModelWithResolution`.
 *
 * @param {string} modelArg - Model spec as written by the user
 * @param {string} baseProviderID - Provider of the base (`--model`) model
 * @param {string} baseModelID - Model ID of the base (`--model`) model
 * @returns {Promise<{ providerID: string, modelID: string, useSameModel: boolean }>}
 * @throws {Error} If the spec is empty, or has an empty provider or model part
 *   (e.g. "opencode/" or "/gpt-5-nano"); errors from short-name resolution
 *   propagate unchanged.
 */
async function resolveCompactionModelEntry(
  modelArg,
  baseProviderID,
  baseModelID
) {
  const spec = String(modelArg ?? '').trim();
  if (spec.length === 0) {
    throw new Error('compaction model name must not be empty');
  }

  if (spec.toLowerCase() === 'same') {
    return {
      providerID: baseProviderID,
      modelID: baseModelID,
      useSameModel: true,
    };
  }

  const slashIndex = spec.indexOf('/');
  if (slashIndex !== -1) {
    const providerID = spec.slice(0, slashIndex);
    const modelID = spec.slice(slashIndex + 1);
    // Fail here rather than handing an empty ID to the provider layer later.
    if (providerID.length === 0 || modelID.length === 0) {
      throw new Error(
        `invalid compaction model "${spec}": expected providerID/modelID`
      );
    }
    return { providerID, modelID, useSameModel: false };
  }

  // Short name resolution; the provider module is only loaded on this path.
  const { Provider } = await import('../provider/provider.ts');
  const resolved = await Provider.parseModelWithResolution(spec);
  return {
    providerID: resolved.providerID,
    modelID: resolved.modelID,
    useSameModel: false,
  };
}
233
+
176
234
  /**
177
235
  * Parse compaction model config from argv.
178
- * Resolves --compaction-model and --compaction-safety-margin CLI arguments.
236
+ * Supports both --compaction-model (single) and --compaction-models (cascade).
237
+ * When --compaction-models is specified, it overrides --compaction-model.
179
238
  * The special value "same" means use the base model for compaction.
180
239
  * @see https://github.com/link-assistant/agent/issues/219
240
+ * @see https://github.com/link-assistant/agent/issues/232
181
241
  */
182
242
  async function parseCompactionModelConfig(argv, baseProviderID, baseModelID) {
183
- // Get compaction model from CLI (safeguard against yargs caching)
184
- const cliCompactionModelArg = getCompactionModelFromProcessArgv();
185
- const compactionModelArg =
186
- cliCompactionModelArg ??
187
- argv['compaction-model'] ??
188
- DEFAULT_COMPACTION_MODEL;
189
-
190
243
  // Get safety margin from CLI
191
244
  const cliSafetyMarginArg = getCompactionSafetyMarginFromProcessArgv();
192
245
  const compactionSafetyMarginPercent = cliSafetyMarginArg
@@ -194,49 +247,97 @@ async function parseCompactionModelConfig(argv, baseProviderID, baseModelID) {
194
247
  : (argv['compaction-safety-margin'] ??
195
248
  DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT);
196
249
 
197
- // Special "same" alias use the base model for compaction
198
- const useSameModel = compactionModelArg.toLowerCase() === 'same';
250
+ // Check for --compaction-models (cascade) first it overrides --compaction-model
251
+ const cliCompactionModelsArg = getCompactionModelsFromProcessArgv();
252
+ const compactionModelsArg =
253
+ cliCompactionModelsArg ??
254
+ argv['compaction-models'] ??
255
+ DEFAULT_COMPACTION_MODELS;
199
256
 
200
- let compactionProviderID;
201
- let compactionModelID;
257
+ // Parse the links notation sequence into an array of model names
258
+ const modelNames = parseLinksNotationSequence(compactionModelsArg);
259
+
260
+ if (modelNames.length > 0) {
261
+ // Resolve each model in the cascade
262
+ const compactionModels = [];
263
+ for (const name of modelNames) {
264
+ try {
265
+ const resolved = await resolveCompactionModelEntry(
266
+ name,
267
+ baseProviderID,
268
+ baseModelID
269
+ );
270
+ compactionModels.push({
271
+ providerID: resolved.providerID,
272
+ modelID: resolved.modelID,
273
+ useSameModel: resolved.useSameModel,
274
+ });
275
+ } catch (err) {
276
+ // If a model can't be resolved, log and skip it
277
+ Log.Default.warn(() => ({
278
+ message: 'skipping unresolvable compaction model in cascade',
279
+ model: name,
280
+ error: err?.message,
281
+ }));
282
+ }
283
+ }
202
284
 
203
- if (useSameModel) {
204
- compactionProviderID = baseProviderID;
205
- compactionModelID = baseModelID;
206
- Log.Default.info(() => ({
207
- message:
208
- 'compaction model set to "same" — using base model for compaction',
209
- compactionProviderID,
210
- compactionModelID,
211
- }));
212
- } else if (compactionModelArg.includes('/')) {
213
- const parts = compactionModelArg.split('/');
214
- compactionProviderID = parts[0];
215
- compactionModelID = parts.slice(1).join('/');
216
- Log.Default.info(() => ({
217
- message: 'using explicit compaction model',
218
- compactionProviderID,
219
- compactionModelID,
220
- }));
221
- } else {
222
- // Short name resolution
223
- const { Provider } = await import('../provider/provider.ts');
224
- const resolved =
225
- await Provider.parseModelWithResolution(compactionModelArg);
226
- compactionProviderID = resolved.providerID;
227
- compactionModelID = resolved.modelID;
228
285
  Log.Default.info(() => ({
229
- message: 'resolved short compaction model name',
230
- input: compactionModelArg,
231
- compactionProviderID,
232
- compactionModelID,
286
+ message: 'compaction models cascade configured',
287
+ models: compactionModels.map((m) =>
288
+ m.useSameModel ? 'same' : `${m.providerID}/${m.modelID}`
289
+ ),
290
+ source: cliCompactionModelsArg ? 'cli' : 'default',
233
291
  }));
292
+
293
+ // Use the first model as the primary compaction model (for backward compatibility)
294
+ // The full cascade is stored in compactionModels array
295
+ const primary = compactionModels[0] || {
296
+ providerID: baseProviderID,
297
+ modelID: baseModelID,
298
+ useSameModel: true,
299
+ };
300
+
301
+ return {
302
+ providerID: primary.providerID,
303
+ modelID: primary.modelID,
304
+ useSameModel: primary.useSameModel,
305
+ compactionSafetyMarginPercent,
306
+ compactionModels,
307
+ };
234
308
  }
235
309
 
310
+ // Fallback to single --compaction-model
311
+ const cliCompactionModelArg = getCompactionModelFromProcessArgv();
312
+ const compactionModelArg =
313
+ cliCompactionModelArg ??
314
+ argv['compaction-model'] ??
315
+ DEFAULT_COMPACTION_MODEL;
316
+
317
+ const resolved = await resolveCompactionModelEntry(
318
+ compactionModelArg,
319
+ baseProviderID,
320
+ baseModelID
321
+ );
322
+
323
+ Log.Default.info(() => ({
324
+ message: 'using single compaction model',
325
+ compactionProviderID: resolved.providerID,
326
+ compactionModelID: resolved.modelID,
327
+ useSameModel: resolved.useSameModel,
328
+ }));
329
+
236
330
  return {
237
- providerID: compactionProviderID,
238
- modelID: compactionModelID,
239
- useSameModel,
331
+ providerID: resolved.providerID,
332
+ modelID: resolved.modelID,
333
+ useSameModel: resolved.useSameModel,
240
334
  compactionSafetyMarginPercent,
335
+ compactionModels: [
336
+ {
337
+ providerID: resolved.providerID,
338
+ modelID: resolved.modelID,
339
+ useSameModel: resolved.useSameModel,
340
+ },
341
+ ],
241
342
  };
242
343
  }
@@ -1,6 +1,7 @@
1
1
  import {
2
2
  DEFAULT_MODEL,
3
3
  DEFAULT_COMPACTION_MODEL,
4
+ DEFAULT_COMPACTION_MODELS,
4
5
  DEFAULT_COMPACTION_SAFETY_MARGIN_PERCENT,
5
6
  } from './defaults.ts';
6
7
 
@@ -151,9 +152,17 @@ export function buildRunOptions(yargs) {
151
152
  .option('compaction-model', {
152
153
  type: 'string',
153
154
  description:
154
- 'Model to use for context compaction in format providerID/modelID. Use "same" to use the base model. Default: opencode/gpt-5-nano (free, 400K context).',
155
+ 'Model to use for context compaction in format providerID/modelID. Use "same" to use the base model. Default: opencode/gpt-5-nano (free, 400K context). Overridden by --compaction-models if both are specified.',
155
156
  default: DEFAULT_COMPACTION_MODEL,
156
157
  })
158
+ .option('compaction-models', {
159
+ type: 'string',
160
+ description:
161
+ 'Ordered cascade of compaction models in links notation sequence format: "(model1 model2 ... same)". ' +
162
+ "Models are tried from smallest/cheapest context to largest. If used context exceeds a model's limit or its rate limit is reached, the next model is tried. " +
163
+ 'The special value "same" uses the base model. Overrides --compaction-model when specified.',
164
+ default: DEFAULT_COMPACTION_MODELS,
165
+ })
157
166
  .option('compaction-safety-margin', {
158
167
  type: 'number',
159
168
  description:
@@ -1731,7 +1731,13 @@ export namespace Provider {
1731
1731
  priority = priority.filter((m) => m !== 'claude-haiku-4.5');
1732
1732
  }
1733
1733
  if (providerID === 'opencode' || providerID === 'local') {
1734
- priority = ['minimax-m2.5-free', 'gpt-5-nano', 'big-pickle'];
1734
+ priority = [
1735
+ 'qwen3.6-plus-free',
1736
+ 'minimax-m2.5-free',
1737
+ 'gpt-5-nano',
1738
+ 'nemotron-3-super-free',
1739
+ 'big-pickle',
1740
+ ];
1735
1741
  }
1736
1742
  if (providerID === 'kilo') {
1737
1743
  priority = [
@@ -1758,7 +1764,9 @@ export namespace Provider {
1758
1764
  }
1759
1765
 
1760
1766
  const priority = [
1767
+ 'qwen3.6-plus-free',
1761
1768
  'glm-5-free',
1769
+ 'nemotron-3-super-free',
1762
1770
  'minimax-m2.5-free',
1763
1771
  'gpt-5-nano',
1764
1772
  'big-pickle',
@@ -1841,7 +1849,7 @@ export namespace Provider {
1841
1849
  * 1. If model is uniquely available in one provider, use that provider
1842
1850
  * 2. If model is available in multiple providers, prioritize based on free model availability:
1843
1851
  * - kilo: glm-5-free, glm-4.5-air-free, minimax-m2.5-free, giga-potato-free, deepseek-r1-free (unique to Kilo)
1844
- * - opencode: big-pickle, gpt-5-nano (unique to OpenCode)
1852
+ * - opencode: big-pickle, gpt-5-nano, qwen3.6-plus-free, nemotron-3-super-free (unique to OpenCode)
1845
1853
  * 3. For shared models, prefer OpenCode first, then fall back to Kilo on rate limit
1846
1854
  *
1847
1855
  * @param modelID - Short model name without provider prefix
@@ -36,15 +36,35 @@ export namespace SessionCompaction {
36
36
  */
37
37
  export const OVERFLOW_SAFETY_MARGIN = 0.85;
38
38
 
/**
 * One step of the compaction model cascade.
 * @see https://github.com/link-assistant/agent/issues/232
 */
export interface CompactionModelEntry {
  /** When true, the session's base model is used instead of providerID/modelID. */
  useSameModel: boolean;
  /** Provider identifier of the compaction model. */
  providerID: string;
  /** Model identifier of the compaction model. */
  modelID: string;
}
48
+
39
49
  /**
40
50
  * Compaction model configuration passed from CLI.
51
+ * Supports both single model (backward compat) and cascade of models (#232).
41
52
  * @see https://github.com/link-assistant/agent/issues/219
53
+ * @see https://github.com/link-assistant/agent/issues/232
42
54
  */
43
55
  export interface CompactionModelConfig {
44
56
  providerID: string;
45
57
  modelID: string;
46
58
  useSameModel: boolean;
47
59
  compactionSafetyMarginPercent: number;
60
+ /**
61
+ * Ordered cascade of compaction models from smallest/cheapest to largest.
62
+ * When present, the system tries each model in order during compaction.
63
+ * If used context exceeds a model's limit or its rate limit is reached,
64
+ * the next model is tried.
65
+ * @see https://github.com/link-assistant/agent/issues/232
66
+ */
67
+ compactionModels?: CompactionModelEntry[];
48
68
  }
49
69
 
50
70
  /**
@@ -398,6 +398,15 @@ export namespace MessageV2 {
398
398
  modelID: z.string(),
399
399
  useSameModel: z.boolean(),
400
400
  compactionSafetyMarginPercent: z.number(),
401
+ compactionModels: z
402
+ .array(
403
+ z.object({
404
+ providerID: z.string(),
405
+ modelID: z.string(),
406
+ useSameModel: z.boolean(),
407
+ })
408
+ )
409
+ .optional(),
401
410
  })
402
411
  .optional(),
403
412
  system: z.string().optional(),
@@ -95,6 +95,15 @@ export namespace SessionPrompt {
95
95
  modelID: z.string(),
96
96
  useSameModel: z.boolean(),
97
97
  compactionSafetyMarginPercent: z.number(),
98
+ compactionModels: z
99
+ .array(
100
+ z.object({
101
+ providerID: z.string(),
102
+ modelID: z.string(),
103
+ useSameModel: z.boolean(),
104
+ })
105
+ )
106
+ .optional(),
98
107
  })
99
108
  .optional(),
100
109
  agent: z.string().optional(),
@@ -542,27 +551,109 @@ export namespace SessionPrompt {
542
551
 
543
552
  // pending compaction
544
553
  if (task?.type === 'compaction') {
545
- // Use compaction model if configured, otherwise fall back to base model
554
+ // Use compaction model cascade if configured (#232)
546
555
  const compactionModelConfig = lastUser.compactionModel;
547
- const compactionProviderID =
548
- compactionModelConfig && !compactionModelConfig.useSameModel
549
- ? compactionModelConfig.providerID
550
- : model.providerID;
551
- const compactionModelID =
552
- compactionModelConfig && !compactionModelConfig.useSameModel
553
- ? compactionModelConfig.modelID
554
- : model.modelID;
555
- const result = await SessionCompaction.process({
556
- messages: msgs,
557
- parentID: lastUser.id,
558
- abort,
559
- model: {
560
- providerID: compactionProviderID,
561
- modelID: compactionModelID,
562
- },
563
- sessionID,
564
- });
565
- if (result === 'stop') break;
556
+ const cascade = compactionModelConfig?.compactionModels;
557
+
558
+ if (cascade && cascade.length > 0) {
559
+ // Cascade logic: try each model in order (smallest to largest context)
560
+ // Skip models whose context limit is smaller than current used tokens
561
+ // Skip models that hit rate limits (try next)
562
+ const currentTokens = lastFinished
563
+ ? lastFinished.tokens.input +
564
+ lastFinished.tokens.cache.read +
565
+ lastFinished.tokens.output
566
+ : 0;
567
+
568
+ let compactionResult = 'stop';
569
+ for (const entry of cascade) {
570
+ const entryProviderID = entry.useSameModel
571
+ ? model.providerID
572
+ : entry.providerID;
573
+ const entryModelID = entry.useSameModel
574
+ ? model.modelID
575
+ : entry.modelID;
576
+
577
+ // Check if this model's context is large enough for the current tokens
578
+ if (!entry.useSameModel) {
579
+ try {
580
+ const entryModel = await Provider.getModel(
581
+ entryProviderID,
582
+ entryModelID
583
+ );
584
+ const entryContextLimit = entryModel.info?.limit?.context ?? 0;
585
+ if (
586
+ entryContextLimit > 0 &&
587
+ currentTokens > entryContextLimit
588
+ ) {
589
+ log.info(() => ({
590
+ message:
591
+ 'skipping compaction model — context too small for current tokens',
592
+ modelID: entryModelID,
593
+ providerID: entryProviderID,
594
+ contextLimit: entryContextLimit,
595
+ currentTokens,
596
+ }));
597
+ continue;
598
+ }
599
+ } catch {
600
+ log.info(() => ({
601
+ message:
602
+ 'could not resolve compaction cascade model — skipping',
603
+ modelID: entryModelID,
604
+ providerID: entryProviderID,
605
+ }));
606
+ continue;
607
+ }
608
+ }
609
+
610
+ try {
611
+ compactionResult = await SessionCompaction.process({
612
+ messages: msgs,
613
+ parentID: lastUser.id,
614
+ abort,
615
+ model: {
616
+ providerID: entryProviderID,
617
+ modelID: entryModelID,
618
+ },
619
+ sessionID,
620
+ });
621
+ // If compaction succeeded, break the cascade
622
+ break;
623
+ } catch (err) {
624
+ // If rate limited or error, try next model in cascade
625
+ log.warn(() => ({
626
+ message: 'compaction model failed — trying next in cascade',
627
+ modelID: entryModelID,
628
+ providerID: entryProviderID,
629
+ error: err?.message,
630
+ }));
631
+ continue;
632
+ }
633
+ }
634
+ if (compactionResult === 'stop') break;
635
+ } else {
636
+ // Single model fallback (backward compatibility)
637
+ const compactionProviderID =
638
+ compactionModelConfig && !compactionModelConfig.useSameModel
639
+ ? compactionModelConfig.providerID
640
+ : model.providerID;
641
+ const compactionModelID =
642
+ compactionModelConfig && !compactionModelConfig.useSameModel
643
+ ? compactionModelConfig.modelID
644
+ : model.modelID;
645
+ const result = await SessionCompaction.process({
646
+ messages: msgs,
647
+ parentID: lastUser.id,
648
+ abort,
649
+ model: {
650
+ providerID: compactionProviderID,
651
+ modelID: compactionModelID,
652
+ },
653
+ sessionID,
654
+ });
655
+ if (result === 'stop') break;
656
+ }
566
657
  continue;
567
658
  }
568
659