@askalf/dario 3.30.12 → 3.30.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -277,7 +277,7 @@ Dario's built-in `TOOL_MAP` carries **~66 schema-verified entries** covering the
277
277
  | GitHub Copilot | `run_in_terminal`, `insert_edit_into_file`, `semantic_search`, `codebase_search`, `list_dir`, `fetch_webpage` |
278
278
  | OpenHands | `execute_bash`, `str_replace_editor` |
279
279
  | OpenClaw | `exec`, `process`, `web_search`, `web_fetch`, `browser`, `message` |
280
- | Hermes | `terminal`, `patch`, `web_extract`, `clarify` |
280
+ | Hermes Agent (Nous Research) | `terminal`, `process`, `read_file`, `write_file`, `patch`, `search_files`, `web_search`, `web_extract`, `todo` mapped directly. Hermes-specific tools (`browser_*`, `vision_analyze`, `image_generate`, `skill_*`, `memory`, `session_search`, `cronjob`, `send_message`, `ha_*`, `mixture_of_agents`, `delegate_task`, `execute_code`, `text_to_speech`) have no CC equivalent and auto-preserve through the identity detector (`You are Hermes Agent` or `created by Nous Research` in the system prompt flips dario into preserve-tools for Hermes sessions automatically — v3.30.13). Also consider `--max-tokens=client` so Hermes's 64k/128k per-model caps survive dario's outbound pin. |
281
281
 
282
282
  Text-tool clients (Cline / Kilo Code / Roo Code and forks) are auto-detected via system-prompt fingerprint and automatically flipped into preserve-tools mode, because mixing CC's `tools` array with their XML protocol makes the model emit `<function_calls><invoke>` that their parsers can't read. If you run dario specifically for fingerprint fidelity and would rather pick `--preserve-tools` yourself, `--no-auto-detect` (v3.20.1, aka `--no-auto-preserve`) disables the heuristic — explicit operator choice then wins.
283
283
 
@@ -104,8 +104,20 @@ export declare function scrubFrameworkIdentifiers(text: string): string;
104
104
  * names like "Cline" / "Roo" are still present. Tool-protocol
105
105
  * markers are scrub-proof on their own.
106
106
  *
107
- * Returns the matched family (`cline` / `kilo` / `roo` / `cline-like`)
108
- * or null when no text-tool protocol signature is present.
107
+ * Returns the matched family (`cline` / `kilo` / `roo` / `cline-like` /
108
+ * `hermes`) or null when no signature is present.
109
+ *
110
+ * Hermes Agent (Nous Research) is a different case from the Cline family —
111
+ * it uses the standard Anthropic JSON tool-use protocol (not XML). But it
112
+ * ships ~40 tools, 15+ of which have no CC equivalent (browser_*, vision_*,
113
+ * image_generate, text_to_speech, skills_*, memory, session_search,
114
+ * cronjob, send_message, ha_*, mixture_of_agents, delegate_task, …). In
115
+ * default mode dario distributes unmapped tools onto random CC slots which
116
+ * silently misroutes them. preserve-tools is the correct default for
117
+ * Hermes, giving the same outcome as Cline (client's tool schema passes
118
+ * through untouched) even though the reason is different. The function
119
+ * conflates both cases because the downstream dispatch is identical.
120
+ * Reported via @vmvarg4 on X after the v3.30.5 marketing push.
109
121
  */
110
122
  export declare function detectTextToolClient(systemText: string): string | null;
111
123
  /**
@@ -170,6 +182,19 @@ export interface RequestContext {
170
182
  * Replaces the entire request structure — tools, fields, ordering — with
171
183
  * what real CC sends. Only the conversation content is preserved.
172
184
  */
185
+ /** Default outbound max_tokens when neither a passthrough nor an explicit value is set. Matches CC 2.1.116's wire default. */
186
+ export declare const DEFAULT_MAX_TOKENS = 32000;
187
+ /**
188
+ * Resolve the outbound `max_tokens` value.
189
+ *
190
+ * undefined → pins DEFAULT_MAX_TOKENS (dario's CC-wire default); a number (e.g. 64000) → pins that value
191
+ * 'client' → extract from `clientBody.max_tokens`; fall back to DEFAULT_MAX_TOKENS
192
+ * when the client didn't send a value or sent something non-numeric
193
+ *
194
+ * dario#88 (Hermes compat — Hermes requests up to 128k for Opus 4.7, 64k for
195
+ * Sonnet; pinning to 32k silently truncated its output capacity).
196
+ */
197
+ export declare function resolveMaxTokens(flag: number | 'client' | undefined, clientBody: Record<string, unknown>): number;
173
198
  /** Valid values for the `--effort` flag. `'client'` passes through the client's own `output_config.effort` (falling back to `'high'` if the client didn't send one). dario#87. */
174
199
  export type EffortValue = 'low' | 'medium' | 'high' | 'xhigh' | 'client';
175
200
  export declare const VALID_EFFORT_VALUES: ReadonlyArray<EffortValue>;
@@ -195,6 +220,7 @@ export declare function buildCCRequest(clientBody: Record<string, unknown>, bill
195
220
  hybridTools?: boolean;
196
221
  noAutoDetect?: boolean;
197
222
  effort?: EffortValue;
223
+ maxTokens?: number | 'client';
198
224
  }): {
199
225
  body: Record<string, unknown>;
200
226
  toolMap: Map<string, ToolMapping>;
@@ -213,8 +213,20 @@ export function scrubFrameworkIdentifiers(text) {
213
213
  * names like "Cline" / "Roo" are still present. Tool-protocol
214
214
  * markers are scrub-proof on their own.
215
215
  *
216
- * Returns the matched family (`cline` / `kilo` / `roo` / `cline-like`)
217
- * or null when no text-tool protocol signature is present.
216
+ * Returns the matched family (`cline` / `kilo` / `roo` / `cline-like` /
217
+ * `hermes`) or null when no signature is present.
218
+ *
219
+ * Hermes Agent (Nous Research) is a different case from the Cline family —
220
+ * it uses the standard Anthropic JSON tool-use protocol (not XML). But it
221
+ * ships ~40 tools, 15+ of which have no CC equivalent (browser_*, vision_*,
222
+ * image_generate, text_to_speech, skills_*, memory, session_search,
223
+ * cronjob, send_message, ha_*, mixture_of_agents, delegate_task, …). In
224
+ * default mode dario distributes unmapped tools onto random CC slots which
225
+ * silently misroutes them. preserve-tools is the correct default for
226
+ * Hermes, giving the same outcome as Cline (client's tool schema passes
227
+ * through untouched) even though the reason is different. The function
228
+ * conflates both cases because the downstream dispatch is identical.
229
+ * Reported via @vmvarg4 on X after the v3.30.5 marketing push.
218
230
  */
219
231
  export function detectTextToolClient(systemText) {
220
232
  if (!systemText)
@@ -225,6 +237,14 @@ export function detectTextToolClient(systemText) {
225
237
  return 'kilo';
226
238
  if (/\bYou are Roo\b/.test(systemText))
227
239
  return 'roo';
240
+ // Hermes Agent (Nous Research) — canonical opener from agent/prompt_builder.py.
241
+ // Also accept "created by Nous Research" as a secondary anchor since
242
+ // downstream forks may edit the leading identity line but tend to keep
243
+ // attribution intact.
244
+ if (/\bYou are Hermes Agent\b/.test(systemText))
245
+ return 'hermes';
246
+ if (/\bcreated by Nous Research\b/.test(systemText))
247
+ return 'hermes';
228
248
  // Protocol-signature fallback — unique to the Cline family and its
229
249
  // forks; survives a forked system prompt that edited the identity
230
250
  // string out but kept the tool protocol intact.
@@ -708,6 +728,34 @@ const TOOL_MAP = {
708
728
  },
709
729
  exit_worktree: { ccTool: 'ExitWorktree' },
710
730
  };
731
+ /**
732
+ * Build a CC-template request from a client request.
733
+ * Replaces the entire request structure — tools, fields, ordering — with
734
+ * what real CC sends. Only the conversation content is preserved.
735
+ */
736
/**
 * Default outbound `max_tokens`, used when no override is configured or when
 * the requested value is unusable. Matches CC 2.1.116's wire default.
 */
export const DEFAULT_MAX_TOKENS = 32000;
/**
 * Resolve the outbound `max_tokens` value (dario#88, Hermes compat).
 *
 *   flag === undefined → DEFAULT_MAX_TOKENS
 *   flag === 'client'  → `clientBody.max_tokens`, floored to an integer
 *   flag is a number   → that number, floored to an integer
 *
 * Whichever source is selected, a candidate that is not a finite number, or
 * that floors to less than 1, falls back to DEFAULT_MAX_TOKENS so the
 * function never returns 0, a negative, a fraction or NaN.
 *
 * @param flag Operator setting: a pinned number, the literal `'client'`, or
 *   `undefined` when nothing was configured.
 * @param clientBody The client's request body; only `max_tokens` is read,
 *   and only in `'client'` mode.
 * @returns A positive integer to send as `max_tokens`.
 */
export function resolveMaxTokens(flag, clientBody) {
    const candidate = flag === 'client' ? clientBody?.max_tokens : flag;
    if (typeof candidate !== 'number' || !Number.isFinite(candidate))
        return DEFAULT_MAX_TOKENS;
    // Floor BEFORE the positivity check: 0.5 is "> 0" but floors to 0, which
    // is not a usable max_tokens.
    const floored = Math.floor(candidate);
    return floored >= 1 ? floored : DEFAULT_MAX_TOKENS;
}
711
759
  export const VALID_EFFORT_VALUES = ['low', 'medium', 'high', 'xhigh', 'client'];
712
760
  /**
713
761
  * Resolve the outbound `output_config.effort` value.
@@ -992,7 +1040,7 @@ export function buildCCRequest(clientBody, billingTag, cacheControl, identity, o
992
1040
  session_id: identity.sessionId,
993
1041
  }),
994
1042
  };
995
- ccRequest.max_tokens = 32000;
1043
+ ccRequest.max_tokens = resolveMaxTokens(opts.maxTokens, clientBody);
996
1044
  // Model-specific fields — order: thinking, context_management, output_config
997
1045
  if (!isHaiku) {
998
1046
  ccRequest.thinking = { type: 'adaptive' };
package/dist/cli.d.ts CHANGED
@@ -10,6 +10,13 @@
10
10
  * dario logout — Remove saved credentials
11
11
  */
12
12
  import { type EffortValue } from './cc-template.js';
13
+ /**
14
+ * Parse `--max-tokens=<N|client>` + `DARIO_MAX_TOKENS` env (dario#88).
15
+ * Numeric values pin; `client` (case-insensitive) = passthrough client's
16
+ * max_tokens; unset = dario's default pin applies. Invalid values exit
17
+ * non-zero with guidance. Exported for tests.
18
+ */
19
+ export declare function resolveMaxTokensFlag(args: string[], env: string | undefined): number | 'client' | undefined;
13
20
  /**
14
21
  * Parse the `--effort` flag + `DARIO_EFFORT` env. Validates against the
15
22
  * allowed set; unrecognised values cause a non-zero exit with the list of
package/dist/cli.js CHANGED
@@ -271,6 +271,14 @@ async function proxy() {
271
271
  // should watch the `representative-claim` response header via -v logs
272
272
  // and revert to default if subscription billing breaks.
273
273
  const effort = resolveEffortFlag(args, process.env['DARIO_EFFORT']);
274
+ // --max-tokens=<N|client> — override outbound max_tokens (dario#88,
275
+ // Hermes compat). Default unset pins 32000 (CC 2.1.116's wire default).
276
+ // 'client' passes through whatever the client sent (Hermes requests up
277
+ // to 128k for Opus 4.7, 64k for Sonnet — default pin silently truncates
278
+ // their output capacity). Anthropic enforces a per-model ceiling on
279
+ // the server side, so passing through a too-high value returns a clean
280
+ // 400 rather than silently accepting beyond-model-max.
281
+ const maxTokens = resolveMaxTokensFlag(args, process.env['DARIO_MAX_TOKENS']);
274
282
  // Non-loopback bind without DARIO_API_KEY turns dario into an open
275
283
  // OAuth-subscription relay for anyone on the reachable network. Refuse
276
284
  // to start rather than rely on the operator to read the startup banner.
@@ -290,7 +298,27 @@ async function proxy() {
290
298
  console.error(`[dario] Override (not recommended): pass --unsafe-no-auth if you have out-of-band network controls and accept the risk.`);
291
299
  process.exit(1);
292
300
  }
293
- await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort });
301
+ await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort, maxTokens });
302
+ }
303
/**
 * Parse `--max-tokens=<N|client>` from argv, falling back to the
 * `DARIO_MAX_TOKENS` environment value (dario#88). Exported for tests.
 *
 * The first `--max-tokens=` argument wins over the environment value. An
 * unset or empty value returns `undefined`, so the default pin applies.
 * `client` (case-insensitive, surrounding whitespace ignored) selects
 * passthrough of the client's own max_tokens. Anything else must be a
 * positive decimal integer; otherwise an error is printed and the process
 * exits with status 1.
 *
 * @param args CLI arguments to scan.
 * @param env Value of `DARIO_MAX_TOKENS`, or `undefined` when unset.
 * @returns The pinned number, `'client'`, or `undefined` when nothing was set.
 */
export function resolveMaxTokensFlag(args, env) {
    const prefix = '--max-tokens=';
    const withValue = args.find(a => a.startsWith(prefix));
    const raw = withValue ? withValue.slice(prefix.length) : env;
    if (raw === undefined || raw === '')
        return undefined;
    const normalized = raw.trim();
    if (normalized.toLowerCase() === 'client')
        return 'client';
    // Require the WHOLE token to be decimal digits. Number.parseInt stops at
    // the first non-digit, so "64k" would silently pin 64 and "1e5" would pin
    // 1 instead of being rejected.
    if (/^\+?\d+$/.test(normalized)) {
        const n = Number(normalized);
        if (Number.isSafeInteger(n) && n > 0)
            return n;
    }
    console.error(`[dario] Invalid --max-tokens value: ${JSON.stringify(raw)}. Must be a positive integer or the literal "client".`);
    process.exit(1);
}
295
323
  /**
296
324
  * Parse the `--effort` flag + `DARIO_EFFORT` env. Validates against the
@@ -722,6 +750,16 @@ async function help() {
722
750
  to 'overage' billing; watch -v logs for
723
751
  representative-claim changes.
724
752
  Env: DARIO_EFFORT. (dario#87)
753
+ --max-tokens=<N|client> Override outbound max_tokens. Default
754
+ (unset) pins 32000 (CC 2.1.116 wire default).
755
+ Set a number to pin that value; set 'client'
756
+ to pass through the client's requested
757
+ max_tokens (Hermes requests 64k–128k; the
758
+ default pin silently truncates its output
759
+ capacity). Anthropic enforces the per-model
760
+ ceiling server-side, so too-high values
761
+ return a clean 400.
762
+ Env: DARIO_MAX_TOKENS. (dario#88)
725
763
  --port=PORT Port to listen on (default: 3456)
726
764
  --host=ADDRESS Address to bind to (default: 127.0.0.1)
727
765
  Use 0.0.0.0 for LAN; see README for DARIO_API_KEY
package/dist/proxy.d.ts CHANGED
@@ -80,6 +80,16 @@ interface ProxyOptions {
80
80
  * dario#87.
81
81
  */
82
82
  effort?: EffortValue;
83
+ /**
84
+ * Override the outbound `max_tokens` value. Default (undefined) pins
85
+ * `32000` — CC 2.1.116's wire default, below Anthropic's per-model
86
+ * limits. A number pins a specific value. `'client'` passes through
87
+ * whatever the client requested (up to Anthropic's per-model ceiling
88
+ * on the server side). Hermes (and other agents) request up to 128k
89
+ * for Opus and 64k for Sonnet; the default 32k pin silently truncates
90
+ * their output capacity. dario#88 (Hermes compat).
91
+ */
92
+ maxTokens?: number | 'client';
83
93
  }
84
94
  export declare function sanitizeError(err: unknown): string;
85
95
  /**
package/dist/proxy.js CHANGED
@@ -979,6 +979,7 @@ export async function startProxy(opts = {}) {
979
979
  hybridTools: opts.hybridTools ?? false,
980
980
  noAutoDetect: opts.noAutoDetect ?? false,
981
981
  effort: opts.effort,
982
+ maxTokens: opts.maxTokens,
982
983
  });
983
984
  // Log the auto-preserve-tools switch once per text-tool
984
985
  // client family. Skip when the operator already opted into
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.30.12",
3
+ "version": "3.30.13",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {