@askalf/dario 3.30.12 → 3.30.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cc-template.d.ts +28 -2
- package/dist/cc-template.js +51 -3
- package/dist/cli.d.ts +7 -0
- package/dist/cli.js +39 -1
- package/dist/proxy.d.ts +10 -0
- package/dist/proxy.js +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -277,7 +277,7 @@ Dario's built-in `TOOL_MAP` carries **~66 schema-verified entries** covering the
|
|
|
277
277
|
| GitHub Copilot | `run_in_terminal`, `insert_edit_into_file`, `semantic_search`, `codebase_search`, `list_dir`, `fetch_webpage` |
|
|
278
278
|
| OpenHands | `execute_bash`, `str_replace_editor` |
|
|
279
279
|
| OpenClaw | `exec`, `process`, `web_search`, `web_fetch`, `browser`, `message` |
|
|
280
|
-
| Hermes | `terminal`, `patch`, `web_extract`, `
|
|
280
|
+
| Hermes Agent (Nous Research) | `terminal`, `process`, `read_file`, `write_file`, `patch`, `search_files`, `web_search`, `web_extract`, `todo` mapped directly. Hermes-specific tools (`browser_*`, `vision_analyze`, `image_generate`, `skill_*`, `memory`, `session_search`, `cronjob`, `send_message`, `ha_*`, `mixture_of_agents`, `delegate_task`, `execute_code`, `text_to_speech`) have no CC equivalent and auto-preserve through the identity detector (`You are Hermes Agent` or `created by Nous Research` in the system prompt flips dario into preserve-tools for Hermes sessions automatically — v3.30.13). Also consider `--max-tokens=client` so Hermes's 64k/128k per-model caps survive dario's outbound pin. |
|
|
281
281
|
|
|
282
282
|
Text-tool clients (Cline / Kilo Code / Roo Code and forks) are auto-detected via system-prompt fingerprint and automatically flipped into preserve-tools mode, because mixing CC's `tools` array with their XML protocol makes the model emit `<function_calls><invoke>` that their parsers can't read. If you run dario specifically for fingerprint fidelity and would rather pick `--preserve-tools` yourself, `--no-auto-detect` (v3.20.1, aka `--no-auto-preserve`) disables the heuristic — explicit operator choice then wins.
|
|
283
283
|
|
package/dist/cc-template.d.ts
CHANGED
|
@@ -104,8 +104,20 @@ export declare function scrubFrameworkIdentifiers(text: string): string;
|
|
|
104
104
|
* names like "Cline" / "Roo" are still present. Tool-protocol
|
|
105
105
|
* markers are scrub-proof on their own.
|
|
106
106
|
*
|
|
107
|
-
* Returns the matched family (`cline` / `kilo` / `roo` / `cline-like`
|
|
108
|
-
* or null when no
|
|
107
|
+
* Returns the matched family (`cline` / `kilo` / `roo` / `cline-like` /
|
|
108
|
+
* `hermes`) or null when no signature is present.
|
|
109
|
+
*
|
|
110
|
+
* Hermes Agent (Nous Research) is a different case from the Cline family —
|
|
111
|
+
* it uses the standard Anthropic JSON tool-use protocol (not XML). But it
|
|
112
|
+
* ships ~40 tools, 15+ of which have no CC equivalent (browser_*, vision_*,
|
|
113
|
+
* image_generate, text_to_speech, skills_*, memory, session_search,
|
|
114
|
+
* cronjob, send_message, ha_*, mixture_of_agents, delegate_task, …). In
|
|
115
|
+
* default mode dario distributes unmapped tools onto random CC slots which
|
|
116
|
+
* silently misroutes them. preserve-tools is the correct default for
|
|
117
|
+
* Hermes for the same outcome as Cline (client's tool schema passes
|
|
118
|
+
* through untouched) even though the reason is different. The function
|
|
119
|
+
* conflates both cases because the downstream dispatch is identical.
|
|
120
|
+
* Reported via @vmvarg4 on X after the v3.30.5 marketing push.
|
|
109
121
|
*/
|
|
110
122
|
export declare function detectTextToolClient(systemText: string): string | null;
|
|
111
123
|
/**
|
|
@@ -170,6 +182,19 @@ export interface RequestContext {
|
|
|
170
182
|
* Replaces the entire request structure — tools, fields, ordering — with
|
|
171
183
|
* what real CC sends. Only the conversation content is preserved.
|
|
172
184
|
*/
|
|
185
|
+
/** Default outbound max_tokens when neither a passthrough nor an explicit value is set. Matches CC 2.1.116's wire default. */
|
|
186
|
+
export declare const DEFAULT_MAX_TOKENS = 32000;
|
|
187
|
+
/**
|
|
188
|
+
* Resolve the outbound `max_tokens` value.
|
|
189
|
+
*
|
|
190
|
+
* undefined / 32000 etc. → number pins outbound (preserves dario's CC-wire default)
|
|
191
|
+
* 'client' → extract from `clientBody.max_tokens`; fall back to DEFAULT_MAX_TOKENS
|
|
192
|
+
* when the client didn't send a value or sent something non-numeric
|
|
193
|
+
*
|
|
194
|
+
* dario#88 (Hermes compat — Hermes requests up to 128k for Opus 4.7, 64k for
|
|
195
|
+
* Sonnet; pinning to 32k silently truncated its output capacity).
|
|
196
|
+
*/
|
|
197
|
+
export declare function resolveMaxTokens(flag: number | 'client' | undefined, clientBody: Record<string, unknown>): number;
|
|
173
198
|
/** Valid values for the `--effort` flag. `'client'` passes through the client's own `output_config.effort` (falling back to `'high'` if the client didn't send one). dario#87. */
|
|
174
199
|
export type EffortValue = 'low' | 'medium' | 'high' | 'xhigh' | 'client';
|
|
175
200
|
export declare const VALID_EFFORT_VALUES: ReadonlyArray<EffortValue>;
|
|
@@ -195,6 +220,7 @@ export declare function buildCCRequest(clientBody: Record<string, unknown>, bill
|
|
|
195
220
|
hybridTools?: boolean;
|
|
196
221
|
noAutoDetect?: boolean;
|
|
197
222
|
effort?: EffortValue;
|
|
223
|
+
maxTokens?: number | 'client';
|
|
198
224
|
}): {
|
|
199
225
|
body: Record<string, unknown>;
|
|
200
226
|
toolMap: Map<string, ToolMapping>;
|
package/dist/cc-template.js
CHANGED
|
@@ -213,8 +213,20 @@ export function scrubFrameworkIdentifiers(text) {
|
|
|
213
213
|
* names like "Cline" / "Roo" are still present. Tool-protocol
|
|
214
214
|
* markers are scrub-proof on their own.
|
|
215
215
|
*
|
|
216
|
-
* Returns the matched family (`cline` / `kilo` / `roo` / `cline-like`
|
|
217
|
-
* or null when no
|
|
216
|
+
* Returns the matched family (`cline` / `kilo` / `roo` / `cline-like` /
|
|
217
|
+
* `hermes`) or null when no signature is present.
|
|
218
|
+
*
|
|
219
|
+
* Hermes Agent (Nous Research) is a different case from the Cline family —
|
|
220
|
+
* it uses the standard Anthropic JSON tool-use protocol (not XML). But it
|
|
221
|
+
* ships ~40 tools, 15+ of which have no CC equivalent (browser_*, vision_*,
|
|
222
|
+
* image_generate, text_to_speech, skills_*, memory, session_search,
|
|
223
|
+
* cronjob, send_message, ha_*, mixture_of_agents, delegate_task, …). In
|
|
224
|
+
* default mode dario distributes unmapped tools onto random CC slots which
|
|
225
|
+
* silently misroutes them. preserve-tools is the correct default for
|
|
226
|
+
* Hermes for the same outcome as Cline (client's tool schema passes
|
|
227
|
+
* through untouched) even though the reason is different. The function
|
|
228
|
+
* conflates both cases because the downstream dispatch is identical.
|
|
229
|
+
* Reported via @vmvarg4 on X after the v3.30.5 marketing push.
|
|
218
230
|
*/
|
|
219
231
|
export function detectTextToolClient(systemText) {
|
|
220
232
|
if (!systemText)
|
|
@@ -225,6 +237,14 @@ export function detectTextToolClient(systemText) {
|
|
|
225
237
|
return 'kilo';
|
|
226
238
|
if (/\bYou are Roo\b/.test(systemText))
|
|
227
239
|
return 'roo';
|
|
240
|
+
// Hermes Agent (Nous Research) — canonical opener from agent/prompt_builder.py.
|
|
241
|
+
// Also accept "created by Nous Research" as a secondary anchor since
|
|
242
|
+
// downstream forks may edit the leading identity line but tend to keep
|
|
243
|
+
// attribution intact.
|
|
244
|
+
if (/\bYou are Hermes Agent\b/.test(systemText))
|
|
245
|
+
return 'hermes';
|
|
246
|
+
if (/\bcreated by Nous Research\b/.test(systemText))
|
|
247
|
+
return 'hermes';
|
|
228
248
|
// Protocol-signature fallback — unique to the Cline family and its
|
|
229
249
|
// forks; survives a forked system prompt that edited the identity
|
|
230
250
|
// string out but kept the tool protocol intact.
|
|
@@ -708,6 +728,34 @@ const TOOL_MAP = {
|
|
|
708
728
|
},
|
|
709
729
|
exit_worktree: { ccTool: 'ExitWorktree' },
|
|
710
730
|
};
|
|
731
|
+
/**
|
|
732
|
+
* Build a CC-template request from a client request.
|
|
733
|
+
* Replaces the entire request structure — tools, fields, ordering — with
|
|
734
|
+
* what real CC sends. Only the conversation content is preserved.
|
|
735
|
+
*/
|
|
736
|
+
/** Default outbound max_tokens when neither a passthrough nor an explicit value is set. Matches CC 2.1.116's wire default. */
|
|
737
|
+
export const DEFAULT_MAX_TOKENS = 32000;
|
|
738
|
+
/**
|
|
739
|
+
* Resolve the outbound `max_tokens` value.
|
|
740
|
+
*
|
|
741
|
+
* undefined / 32000 etc. → number pins outbound (preserves dario's CC-wire default)
|
|
742
|
+
* 'client' → extract from `clientBody.max_tokens`; fall back to DEFAULT_MAX_TOKENS
|
|
743
|
+
* when the client didn't send a value or sent something non-numeric
|
|
744
|
+
*
|
|
745
|
+
* dario#88 (Hermes compat — Hermes requests up to 128k for Opus 4.7, 64k for
|
|
746
|
+
* Sonnet; pinning to 32k silently truncated its output capacity).
|
|
747
|
+
*/
|
|
748
|
+
export function resolveMaxTokens(flag, clientBody) {
|
|
749
|
+
if (flag === undefined)
|
|
750
|
+
return DEFAULT_MAX_TOKENS;
|
|
751
|
+
if (flag === 'client') {
|
|
752
|
+
const clientMT = clientBody.max_tokens;
|
|
753
|
+
if (typeof clientMT === 'number' && Number.isFinite(clientMT) && clientMT > 0)
|
|
754
|
+
return Math.floor(clientMT);
|
|
755
|
+
return DEFAULT_MAX_TOKENS;
|
|
756
|
+
}
|
|
757
|
+
return flag;
|
|
758
|
+
}
|
|
711
759
|
export const VALID_EFFORT_VALUES = ['low', 'medium', 'high', 'xhigh', 'client'];
|
|
712
760
|
/**
|
|
713
761
|
* Resolve the outbound `output_config.effort` value.
|
|
@@ -992,7 +1040,7 @@ export function buildCCRequest(clientBody, billingTag, cacheControl, identity, o
|
|
|
992
1040
|
session_id: identity.sessionId,
|
|
993
1041
|
}),
|
|
994
1042
|
};
|
|
995
|
-
ccRequest.max_tokens = 32000;
|
|
1043
|
+
ccRequest.max_tokens = resolveMaxTokens(opts.maxTokens, clientBody);
|
|
996
1044
|
// Model-specific fields — order: thinking, context_management, output_config
|
|
997
1045
|
if (!isHaiku) {
|
|
998
1046
|
ccRequest.thinking = { type: 'adaptive' };
|
package/dist/cli.d.ts
CHANGED
|
@@ -10,6 +10,13 @@
|
|
|
10
10
|
* dario logout — Remove saved credentials
|
|
11
11
|
*/
|
|
12
12
|
import { type EffortValue } from './cc-template.js';
|
|
13
|
+
/**
|
|
14
|
+
* Parse `--max-tokens=<N|client>` + `DARIO_MAX_TOKENS` env (dario#88).
|
|
15
|
+
* Numeric values pin; `client` (case-insensitive) = passthrough client's
|
|
16
|
+
* max_tokens; unset = dario's default pin applies. Invalid values exit
|
|
17
|
+
* non-zero with guidance. Exported for tests.
|
|
18
|
+
*/
|
|
19
|
+
export declare function resolveMaxTokensFlag(args: string[], env: string | undefined): number | 'client' | undefined;
|
|
13
20
|
/**
|
|
14
21
|
* Parse the `--effort` flag + `DARIO_EFFORT` env. Validates against the
|
|
15
22
|
* allowed set; unrecognised values cause a non-zero exit with the list of
|
package/dist/cli.js
CHANGED
|
@@ -271,6 +271,14 @@ async function proxy() {
|
|
|
271
271
|
// should watch the `representative-claim` response header via -v logs
|
|
272
272
|
// and revert to default if subscription billing breaks.
|
|
273
273
|
const effort = resolveEffortFlag(args, process.env['DARIO_EFFORT']);
|
|
274
|
+
// --max-tokens=<N|client> — override outbound max_tokens (dario#88,
|
|
275
|
+
// Hermes compat). Default unset pins 32000 (CC 2.1.116's wire default).
|
|
276
|
+
// 'client' passes through whatever the client sent (Hermes requests up
|
|
277
|
+
// to 128k for Opus 4.7, 64k for Sonnet — default pin silently truncates
|
|
278
|
+
// their output capacity). Anthropic enforces a per-model ceiling on
|
|
279
|
+
// the server side, so passing through a too-high value returns a clean
|
|
280
|
+
// 400 rather than silently accepting beyond-model-max.
|
|
281
|
+
const maxTokens = resolveMaxTokensFlag(args, process.env['DARIO_MAX_TOKENS']);
|
|
274
282
|
// Non-loopback bind without DARIO_API_KEY turns dario into an open
|
|
275
283
|
// OAuth-subscription relay for anyone on the reachable network. Refuse
|
|
276
284
|
// to start rather than rely on the operator to read the startup banner.
|
|
@@ -290,7 +298,27 @@ async function proxy() {
|
|
|
290
298
|
console.error(`[dario] Override (not recommended): pass --unsafe-no-auth if you have out-of-band network controls and accept the risk.`);
|
|
291
299
|
process.exit(1);
|
|
292
300
|
}
|
|
293
|
-
await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort });
|
|
301
|
+
await startProxy({ port, host, verbose, verboseBodies, model, passthrough, preserveTools, hybridTools, noAutoDetect, strictTls, pacingMinMs, pacingJitterMs, drainOnClose, sessionIdleRotateMs, sessionRotateJitterMs, sessionMaxAgeMs, sessionPerClient, preserveOrchestrationTags, noLiveCapture, strictTemplate, maxConcurrent, maxQueued, queueTimeoutMs, effort, maxTokens });
|
|
302
|
+
}
|
|
303
|
+
/**
|
|
304
|
+
* Parse `--max-tokens=<N|client>` + `DARIO_MAX_TOKENS` env (dario#88).
|
|
305
|
+
* Numeric values pin; `client` (case-insensitive) = passthrough client's
|
|
306
|
+
* max_tokens; unset = dario's default pin applies. Invalid values exit
|
|
307
|
+
* non-zero with guidance. Exported for tests.
|
|
308
|
+
*/
|
|
309
|
+
export function resolveMaxTokensFlag(args, env) {
|
|
310
|
+
const withValue = args.find(a => a.startsWith('--max-tokens='));
|
|
311
|
+
const raw = withValue ? withValue.slice('--max-tokens='.length) : env;
|
|
312
|
+
if (raw === undefined || raw === '')
|
|
313
|
+
return undefined;
|
|
314
|
+
const normalized = raw.trim();
|
|
315
|
+
if (normalized.toLowerCase() === 'client')
|
|
316
|
+
return 'client';
|
|
317
|
+
const n = Number.parseInt(normalized, 10);
|
|
318
|
+
if (Number.isFinite(n) && n > 0)
|
|
319
|
+
return n;
|
|
320
|
+
console.error(`[dario] Invalid --max-tokens value: ${JSON.stringify(raw)}. Must be a positive integer or the literal "client".`);
|
|
321
|
+
process.exit(1);
|
|
294
322
|
}
|
|
295
323
|
/**
|
|
296
324
|
* Parse the `--effort` flag + `DARIO_EFFORT` env. Validates against the
|
|
@@ -722,6 +750,16 @@ async function help() {
|
|
|
722
750
|
to 'overage' billing; watch -v logs for
|
|
723
751
|
representative-claim changes.
|
|
724
752
|
Env: DARIO_EFFORT. (dario#87)
|
|
753
|
+
--max-tokens=<N|client> Override outbound max_tokens. Default
|
|
754
|
+
(unset) pins 32000 (CC 2.1.116 wire default).
|
|
755
|
+
Set a number to pin that value; set 'client'
|
|
756
|
+
to pass through the client's requested
|
|
757
|
+
max_tokens (Hermes requests 64k–128k; the
|
|
758
|
+
default pin silently truncates its output
|
|
759
|
+
capacity). Anthropic enforces the per-model
|
|
760
|
+
ceiling server-side, so too-high values
|
|
761
|
+
return a clean 400.
|
|
762
|
+
Env: DARIO_MAX_TOKENS. (dario#88)
|
|
725
763
|
--port=PORT Port to listen on (default: 3456)
|
|
726
764
|
--host=ADDRESS Address to bind to (default: 127.0.0.1)
|
|
727
765
|
Use 0.0.0.0 for LAN; see README for DARIO_API_KEY
|
package/dist/proxy.d.ts
CHANGED
|
@@ -80,6 +80,16 @@ interface ProxyOptions {
|
|
|
80
80
|
* dario#87.
|
|
81
81
|
*/
|
|
82
82
|
effort?: EffortValue;
|
|
83
|
+
/**
|
|
84
|
+
* Override the outbound `max_tokens` value. Default (undefined) pins
|
|
85
|
+
* `32000` — CC 2.1.116's wire default, below Anthropic's per-model
|
|
86
|
+
* limits. A number pins a specific value. `'client'` passes through
|
|
87
|
+
* whatever the client requested (up to Anthropic's per-model ceiling
|
|
88
|
+
* on the server side). Hermes (and other agents) request up to 128k
|
|
89
|
+
* for Opus and 64k for Sonnet; the default 32k pin silently truncates
|
|
90
|
+
* their output capacity. dario#88 (Hermes compat).
|
|
91
|
+
*/
|
|
92
|
+
maxTokens?: number | 'client';
|
|
83
93
|
}
|
|
84
94
|
export declare function sanitizeError(err: unknown): string;
|
|
85
95
|
/**
|
package/dist/proxy.js
CHANGED
|
@@ -979,6 +979,7 @@ export async function startProxy(opts = {}) {
|
|
|
979
979
|
hybridTools: opts.hybridTools ?? false,
|
|
980
980
|
noAutoDetect: opts.noAutoDetect ?? false,
|
|
981
981
|
effort: opts.effort,
|
|
982
|
+
maxTokens: opts.maxTokens,
|
|
982
983
|
});
|
|
983
984
|
// Log the auto-preserve-tools switch once per text-tool
|
|
984
985
|
// client family. Skip when the operator already opted into
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/dario",
|
|
3
|
-
"version": "3.30.12",
|
|
3
|
+
"version": "3.30.13",
|
|
4
4
|
"description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|