@askalf/dario 3.36.0 → 3.37.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -104,6 +104,7 @@ Already have **Pro + Max** stacked? Pool mode (`dario accounts add work` / `dari
104
104
  - **You want CC's behavioral constraints out of your prompt.** `dario proxy --system-prompt=partial` strips CC's Tone-and-style / Text-output / verbosity / no-comments-by-default bullets and recovers ~1.2–2.8× output capability on open-ended work — empirically without flipping subscription billing (the classifier doesn't read this slot). RLHF refusals on harmful content are unaffected (alignment is in the weights, not the prompt). See [`docs/system-prompt.md`](./docs/system-prompt.md) and the empirical writeup in [`docs/research/system-prompt.md`](./docs/research/system-prompt.md).
105
105
  - **You want dario reachable from inside Claude Code or any MCP client.** `dario subagent install` registers a CC sub-agent for in-session diagnostics ([`docs/sub-agent.md`](./docs/sub-agent.md)). `dario mcp` turns dario into a read-only MCP server ([`docs/mcp-server.md`](./docs/mcp-server.md)).
106
106
  - **You want to actually audit it.** ~12,650 lines of TypeScript across 27 files. Zero runtime dependencies. Credentials at `~/.dario/` with `0600` permissions. `127.0.0.1`-only by default. Every release [SLSA-attested](https://www.npmjs.com/package/@askalf/dario). Nothing phones home. Small enough to read in a weekend.
107
+ - **You want a deep-research tool that runs at $0/mo.** [deepdive](https://github.com/askalf/deepdive) is dario's companion CLI — run `npx @askalf/deepdive "your question"` and get a cited Markdown report. It replaces Perplexity Pro ($20/mo), OpenAI Deep Research ($20/mo), and Gemini Deep Research ($20/mo) — all of which mark up LLM calls on top of LLM calls. The deep-research workload (50k–200k tokens per question, sustained) is exactly what Max was priced for; deepdive is the tool that puts that capacity to use.
107
108
 
108
109
  ---
109
110
 
@@ -248,14 +248,14 @@ export declare const DEFAULT_MAX_TOKENS = 32000;
248
248
  * Sonnet; pinning to 32k silently truncated its output capacity).
249
249
  */
250
250
  export declare function resolveMaxTokens(flag: number | 'client' | undefined, clientBody: Record<string, unknown>): number;
251
- /** Valid values for the `--effort` flag. `'client'` passes through the client's own `output_config.effort` (falling back to `'high'` if the client didn't send one). dario#87. */
252
- export type EffortValue = 'low' | 'medium' | 'high' | 'xhigh' | 'client';
251
+ /** Valid values for the `--effort` flag. Mirrors CC's `--effort` set as of v2.1.126 (`low|medium|high|xhigh|max`) plus dario's pseudo-value `'client'` for passthrough. `'client'` passes through the client's own `output_config.effort` (falling back to `'high'` if the client didn't send one). dario#87, `'max'` added in dario#190. */
252
+ export type EffortValue = 'low' | 'medium' | 'high' | 'xhigh' | 'max' | 'client';
253
253
  export declare const VALID_EFFORT_VALUES: ReadonlyArray<EffortValue>;
254
254
  /**
255
255
  * Resolve the outbound `output_config.effort` value.
256
256
  *
257
257
  * undefined / 'high' → 'high' (current default, matches CC 2.1.116 wire value)
258
- * 'low' / 'medium' / 'xhigh' → pin to that value
258
+ * 'low' / 'medium' / 'xhigh' / 'max' → pin to that value
259
259
  * 'client' → extract from `clientBody.output_config.effort`; fall back
260
260
  * to 'high' if the client didn't send one or sent a non-string
261
261
  *
@@ -884,12 +884,12 @@ export function resolveMaxTokens(flag, clientBody) {
884
884
  }
885
885
  return flag;
886
886
  }
887
- export const VALID_EFFORT_VALUES = ['low', 'medium', 'high', 'xhigh', 'client'];
887
+ export const VALID_EFFORT_VALUES = ['low', 'medium', 'high', 'xhigh', 'max', 'client'];
888
888
  /**
889
889
  * Resolve the outbound `output_config.effort` value.
890
890
  *
891
891
  * undefined / 'high' → 'high' (current default, matches CC 2.1.116 wire value)
892
- * 'low' / 'medium' / 'xhigh' → pin to that value
892
+ * 'low' / 'medium' / 'xhigh' / 'max' → pin to that value
893
893
  * 'client' → extract from `clientBody.output_config.effort`; fall back
894
894
  * to 'high' if the client didn't send one or sent a non-string
895
895
  *
package/dist/cli.js CHANGED
@@ -957,12 +957,15 @@ async function help() {
957
957
  dario returns 504 "queue-timeout"
958
958
  (default: 60000).
959
959
  Env: DARIO_QUEUE_TIMEOUT_MS. (dario#80)
960
- --effort=<low|medium|high|xhigh|client>
960
+ --effort=<low|medium|high|xhigh|max|client>
961
961
  Override the outbound output_config.effort
962
962
  on non-haiku requests. Default (unset)
963
963
  pins 'high' — matches CC 2.1.116's wire
964
- value. 'client' passes through what the
965
- client sent (falls back to 'high' if none).
964
+ value. 'max' is CC's highest reasoning
965
+ budget (added in CC v2.1.x; verified in
966
+ v2.1.126). 'client' passes through what
967
+ the client sent (falls back to 'high' if
968
+ none).
966
969
  WARNING: non-'high' values may cause
967
970
  Anthropic's classifier to flip requests
968
971
  to 'overage' billing; watch -v logs for
@@ -1060,6 +1063,7 @@ async function shim() {
1060
1063
  const rest = args.slice(1);
1061
1064
  const sepIdx = rest.indexOf('--');
1062
1065
  let verbose = false;
1066
+ let priority = 'normal';
1063
1067
  let head;
1064
1068
  let childArgs;
1065
1069
  if (sepIdx >= 0) {
@@ -1073,14 +1077,23 @@ async function shim() {
1073
1077
  for (const flag of head) {
1074
1078
  if (flag === '-v' || flag === '--verbose')
1075
1079
  verbose = true;
1080
+ else if (flag.startsWith('--priority=')) {
1081
+ const v = flag.slice('--priority='.length);
1082
+ if (v !== 'normal' && v !== 'below-normal' && v !== 'low') {
1083
+ console.error(`--priority: invalid value ${JSON.stringify(v)}. Expected one of: normal, below-normal, low.`);
1084
+ process.exit(1);
1085
+ }
1086
+ priority = v;
1087
+ }
1076
1088
  else {
1077
1089
  console.error(`Unknown shim flag: ${flag}`);
1078
1090
  process.exit(1);
1079
1091
  }
1080
1092
  }
1081
1093
  if (childArgs.length === 0) {
1082
- console.error('Usage: dario shim [-v] -- <command> [args...]');
1094
+ console.error('Usage: dario shim [-v] [--priority=normal|below-normal|low] -- <command> [args...]');
1083
1095
  console.error('Example: dario shim -- claude --print -p "hi"');
1096
+ console.error(' dario shim --priority=below-normal -- claude (recommended on Windows when RDP\'d into the host)');
1084
1097
  process.exit(1);
1085
1098
  }
1086
1099
  const { runShim } = await import('./shim/host.js');
@@ -1089,6 +1102,7 @@ async function shim() {
1089
1102
  command: childArgs[0],
1090
1103
  args: childArgs.slice(1),
1091
1104
  verbose,
1105
+ priority,
1092
1106
  });
1093
1107
  if (verbose) {
1094
1108
  const summary = result.analytics.summary(60);
@@ -31,6 +31,26 @@ interface RelayEvent {
31
31
  claim?: string | null;
32
32
  overageUtil?: number | null;
33
33
  }
34
+ /**
35
+ * Process-priority levels accepted by the shim. Cross-platform via Node's
36
+ * os.setPriority — same name on every OS, different underlying class:
37
+ * - 'normal' : default, no change
38
+ * - 'below-normal' : BELOW_NORMAL_PRIORITY_CLASS on Windows, nice +7 on POSIX
39
+ * - 'low' : IDLE_PRIORITY_CLASS on Windows, nice +19 on POSIX
40
+ *
41
+ * Use case: when the dario user is RDP'd into the same machine that hosts
42
+ * the claude CLI, claude can saturate ~4 cores during heavy tool work and
43
+ * starve the kernel network IO threads. The result is a Windows-specific
44
+ * cascade — RDP TCP socket writes return ERROR_SEM_TIMEOUT, sessions drop,
45
+ * Defender notices the disruption and writes a config-change event ~12s
46
+ * later. Lowering claude's scheduling priority lets the kernel preempt it
47
+ * for IO threads without changing claude's behavior or throughput. Same
48
+ * sustained CPU usage, no more drops. Documented in faq.md.
49
+ *
50
+ * (See dario#xxx — this lands as part of the same investigation that
51
+ * turned up the wider Defender / vmswitch / NIC offload cleanup work.)
52
+ */
53
+ export type ShimPriority = 'normal' | 'below-normal' | 'low';
34
54
  export interface ShimHostOptions {
35
55
  /** Command to spawn (the user's claude binary, or any node-based CC wrapper). */
36
56
  command: string;
@@ -40,6 +60,13 @@ export interface ShimHostOptions {
40
60
  templatePath?: string;
41
61
  /** Print per-event lines to stderr. */
42
62
  verbose?: boolean;
63
+ /**
64
+ * Process priority for the spawned child. Defaults to 'normal' (no change).
65
+ * Set to 'below-normal' when running claude on a machine you're RDP'd into,
66
+ * so kernel network IO threads can preempt the heavy claude workload and
67
+ * the RDP session doesn't drop on every tool burst. See ShimPriority.
68
+ */
69
+ priority?: ShimPriority;
43
70
  /** Optional Analytics sink. If omitted, a fresh instance is created. */
44
71
  analytics?: Analytics;
45
72
  }
package/dist/shim/host.js CHANGED
@@ -19,7 +19,7 @@
19
19
  import { createServer } from 'node:net';
20
20
  import { spawn } from 'node:child_process';
21
21
  import { mkdtempSync, existsSync } from 'node:fs';
22
- import { tmpdir, homedir } from 'node:os';
22
+ import { tmpdir, homedir, setPriority, constants as osConstants } from 'node:os';
23
23
  import { join, dirname } from 'node:path';
24
24
  import { fileURLToPath } from 'node:url';
25
25
  import { Analytics } from './../analytics.js';
@@ -42,6 +42,13 @@ export function locateShimRuntime() {
42
42
  }
43
43
  throw new Error(`shim runtime not found; checked: ${candidates.join(', ')}`);
44
44
  }
45
+ function priorityValue(p) {
46
+ switch (p) {
47
+ case 'normal': return osConstants.priority.PRIORITY_NORMAL;
48
+ case 'below-normal': return osConstants.priority.PRIORITY_BELOW_NORMAL;
49
+ case 'low': return osConstants.priority.PRIORITY_LOW;
50
+ }
51
+ }
45
52
  /**
46
53
  * Pick a socket path: unix domain socket on POSIX, named pipe on Windows.
47
54
  * Both forms are accepted directly by net.createServer / net.connect.
@@ -153,6 +160,26 @@ export async function runShim(opts) {
153
160
  server.close();
154
161
  throw e;
155
162
  }
163
+ // Apply priority best-effort. setPriority can fail if the user lacks the
164
+ // privilege to lower priorities below normal (rare on Windows / Linux for
165
+ // the same user's process, but reported on locked-down corporate setups).
166
+ // We log and continue — priority is a perf optimization, not a correctness
167
+ // requirement. The child runs at default priority if the call fails.
168
+ if (opts.priority && opts.priority !== 'normal') {
169
+ try {
170
+ if (child.pid !== undefined) {
171
+ setPriority(child.pid, priorityValue(opts.priority));
172
+ if (verbose) {
173
+ process.stderr.write(`[dario shim] child PID ${child.pid} priority → ${opts.priority}\n`);
174
+ }
175
+ }
176
+ }
177
+ catch (err) {
178
+ if (verbose) {
179
+ process.stderr.write(`[dario shim] priority set failed (continuing at default): ${err.message}\n`);
180
+ }
181
+ }
182
+ }
156
183
  const exitCode = await new Promise((resolve) => {
157
184
  child.on('exit', (code, signal) => {
158
185
  if (signal)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "3.36.0",
3
+ "version": "3.37.1",
4
4
  "description": "A local LLM router. One endpoint, every provider — Claude subscriptions, OpenAI, OpenRouter, Groq, local LiteLLM, any OpenAI-compat endpoint — your tools don't need to change.",
5
5
  "type": "module",
6
6
  "bin": {