@vellumai/assistant 0.10.0-dev.202606232139.0a4341a → 0.10.0-dev.202606232234.a0ec2ee

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.10.0-dev.202606232139.0a4341a",
3
+ "version": "0.10.0-dev.202606232234.a0ec2ee",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "exports": {
@@ -745,7 +745,11 @@ describe("loadConfig startup behavior", () => {
745
745
  );
746
746
  expect(raw.llm.profiles.frontier.provider).toBe("anthropic");
747
747
  expect(raw.llm.profiles.frontier.model).toBe("claude-opus-4-8");
748
- expect(raw.llm.profiles["cost-optimized"].provider).toBe("anthropic");
748
+ // Speed is served by DeepSeek V4 Flash on Fireworks.
749
+ expect(raw.llm.profiles["cost-optimized"].provider).toBe("fireworks");
750
+ expect(raw.llm.profiles["cost-optimized"].model).toBe(
751
+ "accounts/fireworks/models/deepseek-v4-flash",
752
+ );
749
753
  });
750
754
 
751
755
  test("off-platform managed profiles are overwritten on every boot", () => {
@@ -88,8 +88,15 @@ mock.module("../../../util/logger.js", () => ({
88
88
  }));
89
89
 
90
90
  mock.module("../../lib/cache-fs.js", () => ({
91
- readFileSync: (path: string, encoding?: BufferEncoding) => {
91
+ readFileSync: (path: string | number, encoding?: BufferEncoding) => {
92
+ // Stdin must be read via fd 0, not by reopening "/dev/stdin": a spawned
93
+ // subprocess whose stdin is a pipe (Bun.spawn stdin:"pipe") cannot reopen
94
+ // its read-end by path — open("/dev/stdin") fails ENXIO. Throwing here on
95
+ // the path makes any regression to path-based reading fail loudly.
92
96
  if (path === "/dev/stdin") {
97
+ throw new Error("ENXIO: no such device or address, open '/dev/stdin'");
98
+ }
99
+ if (path === 0) {
93
100
  if (mockStdinContent === null) {
94
101
  throw new Error("EAGAIN: resource temporarily unavailable");
95
102
  }
@@ -17,6 +17,9 @@ import { log } from "../logger.js";
17
17
  /** Warn (stderr) when a raw payload exceeds this byte count. */
18
18
  const MAX_PAYLOAD_BYTES = 1_000_000; // 1 MB
19
19
 
20
+ /** Standard input file descriptor. */
21
+ const STDIN_FD = 0;
22
+
20
23
  // ── TTL parsing ───────────────────────────────────────────────────────
21
24
 
22
25
  const TTL_PATTERN = /^(\d+(?:\.\d+)?)\s*(ms|s|m|h)$/;
@@ -99,6 +102,11 @@ function parseJsonPayload(raw: string, source: string): unknown {
99
102
  * Read JSON payload from stdin when piped. Throws when stdin is a TTY
100
103
  * (no piped input) or when the input is empty/invalid JSON, so the CLI
101
104
  * can surface actionable parse errors.
105
+ *
106
+ * Reads file descriptor 0 directly rather than reopening the `/dev/stdin`
107
+ * path. When the caller is a spawned subprocess whose stdin is a pipe (e.g.
108
+ * `Bun.spawn(..., { stdin: "pipe" })`), `open("/dev/stdin")` fails with ENXIO
109
+ * because a pipe read-end cannot be reopened by path; the fd is readable.
102
110
  */
103
111
  function readPayloadFromStdin(): unknown {
104
112
  if (process.stdin.isTTY) {
@@ -111,7 +119,7 @@ function readPayloadFromStdin(): unknown {
111
119
 
112
120
  let raw: string;
113
121
  try {
114
- raw = readFileSync("/dev/stdin", "utf-8");
122
+ raw = readFileSync(STDIN_FD, "utf-8");
115
123
  } catch (err) {
116
124
  throw new Error(
117
125
  `Failed to read stdin: ${err instanceof Error ? err.message : String(err)}.\n` +
@@ -173,10 +181,9 @@ export function registerCacheCommand(program: Command): void {
173
181
  transport: "ipc",
174
182
  description: "Interact with the assistant's in-memory key/value cache",
175
183
  build: (cache) => {
176
-
177
- cache.addHelpText(
178
- "after",
179
- `
184
+ cache.addHelpText(
185
+ "after",
186
+ `
180
187
  The cache is a TTL-aware, LRU-evicting in-memory store managed by the
181
188
  running assistant. Data is scoped to the assistant process lifetime and
182
189
  is not persisted across restarts.
@@ -190,33 +197,33 @@ Examples:
190
197
  $ echo '{"result": [1,2,3]}' | assistant cache set --ttl 5m
191
198
  $ assistant cache get my-key
192
199
  $ assistant cache delete my-key`,
193
- );
194
-
195
- // ── set ───────────────────────────────────────────────────────────
196
-
197
- cache
198
- .command("set")
199
- .description("Store a JSON value in the cache")
200
- .option(
201
- "--key <key>",
202
- "Cache key for idempotent upsert. Omit to auto-generate.",
203
- )
204
- .option(
205
- "--ttl <duration>",
206
- "Time-to-live (minimum 1s). Units: ms, s, m, h (e.g. 1000ms, 30s, 5m, 2h). Defaults to 30m if omitted.",
207
- )
208
- .option(
209
- "--value <json>",
210
- "JSON payload to store. Alternative to piping via stdin.",
211
- )
212
- .option(
213
- "--file <path>",
214
- "Path to a file containing the JSON payload. Alternative to piping via stdin.",
215
- )
216
- .option("--json", "Output result as machine-readable JSON.")
217
- .addHelpText(
218
- "after",
219
- `
200
+ );
201
+
202
+ // ── set ───────────────────────────────────────────────────────────
203
+
204
+ cache
205
+ .command("set")
206
+ .description("Store a JSON value in the cache")
207
+ .option(
208
+ "--key <key>",
209
+ "Cache key for idempotent upsert. Omit to auto-generate.",
210
+ )
211
+ .option(
212
+ "--ttl <duration>",
213
+ "Time-to-live (minimum 1s). Units: ms, s, m, h (e.g. 1000ms, 30s, 5m, 2h). Defaults to 30m if omitted.",
214
+ )
215
+ .option(
216
+ "--value <json>",
217
+ "JSON payload to store. Alternative to piping via stdin.",
218
+ )
219
+ .option(
220
+ "--file <path>",
221
+ "Path to a file containing the JSON payload. Alternative to piping via stdin.",
222
+ )
223
+ .option("--json", "Output result as machine-readable JSON.")
224
+ .addHelpText(
225
+ "after",
226
+ `
220
227
  Stores a JSON payload in the cache and prints the assigned key. The payload
221
228
  can be provided via --value, --file, or piped through stdin. If --key is
222
229
  provided, the entry is upserted (created or replaced). If omitted, a new
@@ -240,86 +247,86 @@ Examples:
240
247
  $ assistant cache set --file /tmp/payload.json --key scores --ttl 10m
241
248
  $ echo '{"scores":[98,85,72]}' | assistant cache set
242
249
  $ echo '"simple string"' | assistant cache set --ttl 1h --json`,
243
- )
244
- .action(
245
- async (opts: {
246
- key?: string;
247
- ttl?: string;
248
- value?: string;
249
- file?: string;
250
- json?: boolean;
251
- }) => {
252
- let data: unknown;
253
- try {
254
- data = resolvePayload(opts);
255
- } catch (err) {
256
- const msg = err instanceof Error ? err.message : String(err);
257
- if (opts.json) {
258
- process.stdout.write(
259
- JSON.stringify({ ok: false, error: msg }) + "\n",
260
- );
261
- } else {
262
- log.error(msg);
263
- }
264
- process.exitCode = 1;
265
- return;
266
- }
267
-
268
- let ttl_ms: number | undefined;
269
- try {
270
- ttl_ms = parseTtl(opts.ttl);
271
- } catch (err) {
272
- const msg = err instanceof Error ? err.message : String(err);
273
- if (opts.json) {
274
- process.stdout.write(
275
- JSON.stringify({ ok: false, error: msg }) + "\n",
276
- );
277
- } else {
278
- log.error(msg);
279
- }
280
- process.exitCode = 1;
281
- return;
282
- }
283
-
284
- const params: Record<string, unknown> = { data };
285
- if (ttl_ms !== undefined) params.ttl_ms = ttl_ms;
286
- if (opts.key) params.key = opts.key;
287
-
288
- const result = await cliIpcCall<{ key: string }>("cache_set", {
289
- body: params,
290
- });
291
-
292
- if (!result.ok) {
293
- if (opts.json) {
294
- process.stdout.write(
295
- JSON.stringify({ ok: false, error: result.error }) + "\n",
296
- );
297
- } else {
298
- log.error(`Error: ${result.error}`);
299
- }
300
- process.exitCode = 1;
301
- return;
302
- }
303
-
304
- if (opts.json) {
305
- process.stdout.write(
306
- JSON.stringify({ ok: true, key: result.result!.key }) + "\n",
307
- );
308
- } else {
309
- log.info(`Cached with key: ${result.result!.key}`);
310
- }
311
- },
312
- );
250
+ )
251
+ .action(
252
+ async (opts: {
253
+ key?: string;
254
+ ttl?: string;
255
+ value?: string;
256
+ file?: string;
257
+ json?: boolean;
258
+ }) => {
259
+ let data: unknown;
260
+ try {
261
+ data = resolvePayload(opts);
262
+ } catch (err) {
263
+ const msg = err instanceof Error ? err.message : String(err);
264
+ if (opts.json) {
265
+ process.stdout.write(
266
+ JSON.stringify({ ok: false, error: msg }) + "\n",
267
+ );
268
+ } else {
269
+ log.error(msg);
270
+ }
271
+ process.exitCode = 1;
272
+ return;
273
+ }
274
+
275
+ let ttl_ms: number | undefined;
276
+ try {
277
+ ttl_ms = parseTtl(opts.ttl);
278
+ } catch (err) {
279
+ const msg = err instanceof Error ? err.message : String(err);
280
+ if (opts.json) {
281
+ process.stdout.write(
282
+ JSON.stringify({ ok: false, error: msg }) + "\n",
283
+ );
284
+ } else {
285
+ log.error(msg);
286
+ }
287
+ process.exitCode = 1;
288
+ return;
289
+ }
290
+
291
+ const params: Record<string, unknown> = { data };
292
+ if (ttl_ms !== undefined) params.ttl_ms = ttl_ms;
293
+ if (opts.key) params.key = opts.key;
294
+
295
+ const result = await cliIpcCall<{ key: string }>("cache_set", {
296
+ body: params,
297
+ });
298
+
299
+ if (!result.ok) {
300
+ if (opts.json) {
301
+ process.stdout.write(
302
+ JSON.stringify({ ok: false, error: result.error }) + "\n",
303
+ );
304
+ } else {
305
+ log.error(`Error: ${result.error}`);
306
+ }
307
+ process.exitCode = 1;
308
+ return;
309
+ }
310
+
311
+ if (opts.json) {
312
+ process.stdout.write(
313
+ JSON.stringify({ ok: true, key: result.result!.key }) + "\n",
314
+ );
315
+ } else {
316
+ log.info(`Cached with key: ${result.result!.key}`);
317
+ }
318
+ },
319
+ );
313
320
 
314
- // ── get ───────────────────────────────────────────────────────────
321
+ // ── get ───────────────────────────────────────────────────────────
315
322
 
316
- cache
317
- .command("get <key>")
318
- .description("Retrieve a cached value by key")
319
- .option("--json", "Output result as machine-readable JSON.")
320
- .addHelpText(
321
- "after",
322
- `
323
+ cache
324
+ .command("get <key>")
325
+ .description("Retrieve a cached value by key")
326
+ .option("--json", "Output result as machine-readable JSON.")
327
+ .addHelpText(
328
+ "after",
329
+ `
323
330
  Arguments:
324
331
  key The cache key to look up. Run 'assistant cache set' to store a
325
332
  value and receive its key.
@@ -331,49 +338,52 @@ exist or has expired, reports not-found. In --json mode, a miss returns
331
338
  Examples:
332
339
  $ assistant cache get my-key
333
340
  $ assistant cache get my-key --json`,
334
- )
335
- .action(async (key: string, opts: { json?: boolean }) => {
336
- const result = await cliIpcCall<{ data: unknown } | null>("cache_get", {
337
- body: { key },
338
- });
339
-
340
- if (!result.ok) {
341
- if (opts.json) {
342
- process.stdout.write(
343
- JSON.stringify({ ok: false, error: result.error }) + "\n",
341
+ )
342
+ .action(async (key: string, opts: { json?: boolean }) => {
343
+ const result = await cliIpcCall<{ data: unknown } | null>(
344
+ "cache_get",
345
+ {
346
+ body: { key },
347
+ },
344
348
  );
345
- } else {
346
- log.error(`Error: ${result.error}`);
347
- }
348
- process.exitCode = 1;
349
- return;
350
- }
351
-
352
- if (opts.json) {
353
- process.stdout.write(
354
- JSON.stringify({
355
- ok: true,
356
- data: result.result ? result.result.data : null,
357
- }) + "\n",
358
- );
359
- } else {
360
- if (result.result == null) {
361
- log.info(`No cache entry found for key "${key}".`);
362
- } else {
363
- log.info(JSON.stringify(result.result.data, null, 2));
364
- }
365
- }
366
- });
367
-
368
- // ── delete ────────────────────────────────────────────────────────
369
-
370
- cache
371
- .command("delete <key>")
372
- .description("Remove a cached entry by key")
373
- .option("--json", "Output result as machine-readable JSON.")
374
- .addHelpText(
375
- "after",
376
- `
349
+
350
+ if (!result.ok) {
351
+ if (opts.json) {
352
+ process.stdout.write(
353
+ JSON.stringify({ ok: false, error: result.error }) + "\n",
354
+ );
355
+ } else {
356
+ log.error(`Error: ${result.error}`);
357
+ }
358
+ process.exitCode = 1;
359
+ return;
360
+ }
361
+
362
+ if (opts.json) {
363
+ process.stdout.write(
364
+ JSON.stringify({
365
+ ok: true,
366
+ data: result.result ? result.result.data : null,
367
+ }) + "\n",
368
+ );
369
+ } else {
370
+ if (result.result == null) {
371
+ log.info(`No cache entry found for key "${key}".`);
372
+ } else {
373
+ log.info(JSON.stringify(result.result.data, null, 2));
374
+ }
375
+ }
376
+ });
377
+
378
+ // ── delete ────────────────────────────────────────────────────────
379
+
380
+ cache
381
+ .command("delete <key>")
382
+ .description("Remove a cached entry by key")
383
+ .option("--json", "Output result as machine-readable JSON.")
384
+ .addHelpText(
385
+ "after",
386
+ `
377
387
  Arguments:
378
388
  key The cache key to remove. Run 'assistant cache get <key>' to
379
389
  verify a key exists before deleting.
@@ -384,36 +394,39 @@ existed or not, but reports whether an entry was actually removed.
384
394
  Examples:
385
395
  $ assistant cache delete my-key
386
396
  $ assistant cache delete my-key --json`,
387
- )
388
- .action(async (key: string, opts: { json?: boolean }) => {
389
- const result = await cliIpcCall<{ deleted: boolean }>("cache_delete", {
390
- body: { key },
391
- });
392
-
393
- if (!result.ok) {
394
- if (opts.json) {
395
- process.stdout.write(
396
- JSON.stringify({ ok: false, error: result.error }) + "\n",
397
+ )
398
+ .action(async (key: string, opts: { json?: boolean }) => {
399
+ const result = await cliIpcCall<{ deleted: boolean }>(
400
+ "cache_delete",
401
+ {
402
+ body: { key },
403
+ },
397
404
  );
398
- } else {
399
- log.error(`Error: ${result.error}`);
400
- }
401
- process.exitCode = 1;
402
- return;
403
- }
404
-
405
- const deleted = result.result!.deleted;
406
-
407
- if (opts.json) {
408
- process.stdout.write(JSON.stringify({ ok: true, deleted }) + "\n");
409
- } else {
410
- if (deleted) {
411
- log.info(`Deleted cache entry "${key}".`);
412
- } else {
413
- log.info(`No cache entry "${key}" (nothing to delete).`);
414
- }
415
- }
416
- });
405
+
406
+ if (!result.ok) {
407
+ if (opts.json) {
408
+ process.stdout.write(
409
+ JSON.stringify({ ok: false, error: result.error }) + "\n",
410
+ );
411
+ } else {
412
+ log.error(`Error: ${result.error}`);
413
+ }
414
+ process.exitCode = 1;
415
+ return;
416
+ }
417
+
418
+ const deleted = result.result!.deleted;
419
+
420
+ if (opts.json) {
421
+ process.stdout.write(JSON.stringify({ ok: true, deleted }) + "\n");
422
+ } else {
423
+ if (deleted) {
424
+ log.info(`Deleted cache entry "${key}".`);
425
+ } else {
426
+ log.info(`No cache entry "${key}" (nothing to delete).`);
427
+ }
428
+ }
429
+ });
417
430
  },
418
431
  });
419
432
  }
@@ -88,15 +88,24 @@ const MANAGED_PROFILE_TEMPLATES: Record<string, ManagedProfileTemplate> = {
88
88
  // profile there's nothing stronger to consult, so the advisor defaults off.
89
89
  advisorEnabled: false,
90
90
  },
91
+ // Served by DeepSeek V4 Flash on Fireworks via managed platform inference: a
92
+ // fast, low-cost open model. `model` is pinned explicitly rather than
93
+ // resolved via the `latency-optimized` intent (which still maps to Kimi K2.5
94
+ // on Fireworks and Anthropic Haiku elsewhere).
95
+ //
96
+ // `effort: "none"` (not "low") because Fireworks is not thinking-aware: the
97
+ // disabled `thinking` config is stripped before the request, so a non-"none"
98
+ // effort would be sent as `reasoning_effort` and make this profile pay for
99
+ // reasoning despite thinking being off. "none" keeps Speed non-reasoning.
91
100
  "cost-optimized": {
92
- intent: "latency-optimized",
93
- provider: "anthropic",
94
- connectionName: "anthropic-managed",
101
+ model: "accounts/fireworks/models/deepseek-v4-flash",
102
+ provider: "fireworks",
103
+ connectionName: "fireworks-managed",
95
104
  source: "managed",
96
105
  label: "Speed",
97
- description: "Fastest responses at lower cost",
106
+ description: "Fastest responses at lower cost (DeepSeek V4 Flash)",
98
107
  maxTokens: 8192,
99
- effort: "low",
108
+ effort: "none",
100
109
  thinking: { enabled: false, streamThinking: false },
101
110
  contextWindow: { maxInputTokens: DEFAULT_CONTEXT_WINDOW_MAX_INPUT_TOKENS },
102
111
  },
@@ -760,6 +760,22 @@ const RAW_PROVIDER_CATALOG: ProviderCatalogEntry[] = [
760
760
  maxEffort: "max",
761
761
  pricing: { inputPer1mTokens: 1.74, outputPer1mTokens: 3.48 },
762
762
  },
763
+ {
764
+ id: "accounts/fireworks/models/deepseek-v4-flash",
765
+ displayName: "DeepSeek V4 Flash",
766
+ contextWindowTokens: 1040000,
767
+ maxOutputTokens: 131072,
768
+ supportsThinking: true,
769
+ supportsCaching: true,
770
+ supportsVision: false,
771
+ supportsToolUse: true,
772
+ maxEffort: "max",
773
+ pricing: {
774
+ inputPer1mTokens: 0.14,
775
+ outputPer1mTokens: 0.28,
776
+ cacheReadPer1mTokens: 0.03,
777
+ },
778
+ },
763
779
  ],
764
780
  defaultModel: "accounts/fireworks/models/kimi-k2p5",
765
781
  apiKeyUrl: "https://fireworks.ai/account/api-keys",