@vellumai/assistant 0.10.0-dev.202606232139.0a4341a → 0.10.0-dev.202606232234.a0ec2ee
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -745,7 +745,11 @@ describe("loadConfig startup behavior", () => {
|
|
|
745
745
|
);
|
|
746
746
|
expect(raw.llm.profiles.frontier.provider).toBe("anthropic");
|
|
747
747
|
expect(raw.llm.profiles.frontier.model).toBe("claude-opus-4-8");
|
|
748
|
-
|
|
748
|
+
// Speed is served by DeepSeek V4 Flash on Fireworks.
|
|
749
|
+
expect(raw.llm.profiles["cost-optimized"].provider).toBe("fireworks");
|
|
750
|
+
expect(raw.llm.profiles["cost-optimized"].model).toBe(
|
|
751
|
+
"accounts/fireworks/models/deepseek-v4-flash",
|
|
752
|
+
);
|
|
749
753
|
});
|
|
750
754
|
|
|
751
755
|
test("off-platform managed profiles are overwritten on every boot", () => {
|
|
@@ -88,8 +88,15 @@ mock.module("../../../util/logger.js", () => ({
|
|
|
88
88
|
}));
|
|
89
89
|
|
|
90
90
|
mock.module("../../lib/cache-fs.js", () => ({
|
|
91
|
-
readFileSync: (path: string, encoding?: BufferEncoding) => {
|
|
91
|
+
readFileSync: (path: string | number, encoding?: BufferEncoding) => {
|
|
92
|
+
// Stdin must be read via fd 0, not by reopening "/dev/stdin": a spawned
|
|
93
|
+
// subprocess whose stdin is a pipe (Bun.spawn stdin:"pipe") cannot reopen
|
|
94
|
+
// its read-end by path — open("/dev/stdin") fails ENXIO. Throwing here on
|
|
95
|
+
// the path makes any regression to path-based reading fail loudly.
|
|
92
96
|
if (path === "/dev/stdin") {
|
|
97
|
+
throw new Error("ENXIO: no such device or address, open '/dev/stdin'");
|
|
98
|
+
}
|
|
99
|
+
if (path === 0) {
|
|
93
100
|
if (mockStdinContent === null) {
|
|
94
101
|
throw new Error("EAGAIN: resource temporarily unavailable");
|
|
95
102
|
}
|
|
@@ -17,6 +17,9 @@ import { log } from "../logger.js";
|
|
|
17
17
|
/** Warn (stderr) when a raw payload exceeds this byte count. */
|
|
18
18
|
const MAX_PAYLOAD_BYTES = 1_000_000; // 1 MB
|
|
19
19
|
|
|
20
|
+
/** Standard input file descriptor. */
|
|
21
|
+
const STDIN_FD = 0;
|
|
22
|
+
|
|
20
23
|
// ── TTL parsing ───────────────────────────────────────────────────────
|
|
21
24
|
|
|
22
25
|
const TTL_PATTERN = /^(\d+(?:\.\d+)?)\s*(ms|s|m|h)$/;
|
|
@@ -99,6 +102,11 @@ function parseJsonPayload(raw: string, source: string): unknown {
|
|
|
99
102
|
* Read JSON payload from stdin when piped. Throws when stdin is a TTY
|
|
100
103
|
* (no piped input) or when the input is empty/invalid JSON, so the CLI
|
|
101
104
|
* can surface actionable parse errors.
|
|
105
|
+
*
|
|
106
|
+
* Reads file descriptor 0 directly rather than reopening the `/dev/stdin`
|
|
107
|
+
* path. When the caller is a spawned subprocess whose stdin is a pipe (e.g.
|
|
108
|
+
* `Bun.spawn(..., { stdin: "pipe" })`), `open("/dev/stdin")` fails with ENXIO
|
|
109
|
+
* because a pipe read-end cannot be reopened by path; the fd is readable.
|
|
102
110
|
*/
|
|
103
111
|
function readPayloadFromStdin(): unknown {
|
|
104
112
|
if (process.stdin.isTTY) {
|
|
@@ -111,7 +119,7 @@ function readPayloadFromStdin(): unknown {
|
|
|
111
119
|
|
|
112
120
|
let raw: string;
|
|
113
121
|
try {
|
|
114
|
-
raw = readFileSync(
|
|
122
|
+
raw = readFileSync(STDIN_FD, "utf-8");
|
|
115
123
|
} catch (err) {
|
|
116
124
|
throw new Error(
|
|
117
125
|
`Failed to read stdin: ${err instanceof Error ? err.message : String(err)}.\n` +
|
|
@@ -173,10 +181,9 @@ export function registerCacheCommand(program: Command): void {
|
|
|
173
181
|
transport: "ipc",
|
|
174
182
|
description: "Interact with the assistant's in-memory key/value cache",
|
|
175
183
|
build: (cache) => {
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
`
|
|
184
|
+
cache.addHelpText(
|
|
185
|
+
"after",
|
|
186
|
+
`
|
|
180
187
|
The cache is a TTL-aware, LRU-evicting in-memory store managed by the
|
|
181
188
|
running assistant. Data is scoped to the assistant process lifetime and
|
|
182
189
|
is not persisted across restarts.
|
|
@@ -190,33 +197,33 @@ Examples:
|
|
|
190
197
|
$ echo '{"result": [1,2,3]}' | assistant cache set --ttl 5m
|
|
191
198
|
$ assistant cache get my-key
|
|
192
199
|
$ assistant cache delete my-key`,
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
200
|
+
);
|
|
201
|
+
|
|
202
|
+
// ── set ───────────────────────────────────────────────────────────
|
|
203
|
+
|
|
204
|
+
cache
|
|
205
|
+
.command("set")
|
|
206
|
+
.description("Store a JSON value in the cache")
|
|
207
|
+
.option(
|
|
208
|
+
"--key <key>",
|
|
209
|
+
"Cache key for idempotent upsert. Omit to auto-generate.",
|
|
210
|
+
)
|
|
211
|
+
.option(
|
|
212
|
+
"--ttl <duration>",
|
|
213
|
+
"Time-to-live (minimum 1s). Units: ms, s, m, h (e.g. 1000ms, 30s, 5m, 2h). Defaults to 30m if omitted.",
|
|
214
|
+
)
|
|
215
|
+
.option(
|
|
216
|
+
"--value <json>",
|
|
217
|
+
"JSON payload to store. Alternative to piping via stdin.",
|
|
218
|
+
)
|
|
219
|
+
.option(
|
|
220
|
+
"--file <path>",
|
|
221
|
+
"Path to a file containing the JSON payload. Alternative to piping via stdin.",
|
|
222
|
+
)
|
|
223
|
+
.option("--json", "Output result as machine-readable JSON.")
|
|
224
|
+
.addHelpText(
|
|
225
|
+
"after",
|
|
226
|
+
`
|
|
220
227
|
Stores a JSON payload in the cache and prints the assigned key. The payload
|
|
221
228
|
can be provided via --value, --file, or piped through stdin. If --key is
|
|
222
229
|
provided, the entry is upserted (created or replaced). If omitted, a new
|
|
@@ -240,86 +247,86 @@ Examples:
|
|
|
240
247
|
$ assistant cache set --file /tmp/payload.json --key scores --ttl 10m
|
|
241
248
|
$ echo '{"scores":[98,85,72]}' | assistant cache set
|
|
242
249
|
$ echo '"simple string"' | assistant cache set --ttl 1h --json`,
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
250
|
+
)
|
|
251
|
+
.action(
|
|
252
|
+
async (opts: {
|
|
253
|
+
key?: string;
|
|
254
|
+
ttl?: string;
|
|
255
|
+
value?: string;
|
|
256
|
+
file?: string;
|
|
257
|
+
json?: boolean;
|
|
258
|
+
}) => {
|
|
259
|
+
let data: unknown;
|
|
260
|
+
try {
|
|
261
|
+
data = resolvePayload(opts);
|
|
262
|
+
} catch (err) {
|
|
263
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
264
|
+
if (opts.json) {
|
|
265
|
+
process.stdout.write(
|
|
266
|
+
JSON.stringify({ ok: false, error: msg }) + "\n",
|
|
267
|
+
);
|
|
268
|
+
} else {
|
|
269
|
+
log.error(msg);
|
|
270
|
+
}
|
|
271
|
+
process.exitCode = 1;
|
|
272
|
+
return;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
let ttl_ms: number | undefined;
|
|
276
|
+
try {
|
|
277
|
+
ttl_ms = parseTtl(opts.ttl);
|
|
278
|
+
} catch (err) {
|
|
279
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
280
|
+
if (opts.json) {
|
|
281
|
+
process.stdout.write(
|
|
282
|
+
JSON.stringify({ ok: false, error: msg }) + "\n",
|
|
283
|
+
);
|
|
284
|
+
} else {
|
|
285
|
+
log.error(msg);
|
|
286
|
+
}
|
|
287
|
+
process.exitCode = 1;
|
|
288
|
+
return;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
const params: Record<string, unknown> = { data };
|
|
292
|
+
if (ttl_ms !== undefined) params.ttl_ms = ttl_ms;
|
|
293
|
+
if (opts.key) params.key = opts.key;
|
|
294
|
+
|
|
295
|
+
const result = await cliIpcCall<{ key: string }>("cache_set", {
|
|
296
|
+
body: params,
|
|
297
|
+
});
|
|
298
|
+
|
|
299
|
+
if (!result.ok) {
|
|
300
|
+
if (opts.json) {
|
|
301
|
+
process.stdout.write(
|
|
302
|
+
JSON.stringify({ ok: false, error: result.error }) + "\n",
|
|
303
|
+
);
|
|
304
|
+
} else {
|
|
305
|
+
log.error(`Error: ${result.error}`);
|
|
306
|
+
}
|
|
307
|
+
process.exitCode = 1;
|
|
308
|
+
return;
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
if (opts.json) {
|
|
312
|
+
process.stdout.write(
|
|
313
|
+
JSON.stringify({ ok: true, key: result.result!.key }) + "\n",
|
|
314
|
+
);
|
|
315
|
+
} else {
|
|
316
|
+
log.info(`Cached with key: ${result.result!.key}`);
|
|
317
|
+
}
|
|
318
|
+
},
|
|
319
|
+
);
|
|
313
320
|
|
|
314
|
-
|
|
321
|
+
// ── get ───────────────────────────────────────────────────────────
|
|
315
322
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
+
cache
|
|
324
|
+
.command("get <key>")
|
|
325
|
+
.description("Retrieve a cached value by key")
|
|
326
|
+
.option("--json", "Output result as machine-readable JSON.")
|
|
327
|
+
.addHelpText(
|
|
328
|
+
"after",
|
|
329
|
+
`
|
|
323
330
|
Arguments:
|
|
324
331
|
key The cache key to look up. Run 'assistant cache set' to store a
|
|
325
332
|
value and receive its key.
|
|
@@ -331,49 +338,52 @@ exist or has expired, reports not-found. In --json mode, a miss returns
|
|
|
331
338
|
Examples:
|
|
332
339
|
$ assistant cache get my-key
|
|
333
340
|
$ assistant cache get my-key --json`,
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
if (opts.json) {
|
|
342
|
-
process.stdout.write(
|
|
343
|
-
JSON.stringify({ ok: false, error: result.error }) + "\n",
|
|
341
|
+
)
|
|
342
|
+
.action(async (key: string, opts: { json?: boolean }) => {
|
|
343
|
+
const result = await cliIpcCall<{ data: unknown } | null>(
|
|
344
|
+
"cache_get",
|
|
345
|
+
{
|
|
346
|
+
body: { key },
|
|
347
|
+
},
|
|
344
348
|
);
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
349
|
+
|
|
350
|
+
if (!result.ok) {
|
|
351
|
+
if (opts.json) {
|
|
352
|
+
process.stdout.write(
|
|
353
|
+
JSON.stringify({ ok: false, error: result.error }) + "\n",
|
|
354
|
+
);
|
|
355
|
+
} else {
|
|
356
|
+
log.error(`Error: ${result.error}`);
|
|
357
|
+
}
|
|
358
|
+
process.exitCode = 1;
|
|
359
|
+
return;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
if (opts.json) {
|
|
363
|
+
process.stdout.write(
|
|
364
|
+
JSON.stringify({
|
|
365
|
+
ok: true,
|
|
366
|
+
data: result.result ? result.result.data : null,
|
|
367
|
+
}) + "\n",
|
|
368
|
+
);
|
|
369
|
+
} else {
|
|
370
|
+
if (result.result == null) {
|
|
371
|
+
log.info(`No cache entry found for key "${key}".`);
|
|
372
|
+
} else {
|
|
373
|
+
log.info(JSON.stringify(result.result.data, null, 2));
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
});
|
|
377
|
+
|
|
378
|
+
// ── delete ────────────────────────────────────────────────────────
|
|
379
|
+
|
|
380
|
+
cache
|
|
381
|
+
.command("delete <key>")
|
|
382
|
+
.description("Remove a cached entry by key")
|
|
383
|
+
.option("--json", "Output result as machine-readable JSON.")
|
|
384
|
+
.addHelpText(
|
|
385
|
+
"after",
|
|
386
|
+
`
|
|
377
387
|
Arguments:
|
|
378
388
|
key The cache key to remove. Run 'assistant cache get <key>' to
|
|
379
389
|
verify a key exists before deleting.
|
|
@@ -384,36 +394,39 @@ existed or not, but reports whether an entry was actually removed.
|
|
|
384
394
|
Examples:
|
|
385
395
|
$ assistant cache delete my-key
|
|
386
396
|
$ assistant cache delete my-key --json`,
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
if (opts.json) {
|
|
395
|
-
process.stdout.write(
|
|
396
|
-
JSON.stringify({ ok: false, error: result.error }) + "\n",
|
|
397
|
+
)
|
|
398
|
+
.action(async (key: string, opts: { json?: boolean }) => {
|
|
399
|
+
const result = await cliIpcCall<{ deleted: boolean }>(
|
|
400
|
+
"cache_delete",
|
|
401
|
+
{
|
|
402
|
+
body: { key },
|
|
403
|
+
},
|
|
397
404
|
);
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
405
|
+
|
|
406
|
+
if (!result.ok) {
|
|
407
|
+
if (opts.json) {
|
|
408
|
+
process.stdout.write(
|
|
409
|
+
JSON.stringify({ ok: false, error: result.error }) + "\n",
|
|
410
|
+
);
|
|
411
|
+
} else {
|
|
412
|
+
log.error(`Error: ${result.error}`);
|
|
413
|
+
}
|
|
414
|
+
process.exitCode = 1;
|
|
415
|
+
return;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
const deleted = result.result!.deleted;
|
|
419
|
+
|
|
420
|
+
if (opts.json) {
|
|
421
|
+
process.stdout.write(JSON.stringify({ ok: true, deleted }) + "\n");
|
|
422
|
+
} else {
|
|
423
|
+
if (deleted) {
|
|
424
|
+
log.info(`Deleted cache entry "${key}".`);
|
|
425
|
+
} else {
|
|
426
|
+
log.info(`No cache entry "${key}" (nothing to delete).`);
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
});
|
|
417
430
|
},
|
|
418
431
|
});
|
|
419
432
|
}
|
|
@@ -88,15 +88,24 @@ const MANAGED_PROFILE_TEMPLATES: Record<string, ManagedProfileTemplate> = {
|
|
|
88
88
|
// profile there's nothing stronger to consult, so the advisor defaults off.
|
|
89
89
|
advisorEnabled: false,
|
|
90
90
|
},
|
|
91
|
+
// Served by DeepSeek V4 Flash on Fireworks via managed platform inference: a
|
|
92
|
+
// fast, low-cost open model. `model` is pinned explicitly rather than
|
|
93
|
+
// resolved via the `latency-optimized` intent (which still maps to Kimi K2.5
|
|
94
|
+
// on Fireworks and Anthropic Haiku elsewhere).
|
|
95
|
+
//
|
|
96
|
+
// `effort: "none"` (not "low") because Fireworks is not thinking-aware: the
|
|
97
|
+
// disabled `thinking` config is stripped before the request, so a non-"none"
|
|
98
|
+
// effort would be sent as `reasoning_effort` and make this profile pay for
|
|
99
|
+
// reasoning despite thinking being off. "none" keeps Speed non-reasoning.
|
|
91
100
|
"cost-optimized": {
|
|
92
|
-
|
|
93
|
-
provider: "
|
|
94
|
-
connectionName: "
|
|
101
|
+
model: "accounts/fireworks/models/deepseek-v4-flash",
|
|
102
|
+
provider: "fireworks",
|
|
103
|
+
connectionName: "fireworks-managed",
|
|
95
104
|
source: "managed",
|
|
96
105
|
label: "Speed",
|
|
97
|
-
description: "Fastest responses at lower cost",
|
|
106
|
+
description: "Fastest responses at lower cost (DeepSeek V4 Flash)",
|
|
98
107
|
maxTokens: 8192,
|
|
99
|
-
effort: "
|
|
108
|
+
effort: "none",
|
|
100
109
|
thinking: { enabled: false, streamThinking: false },
|
|
101
110
|
contextWindow: { maxInputTokens: DEFAULT_CONTEXT_WINDOW_MAX_INPUT_TOKENS },
|
|
102
111
|
},
|
|
@@ -760,6 +760,22 @@ const RAW_PROVIDER_CATALOG: ProviderCatalogEntry[] = [
|
|
|
760
760
|
maxEffort: "max",
|
|
761
761
|
pricing: { inputPer1mTokens: 1.74, outputPer1mTokens: 3.48 },
|
|
762
762
|
},
|
|
763
|
+
{
|
|
764
|
+
id: "accounts/fireworks/models/deepseek-v4-flash",
|
|
765
|
+
displayName: "DeepSeek V4 Flash",
|
|
766
|
+
contextWindowTokens: 1040000,
|
|
767
|
+
maxOutputTokens: 131072,
|
|
768
|
+
supportsThinking: true,
|
|
769
|
+
supportsCaching: true,
|
|
770
|
+
supportsVision: false,
|
|
771
|
+
supportsToolUse: true,
|
|
772
|
+
maxEffort: "max",
|
|
773
|
+
pricing: {
|
|
774
|
+
inputPer1mTokens: 0.14,
|
|
775
|
+
outputPer1mTokens: 0.28,
|
|
776
|
+
cacheReadPer1mTokens: 0.03,
|
|
777
|
+
},
|
|
778
|
+
},
|
|
763
779
|
],
|
|
764
780
|
defaultModel: "accounts/fireworks/models/kimi-k2p5",
|
|
765
781
|
apiKeyUrl: "https://fireworks.ai/account/api-keys",
|