npm - @vellumai/assistant - Versions diffs - 0.10.0-dev.202606232139.0a4341a → 0.10.0-dev.202606232234.a0ec2ee - Mend

@vellumai/assistant 0.10.0-dev.202606232139.0a4341a → 0.10.0-dev.202606232234.a0ec2ee

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/src/__tests__/config-loader-backfill.test.ts +5 -1
package/src/cli/commands/__tests__/cache.test.ts +8 -1
package/src/cli/commands/cache.ts +194 -181
package/src/config/seed-inference-profiles.ts +14 -5
package/src/providers/model-catalog.ts +16 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.10.0-dev.202606232139.0a4341a",
+  "version": "0.10.0-dev.202606232234.a0ec2ee",
   "license": "MIT",
   "type": "module",
   "exports": {

package/src/__tests__/config-loader-backfill.test.ts CHANGED Viewed

@@ -745,7 +745,11 @@ describe("loadConfig startup behavior", () => {
     );
     expect(raw.llm.profiles.frontier.provider).toBe("anthropic");
     expect(raw.llm.profiles.frontier.model).toBe("claude-opus-4-8");
-    expect(raw.llm.profiles["cost-optimized"].provider).toBe("anthropic");
+    // Speed is served by DeepSeek V4 Flash on Fireworks.
+    expect(raw.llm.profiles["cost-optimized"].provider).toBe("fireworks");
+    expect(raw.llm.profiles["cost-optimized"].model).toBe(
+      "accounts/fireworks/models/deepseek-v4-flash",
+    );
   });
   test("off-platform managed profiles are overwritten on every boot", () => {

package/src/cli/commands/__tests__/cache.test.ts CHANGED Viewed

@@ -88,8 +88,15 @@ mock.module("../../../util/logger.js", () => ({
 }));
 mock.module("../../lib/cache-fs.js", () => ({
-  readFileSync: (path: string, encoding?: BufferEncoding) => {
+  readFileSync: (path: string | number, encoding?: BufferEncoding) => {
+    // Stdin must be read via fd 0, not by reopening "/dev/stdin": a spawned
+    // subprocess whose stdin is a pipe (Bun.spawn stdin:"pipe") cannot reopen
+    // its read-end by path — open("/dev/stdin") fails ENXIO. Throwing here on
+    // the path makes any regression to path-based reading fail loudly.
     if (path === "/dev/stdin") {
+      throw new Error("ENXIO: no such device or address, open '/dev/stdin'");
+    }
+    if (path === 0) {
       if (mockStdinContent === null) {
         throw new Error("EAGAIN: resource temporarily unavailable");
       }

package/src/cli/commands/cache.ts CHANGED Viewed

@@ -17,6 +17,9 @@ import { log } from "../logger.js";
 /** Warn (stderr) when a raw payload exceeds this byte count. */
 const MAX_PAYLOAD_BYTES = 1_000_000; // 1 MB
+/** Standard input file descriptor. */
+const STDIN_FD = 0;
 // ── TTL parsing ───────────────────────────────────────────────────────
 const TTL_PATTERN = /^(\d+(?:\.\d+)?)\s*(ms|s|m|h)$/;
@@ -99,6 +102,11 @@ function parseJsonPayload(raw: string, source: string): unknown {
  * Read JSON payload from stdin when piped. Throws when stdin is a TTY
  * (no piped input) or when the input is empty/invalid JSON, so the CLI
  * can surface actionable parse errors.
+ *
+ * Reads file descriptor 0 directly rather than reopening the `/dev/stdin`
+ * path. When the caller is a spawned subprocess whose stdin is a pipe (e.g.
+ * `Bun.spawn(..., { stdin: "pipe" })`), `open("/dev/stdin")` fails with ENXIO
+ * because a pipe read-end cannot be reopened by path; the fd is readable.
  */
 function readPayloadFromStdin(): unknown {
   if (process.stdin.isTTY) {
@@ -111,7 +119,7 @@ function readPayloadFromStdin(): unknown {
   let raw: string;
   try {
-    raw = readFileSync("/dev/stdin", "utf-8");
+    raw = readFileSync(STDIN_FD, "utf-8");
   } catch (err) {
     throw new Error(
       `Failed to read stdin: ${err instanceof Error ? err.message : String(err)}.\n` +
@@ -173,10 +181,9 @@ export function registerCacheCommand(program: Command): void {
     transport: "ipc",
     description: "Interact with the assistant's in-memory key/value cache",
     build: (cache) => {
-  cache.addHelpText(
-    "after",
-    `
+      cache.addHelpText(
+        "after",
+        `
 The cache is a TTL-aware, LRU-evicting in-memory store managed by the
 running assistant. Data is scoped to the assistant process lifetime and
 is not persisted across restarts.
@@ -190,33 +197,33 @@ Examples:
   $ echo '{"result": [1,2,3]}' | assistant cache set --ttl 5m
   $ assistant cache get my-key
   $ assistant cache delete my-key`,
-  );
-  // ── set ───────────────────────────────────────────────────────────
-  cache
-    .command("set")
-    .description("Store a JSON value in the cache")
-    .option(
-      "--key <key>",
-      "Cache key for idempotent upsert. Omit to auto-generate.",
-    )
-    .option(
-      "--ttl <duration>",
-      "Time-to-live (minimum 1s). Units: ms, s, m, h (e.g. 1000ms, 30s, 5m, 2h). Defaults to 30m if omitted.",
-    )
-    .option(
-      "--value <json>",
-      "JSON payload to store. Alternative to piping via stdin.",
-    )
-    .option(
-      "--file <path>",
-      "Path to a file containing the JSON payload. Alternative to piping via stdin.",
-    )
-    .option("--json", "Output result as machine-readable JSON.")
-    .addHelpText(
-      "after",
-      `
+      );
+      // ── set ───────────────────────────────────────────────────────────
+      cache
+        .command("set")
+        .description("Store a JSON value in the cache")
+        .option(
+          "--key <key>",
+          "Cache key for idempotent upsert. Omit to auto-generate.",
+        )
+        .option(
+          "--ttl <duration>",
+          "Time-to-live (minimum 1s). Units: ms, s, m, h (e.g. 1000ms, 30s, 5m, 2h). Defaults to 30m if omitted.",
+        )
+        .option(
+          "--value <json>",
+          "JSON payload to store. Alternative to piping via stdin.",
+        )
+        .option(
+          "--file <path>",
+          "Path to a file containing the JSON payload. Alternative to piping via stdin.",
+        )
+        .option("--json", "Output result as machine-readable JSON.")
+        .addHelpText(
+          "after",
+          `
 Stores a JSON payload in the cache and prints the assigned key. The payload
 can be provided via --value, --file, or piped through stdin. If --key is
 provided, the entry is upserted (created or replaced). If omitted, a new
@@ -240,86 +247,86 @@ Examples:
   $ assistant cache set --file /tmp/payload.json --key scores --ttl 10m
   $ echo '{"scores":[98,85,72]}' | assistant cache set
   $ echo '"simple string"' | assistant cache set --ttl 1h --json`,
-    )
-    .action(
-      async (opts: {
-        key?: string;
-        ttl?: string;
-        value?: string;
-        file?: string;
-        json?: boolean;
-      }) => {
-        let data: unknown;
-        try {
-          data = resolvePayload(opts);
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : String(err);
-          if (opts.json) {
-            process.stdout.write(
-              JSON.stringify({ ok: false, error: msg }) + "\n",
-            );
-          } else {
-            log.error(msg);
-          }
-          process.exitCode = 1;
-          return;
-        }
-        let ttl_ms: number | undefined;
-        try {
-          ttl_ms = parseTtl(opts.ttl);
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : String(err);
-          if (opts.json) {
-            process.stdout.write(
-              JSON.stringify({ ok: false, error: msg }) + "\n",
-            );
-          } else {
-            log.error(msg);
-          }
-          process.exitCode = 1;
-          return;
-        }
-        const params: Record<string, unknown> = { data };
-        if (ttl_ms !== undefined) params.ttl_ms = ttl_ms;
-        if (opts.key) params.key = opts.key;
-        const result = await cliIpcCall<{ key: string }>("cache_set", {
-          body: params,
-        });
-        if (!result.ok) {
-          if (opts.json) {
-            process.stdout.write(
-              JSON.stringify({ ok: false, error: result.error }) + "\n",
-            );
-          } else {
-            log.error(`Error: ${result.error}`);
-          }
-          process.exitCode = 1;
-          return;
-        }
-        if (opts.json) {
-          process.stdout.write(
-            JSON.stringify({ ok: true, key: result.result!.key }) + "\n",
-          );
-        } else {
-          log.info(`Cached with key: ${result.result!.key}`);
-        }
-      },
-    );
+        )
+        .action(
+          async (opts: {
+            key?: string;
+            ttl?: string;
+            value?: string;
+            file?: string;
+            json?: boolean;
+          }) => {
+            let data: unknown;
+            try {
+              data = resolvePayload(opts);
+            } catch (err) {
+              const msg = err instanceof Error ? err.message : String(err);
+              if (opts.json) {
+                process.stdout.write(
+                  JSON.stringify({ ok: false, error: msg }) + "\n",
+                );
+              } else {
+                log.error(msg);
+              }
+              process.exitCode = 1;
+              return;
+            }
+            let ttl_ms: number | undefined;
+            try {
+              ttl_ms = parseTtl(opts.ttl);
+            } catch (err) {
+              const msg = err instanceof Error ? err.message : String(err);
+              if (opts.json) {
+                process.stdout.write(
+                  JSON.stringify({ ok: false, error: msg }) + "\n",
+                );
+              } else {
+                log.error(msg);
+              }
+              process.exitCode = 1;
+              return;
+            }
+            const params: Record<string, unknown> = { data };
+            if (ttl_ms !== undefined) params.ttl_ms = ttl_ms;
+            if (opts.key) params.key = opts.key;
+            const result = await cliIpcCall<{ key: string }>("cache_set", {
+              body: params,
+            });
+            if (!result.ok) {
+              if (opts.json) {
+                process.stdout.write(
+                  JSON.stringify({ ok: false, error: result.error }) + "\n",
+                );
+              } else {
+                log.error(`Error: ${result.error}`);
+              }
+              process.exitCode = 1;
+              return;
+            }
+            if (opts.json) {
+              process.stdout.write(
+                JSON.stringify({ ok: true, key: result.result!.key }) + "\n",
+              );
+            } else {
+              log.info(`Cached with key: ${result.result!.key}`);
+            }
+          },
+        );
-  // ── get ───────────────────────────────────────────────────────────
+      // ── get ───────────────────────────────────────────────────────────
-  cache
-    .command("get <key>")
-    .description("Retrieve a cached value by key")
-    .option("--json", "Output result as machine-readable JSON.")
-    .addHelpText(
-      "after",
-      `
+      cache
+        .command("get <key>")
+        .description("Retrieve a cached value by key")
+        .option("--json", "Output result as machine-readable JSON.")
+        .addHelpText(
+          "after",
+          `
 Arguments:
   key   The cache key to look up. Run 'assistant cache set' to store a
         value and receive its key.
@@ -331,49 +338,52 @@ exist or has expired, reports not-found. In --json mode, a miss returns
 Examples:
   $ assistant cache get my-key
   $ assistant cache get my-key --json`,
-    )
-    .action(async (key: string, opts: { json?: boolean }) => {
-      const result = await cliIpcCall<{ data: unknown } | null>("cache_get", {
-        body: { key },
-      });
-      if (!result.ok) {
-        if (opts.json) {
-          process.stdout.write(
-            JSON.stringify({ ok: false, error: result.error }) + "\n",
+        )
+        .action(async (key: string, opts: { json?: boolean }) => {
+          const result = await cliIpcCall<{ data: unknown } | null>(
+            "cache_get",
+            {
+              body: { key },
+            },
           );
-        } else {
-          log.error(`Error: ${result.error}`);
-        }
-        process.exitCode = 1;
-        return;
-      }
-      if (opts.json) {
-        process.stdout.write(
-          JSON.stringify({
-            ok: true,
-            data: result.result ? result.result.data : null,
-          }) + "\n",
-        );
-      } else {
-        if (result.result == null) {
-          log.info(`No cache entry found for key "${key}".`);
-        } else {
-          log.info(JSON.stringify(result.result.data, null, 2));
-        }
-      }
-    });
-  // ── delete ────────────────────────────────────────────────────────
-  cache
-    .command("delete <key>")
-    .description("Remove a cached entry by key")
-    .option("--json", "Output result as machine-readable JSON.")
-    .addHelpText(
-      "after",
-      `
+          if (!result.ok) {
+            if (opts.json) {
+              process.stdout.write(
+                JSON.stringify({ ok: false, error: result.error }) + "\n",
+              );
+            } else {
+              log.error(`Error: ${result.error}`);
+            }
+            process.exitCode = 1;
+            return;
+          }
+          if (opts.json) {
+            process.stdout.write(
+              JSON.stringify({
+                ok: true,
+                data: result.result ? result.result.data : null,
+              }) + "\n",
+            );
+          } else {
+            if (result.result == null) {
+              log.info(`No cache entry found for key "${key}".`);
+            } else {
+              log.info(JSON.stringify(result.result.data, null, 2));
+            }
+          }
+        });
+      // ── delete ────────────────────────────────────────────────────────
+      cache
+        .command("delete <key>")
+        .description("Remove a cached entry by key")
+        .option("--json", "Output result as machine-readable JSON.")
+        .addHelpText(
+          "after",
+          `
 Arguments:
   key   The cache key to remove. Run 'assistant cache get <key>' to
         verify a key exists before deleting.
@@ -384,36 +394,39 @@ existed or not, but reports whether an entry was actually removed.
 Examples:
   $ assistant cache delete my-key
   $ assistant cache delete my-key --json`,
-    )
-    .action(async (key: string, opts: { json?: boolean }) => {
-      const result = await cliIpcCall<{ deleted: boolean }>("cache_delete", {
-        body: { key },
-      });
-      if (!result.ok) {
-        if (opts.json) {
-          process.stdout.write(
-            JSON.stringify({ ok: false, error: result.error }) + "\n",
+        )
+        .action(async (key: string, opts: { json?: boolean }) => {
+          const result = await cliIpcCall<{ deleted: boolean }>(
+            "cache_delete",
+            {
+              body: { key },
+            },
           );
-        } else {
-          log.error(`Error: ${result.error}`);
-        }
-        process.exitCode = 1;
-        return;
-      }
-      const deleted = result.result!.deleted;
-      if (opts.json) {
-        process.stdout.write(JSON.stringify({ ok: true, deleted }) + "\n");
-      } else {
-        if (deleted) {
-          log.info(`Deleted cache entry "${key}".`);
-        } else {
-          log.info(`No cache entry "${key}" (nothing to delete).`);
-        }
-      }
-    });
+          if (!result.ok) {
+            if (opts.json) {
+              process.stdout.write(
+                JSON.stringify({ ok: false, error: result.error }) + "\n",
+              );
+            } else {
+              log.error(`Error: ${result.error}`);
+            }
+            process.exitCode = 1;
+            return;
+          }
+          const deleted = result.result!.deleted;
+          if (opts.json) {
+            process.stdout.write(JSON.stringify({ ok: true, deleted }) + "\n");
+          } else {
+            if (deleted) {
+              log.info(`Deleted cache entry "${key}".`);
+            } else {
+              log.info(`No cache entry "${key}" (nothing to delete).`);
+            }
+          }
+        });
     },
   });
 }

package/src/config/seed-inference-profiles.ts CHANGED Viewed

@@ -88,15 +88,24 @@ const MANAGED_PROFILE_TEMPLATES: Record<string, ManagedProfileTemplate> = {
     // profile there's nothing stronger to consult, so the advisor defaults off.
     advisorEnabled: false,
   },
+  // Served by DeepSeek V4 Flash on Fireworks via managed platform inference: a
+  // fast, low-cost open model. `model` is pinned explicitly rather than
+  // resolved via the `latency-optimized` intent (which still maps to Kimi K2.5
+  // on Fireworks and Anthropic Haiku elsewhere).
+  //
+  // `effort: "none"` (not "low") because Fireworks is not thinking-aware: the
+  // disabled `thinking` config is stripped before the request, so a non-"none"
+  // effort would be sent as `reasoning_effort` and make this profile pay for
+  // reasoning despite thinking being off. "none" keeps Speed non-reasoning.
   "cost-optimized": {
-    intent: "latency-optimized",
-    provider: "anthropic",
-    connectionName: "anthropic-managed",
+    model: "accounts/fireworks/models/deepseek-v4-flash",
+    provider: "fireworks",
+    connectionName: "fireworks-managed",
     source: "managed",
     label: "Speed",
-    description: "Fastest responses at lower cost",
+    description: "Fastest responses at lower cost (DeepSeek V4 Flash)",
     maxTokens: 8192,
-    effort: "low",
+    effort: "none",
     thinking: { enabled: false, streamThinking: false },
     contextWindow: { maxInputTokens: DEFAULT_CONTEXT_WINDOW_MAX_INPUT_TOKENS },
   },

package/src/providers/model-catalog.ts CHANGED Viewed

@@ -760,6 +760,22 @@ const RAW_PROVIDER_CATALOG: ProviderCatalogEntry[] = [
         maxEffort: "max",
         pricing: { inputPer1mTokens: 1.74, outputPer1mTokens: 3.48 },
       },
+      {
+        id: "accounts/fireworks/models/deepseek-v4-flash",
+        displayName: "DeepSeek V4 Flash",
+        contextWindowTokens: 1040000,
+        maxOutputTokens: 131072,
+        supportsThinking: true,
+        supportsCaching: true,
+        supportsVision: false,
+        supportsToolUse: true,
+        maxEffort: "max",
+        pricing: {
+          inputPer1mTokens: 0.14,
+          outputPer1mTokens: 0.28,
+          cacheReadPer1mTokens: 0.03,
+        },
+      },
     ],
     defaultModel: "accounts/fireworks/models/kimi-k2p5",
     apiKeyUrl: "https://fireworks.ai/account/api-keys",