@assistant-ui/mcp-docs-server 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/.docs/organized/code-examples/waterfall.md +8 -6
  2. package/.docs/organized/code-examples/with-a2a.md +676 -0
  3. package/.docs/organized/code-examples/with-ag-ui.md +10 -11
  4. package/.docs/organized/code-examples/with-ai-sdk-v6.md +31 -19
  5. package/.docs/organized/code-examples/with-artifacts.md +8 -8
  6. package/.docs/organized/code-examples/with-assistant-transport.md +6 -6
  7. package/.docs/organized/code-examples/with-chain-of-thought.md +37 -29
  8. package/.docs/organized/code-examples/with-cloud-standalone.md +14 -11
  9. package/.docs/organized/code-examples/with-cloud.md +8 -8
  10. package/.docs/organized/code-examples/with-custom-thread-list.md +10 -10
  11. package/.docs/organized/code-examples/with-elevenlabs-scribe.md +11 -11
  12. package/.docs/organized/code-examples/with-expo.md +571 -520
  13. package/.docs/organized/code-examples/with-external-store.md +6 -6
  14. package/.docs/organized/code-examples/with-ffmpeg.md +8 -8
  15. package/.docs/organized/code-examples/with-google-adk.md +353 -0
  16. package/.docs/organized/code-examples/with-heat-graph.md +304 -0
  17. package/.docs/organized/code-examples/with-interactables.md +778 -0
  18. package/.docs/organized/code-examples/with-langgraph.md +28 -26
  19. package/.docs/organized/code-examples/with-parent-id-grouping.md +7 -7
  20. package/.docs/organized/code-examples/with-react-hook-form.md +9 -9
  21. package/.docs/organized/code-examples/with-react-ink.md +265 -0
  22. package/.docs/organized/code-examples/with-react-router.md +12 -12
  23. package/.docs/organized/code-examples/with-store.md +33 -22
  24. package/.docs/organized/code-examples/with-tanstack.md +10 -10
  25. package/.docs/organized/code-examples/with-tap-runtime.md +12 -10
  26. package/.docs/raw/blog/2025-01-31-changelog/index.mdx +1 -1
  27. package/.docs/raw/blog/2026-03-launch-week/index.mdx +258 -0
  28. package/.docs/raw/docs/(docs)/architecture.mdx +1 -1
  29. package/.docs/raw/docs/(docs)/cli.mdx +74 -9
  30. package/.docs/raw/docs/(docs)/copilots/make-assistant-tool-ui.mdx +8 -3
  31. package/.docs/raw/docs/(docs)/copilots/make-assistant-tool.mdx +5 -1
  32. package/.docs/raw/docs/(docs)/copilots/{make-assistant-readable.mdx → make-assistant-visible.mdx} +14 -5
  33. package/.docs/raw/docs/(docs)/copilots/model-context.mdx +11 -11
  34. package/.docs/raw/docs/(docs)/copilots/motivation.mdx +2 -2
  35. package/.docs/raw/docs/(docs)/devtools.mdx +3 -2
  36. package/.docs/raw/docs/(docs)/guides/attachments.mdx +74 -15
  37. package/.docs/raw/docs/(docs)/guides/branching.mdx +11 -6
  38. package/.docs/raw/docs/(docs)/guides/chain-of-thought.mdx +18 -16
  39. package/.docs/raw/docs/(docs)/guides/context-api.mdx +81 -43
  40. package/.docs/raw/docs/(docs)/guides/dictation.mdx +5 -5
  41. package/.docs/raw/docs/(docs)/guides/editing.mdx +16 -7
  42. package/.docs/raw/docs/(docs)/guides/interactables.mdx +292 -0
  43. package/.docs/raw/docs/(docs)/guides/latex.mdx +3 -0
  44. package/.docs/raw/docs/(docs)/guides/message-timing.mdx +5 -4
  45. package/.docs/raw/docs/(docs)/guides/multi-agent.mdx +174 -0
  46. package/.docs/raw/docs/(docs)/guides/quoting.mdx +55 -206
  47. package/.docs/raw/docs/(docs)/guides/speech.mdx +1 -4
  48. package/.docs/raw/docs/(docs)/guides/suggestions.mdx +9 -15
  49. package/.docs/raw/docs/(docs)/guides/tool-ui.mdx +17 -7
  50. package/.docs/raw/docs/(docs)/guides/tools.mdx +24 -9
  51. package/.docs/raw/docs/(docs)/index.mdx +3 -3
  52. package/.docs/raw/docs/(docs)/installation.mdx +69 -46
  53. package/.docs/raw/docs/(reference)/api-reference/context-providers/text-message-part-provider.mdx +20 -6
  54. package/.docs/raw/docs/(reference)/api-reference/integrations/react-data-stream.mdx +24 -4
  55. package/.docs/raw/docs/(reference)/api-reference/integrations/react-hook-form.mdx +1 -1
  56. package/.docs/raw/docs/(reference)/api-reference/integrations/vercel-ai-sdk.mdx +20 -19
  57. package/.docs/raw/docs/(reference)/api-reference/overview.mdx +28 -53
  58. package/.docs/raw/docs/(reference)/api-reference/primitives/action-bar.mdx +4 -4
  59. package/.docs/raw/docs/(reference)/api-reference/primitives/assistant-modal.mdx +7 -1
  60. package/.docs/raw/docs/(reference)/api-reference/primitives/attachment.mdx +20 -14
  61. package/.docs/raw/docs/(reference)/api-reference/primitives/branch-picker.mdx +1 -1
  62. package/.docs/raw/docs/(reference)/api-reference/primitives/composer.mdx +226 -44
  63. package/.docs/raw/docs/(reference)/api-reference/primitives/message-part.mdx +52 -40
  64. package/.docs/raw/docs/(reference)/api-reference/primitives/message.mdx +343 -23
  65. package/.docs/raw/docs/(reference)/api-reference/primitives/suggestion.mdx +4 -6
  66. package/.docs/raw/docs/(reference)/api-reference/primitives/thread-list-item.mdx +4 -2
  67. package/.docs/raw/docs/(reference)/api-reference/primitives/thread-list.mdx +3 -5
  68. package/.docs/raw/docs/(reference)/api-reference/primitives/thread.mdx +169 -22
  69. package/.docs/raw/docs/(reference)/api-reference/runtimes/assistant-runtime.mdx +14 -4
  70. package/.docs/raw/docs/(reference)/api-reference/runtimes/attachment-runtime.mdx +15 -26
  71. package/.docs/raw/docs/(reference)/api-reference/runtimes/composer-runtime.mdx +39 -21
  72. package/.docs/raw/docs/(reference)/api-reference/runtimes/message-part-runtime.mdx +33 -9
  73. package/.docs/raw/docs/(reference)/api-reference/runtimes/message-runtime.mdx +48 -21
  74. package/.docs/raw/docs/(reference)/api-reference/runtimes/thread-list-item-runtime.mdx +36 -7
  75. package/.docs/raw/docs/(reference)/api-reference/runtimes/thread-list-runtime.mdx +30 -10
  76. package/.docs/raw/docs/(reference)/api-reference/runtimes/thread-runtime.mdx +12 -10
  77. package/.docs/raw/docs/(reference)/migrations/deprecation-policy.mdx +1 -1
  78. package/.docs/raw/docs/(reference)/migrations/react-langgraph-v0-7.mdx +9 -4
  79. package/.docs/raw/docs/(reference)/migrations/v0-11.mdx +7 -5
  80. package/.docs/raw/docs/(reference)/migrations/v0-12.mdx +9 -7
  81. package/.docs/raw/docs/(reference)/migrations/v0-14.mdx +159 -0
  82. package/.docs/raw/docs/(reference)/react-compatibility.mdx +5 -134
  83. package/.docs/raw/docs/cloud/ai-sdk-assistant-ui.mdx +90 -6
  84. package/.docs/raw/docs/cloud/ai-sdk.mdx +95 -5
  85. package/.docs/raw/docs/cloud/langgraph.mdx +13 -3
  86. package/.docs/raw/docs/ink/adapters.mdx +41 -0
  87. package/.docs/raw/docs/ink/custom-backend.mdx +203 -0
  88. package/.docs/raw/docs/ink/hooks.mdx +448 -0
  89. package/.docs/raw/docs/ink/index.mdx +239 -0
  90. package/.docs/raw/docs/ink/migration.mdx +140 -0
  91. package/.docs/raw/docs/ink/primitives.mdx +840 -0
  92. package/.docs/raw/docs/primitives/action-bar.mdx +351 -0
  93. package/.docs/raw/docs/primitives/assistant-modal.mdx +215 -0
  94. package/.docs/raw/docs/primitives/attachment.mdx +216 -0
  95. package/.docs/raw/docs/primitives/branch-picker.mdx +221 -0
  96. package/.docs/raw/docs/primitives/chain-of-thought.mdx +311 -0
  97. package/.docs/raw/docs/primitives/composer.mdx +526 -0
  98. package/.docs/raw/docs/primitives/error.mdx +141 -0
  99. package/.docs/raw/docs/primitives/index.mdx +98 -0
  100. package/.docs/raw/docs/primitives/message.mdx +524 -0
  101. package/.docs/raw/docs/primitives/selection-toolbar.mdx +165 -0
  102. package/.docs/raw/docs/primitives/suggestion.mdx +242 -0
  103. package/.docs/raw/docs/primitives/thread-list.mdx +404 -0
  104. package/.docs/raw/docs/primitives/thread.mdx +482 -0
  105. package/.docs/raw/docs/react-native/adapters.mdx +63 -87
  106. package/.docs/raw/docs/react-native/custom-backend.mdx +11 -14
  107. package/.docs/raw/docs/react-native/hooks.mdx +214 -232
  108. package/.docs/raw/docs/react-native/index.mdx +118 -159
  109. package/.docs/raw/docs/react-native/migration.mdx +144 -0
  110. package/.docs/raw/docs/react-native/primitives.mdx +431 -302
  111. package/.docs/raw/docs/runtimes/a2a/index.mdx +294 -0
  112. package/.docs/raw/docs/runtimes/ai-sdk/v4-legacy.mdx +9 -9
  113. package/.docs/raw/docs/runtimes/ai-sdk/v5-legacy.mdx +14 -3
  114. package/.docs/raw/docs/runtimes/assistant-transport.mdx +59 -25
  115. package/.docs/raw/docs/runtimes/custom/custom-thread-list.mdx +13 -6
  116. package/.docs/raw/docs/runtimes/custom/external-store.mdx +138 -38
  117. package/.docs/raw/docs/runtimes/custom/local.mdx +184 -42
  118. package/.docs/raw/docs/runtimes/data-stream.mdx +92 -19
  119. package/.docs/raw/docs/runtimes/google-adk/index.mdx +624 -0
  120. package/.docs/raw/docs/runtimes/helicone.mdx +6 -6
  121. package/.docs/raw/docs/runtimes/langgraph/index.mdx +38 -27
  122. package/.docs/raw/docs/runtimes/langgraph/tutorial/introduction.mdx +1 -1
  123. package/.docs/raw/docs/runtimes/langgraph/tutorial/part-1.mdx +15 -20
  124. package/.docs/raw/docs/runtimes/langgraph/tutorial/part-2.mdx +7 -11
  125. package/.docs/raw/docs/runtimes/langgraph/tutorial/part-3.mdx +8 -11
  126. package/.docs/raw/docs/runtimes/langserve.mdx +6 -7
  127. package/.docs/raw/docs/runtimes/pick-a-runtime.mdx +18 -3
  128. package/.docs/raw/docs/ui/file.mdx +5 -4
  129. package/.docs/raw/docs/ui/image.mdx +5 -4
  130. package/.docs/raw/docs/ui/markdown.mdx +3 -1
  131. package/.docs/raw/docs/ui/mention.mdx +168 -0
  132. package/.docs/raw/docs/ui/model-selector.mdx +8 -8
  133. package/.docs/raw/docs/ui/part-grouping.mdx +7 -10
  134. package/.docs/raw/docs/ui/quote.mdx +210 -0
  135. package/.docs/raw/docs/ui/reasoning.mdx +12 -11
  136. package/.docs/raw/docs/ui/sources.mdx +88 -17
  137. package/.docs/raw/docs/ui/streamdown.mdx +16 -7
  138. package/.docs/raw/docs/ui/thread-list.mdx +11 -13
  139. package/.docs/raw/docs/ui/thread.mdx +28 -33
  140. package/.docs/raw/docs/ui/tool-fallback.mdx +5 -6
  141. package/.docs/raw/docs/ui/tool-group.mdx +9 -8
  142. package/.docs/raw/docs/utilities/heat-graph.mdx +236 -0
  143. package/.docs/raw/docs/utilities/tw-shimmer.mdx +211 -0
  144. package/package.json +5 -5
  145. package/.docs/raw/docs/(reference)/legacy/styled/assistant-modal.mdx +0 -77
  146. package/.docs/raw/docs/(reference)/legacy/styled/decomposition.mdx +0 -635
  147. package/.docs/raw/docs/(reference)/legacy/styled/markdown.mdx +0 -77
  148. package/.docs/raw/docs/(reference)/legacy/styled/scrollbar.mdx +0 -72
  149. package/.docs/raw/docs/(reference)/legacy/styled/thread-width.mdx +0 -22
  150. package/.docs/raw/docs/(reference)/legacy/styled/thread.mdx +0 -77
  151. /package/.docs/raw/docs/cloud/{overview.mdx → index.mdx} +0 -0
@@ -26,7 +26,7 @@ The `useChatRuntime` hook from `@assistant-ui/react-ai-sdk` wraps AI SDK's `useC
26
26
  3. Generates a conversation title after the assistant's first response
27
27
  4. Loads historical messages when switching threads via `<ThreadList />`
28
28
 
29
- You provide the AI SDK endpoint (`api: "/api/chat"`) and the cloud configuration—everything else is handled.
29
+ You provide the cloud configuration—everything else is handled. The default `AssistantChatTransport` automatically sends requests to `/api/chat`.
30
30
 
31
31
  ## Prerequisites
32
32
 
@@ -84,16 +84,21 @@ Create a client-side AssistantCloud instance and integrate it with your AI SDK r
84
84
  ```tsx title="app/chat/page.tsx"
85
85
  "use client";
86
86
 
87
+ import { useMemo } from "react";
87
88
  import { AssistantCloud, AssistantRuntimeProvider } from "@assistant-ui/react";
88
89
  import { useChatRuntime } from "@assistant-ui/react-ai-sdk";
89
90
  import { ThreadList } from "@/components/assistant-ui/thread-list";
90
91
  import { Thread } from "@/components/assistant-ui/thread";
91
92
 
92
93
  export default function ChatPage() {
93
- const cloud = new AssistantCloud({
94
- baseUrl: process.env.NEXT_PUBLIC_ASSISTANT_BASE_URL!,
95
- anonymous: true, // Creates browser-session based user ID
96
- });
94
+ const cloud = useMemo(
95
+ () =>
96
+ new AssistantCloud({
97
+ baseUrl: process.env.NEXT_PUBLIC_ASSISTANT_BASE_URL!,
98
+ anonymous: true, // Creates browser-session based user ID
99
+ }),
100
+ [],
101
+ );
97
102
 
98
103
  const runtime = useChatRuntime({
99
104
  cloud,
@@ -114,6 +119,74 @@ export default function ChatPage() {
114
119
 
115
120
  </Steps>
116
121
 
122
+ ## `useChatRuntime` Options
123
+
124
+ <ParametersTable
125
+ parameters={[
126
+ {
127
+ name: "cloud",
128
+ type: "AssistantCloud",
129
+ description:
130
+ "Optional AssistantCloud instance for chat persistence and thread management.",
131
+ },
132
+ {
133
+ name: "adapters",
134
+ type: "RuntimeAdapters",
135
+ description:
136
+ "Optional runtime adapters to extend or override built-in functionality.",
137
+ children: [
138
+ {
139
+ type: "RuntimeAdapters",
140
+ parameters: [
141
+ {
142
+ name: "attachments",
143
+ type: "AttachmentAdapter",
144
+ description:
145
+ "Custom attachment adapter for file uploads. Defaults to the Vercel AI SDK attachment adapter.",
146
+ },
147
+ {
148
+ name: "speech",
149
+ type: "SpeechSynthesisAdapter",
150
+ description:
151
+ "Adapter for text-to-speech functionality.",
152
+ },
153
+ {
154
+ name: "dictation",
155
+ type: "DictationAdapter",
156
+ description:
157
+ "Adapter for speech-to-text dictation input.",
158
+ },
159
+ {
160
+ name: "feedback",
161
+ type: "FeedbackAdapter",
162
+ description:
163
+ "Adapter for collecting user feedback on messages.",
164
+ },
165
+ {
166
+ name: "history",
167
+ type: "ThreadHistoryAdapter",
168
+ description:
169
+ "Adapter for loading and saving thread history. Used to restore previous messages when switching threads.",
170
+ },
171
+ ],
172
+ },
173
+ ],
174
+ },
175
+ {
176
+ name: "toCreateMessage",
177
+ type: "(message: AppendMessage) => CreateUIMessage",
178
+ description:
179
+ "Optional custom function to convert an assistant-ui AppendMessage into an AI SDK CreateUIMessage before sending. Use this to customize how outgoing messages are formatted, for example to add custom metadata or transform content parts.",
180
+ },
181
+ {
182
+ name: "transport",
183
+ type: "ChatTransport",
184
+ description:
185
+ "Custom transport implementation. Defaults to AssistantChatTransport which sends requests to '/api/chat'.",
186
+ },
187
+ ]}
188
+ />
189
+
117
190
  ## Telemetry
118
191
 
119
192
  The `useChatRuntime` hook captures full run telemetry including timing data. This integrates with the assistant-ui runtime to provide:
@@ -134,10 +207,13 @@ To capture model and usage data, add the `messageMetadata` callback to your AI S
134
207
 
135
208
  ```tsx title="app/api/chat/route.ts"
136
209
  import { streamText } from "ai";
210
+ import { openai } from "@ai-sdk/openai";
137
211
 
138
212
  export async function POST(req: Request) {
213
+ const { messages } = await req.json();
214
+
139
215
  const result = streamText({
140
- model: openai("gpt-5-mini"),
216
+ model: openai("gpt-4o-mini"),
141
217
  messages,
142
218
  });
143
219
 
@@ -168,6 +244,7 @@ Use the `beforeReport` hook to add custom metadata or filter reports:
168
244
  ```tsx
169
245
  const cloud = new AssistantCloud({
170
246
  baseUrl: process.env.NEXT_PUBLIC_ASSISTANT_BASE_URL!,
247
+ anonymous: true,
171
248
  telemetry: {
172
249
  beforeReport: (report) => ({
173
250
  ...report,
@@ -179,11 +256,18 @@ const cloud = new AssistantCloud({
179
256
 
180
257
  Return `null` from `beforeReport` to skip reporting a specific run. To disable telemetry entirely, pass `telemetry: false`.
181
258
 
259
+ ### Sub-Agent Model Tracking
260
+
261
+ When tool calls delegate to a different model (e.g., the main run uses GPT but a tool invokes Gemini), you can track the delegated model's usage. Pass sampling call data through `messageMetadata.samplingCalls` in your API route, and the telemetry reporter will automatically include it in the report.
262
+
263
+ See the [AI SDK Telemetry guide](/docs/cloud/ai-sdk#sub-agent-model-tracking) for the full setup with `createSamplingCollector` and `wrapSamplingHandler`.
264
+
182
265
  ## Authentication
183
266
 
184
267
  The example above uses anonymous mode (browser session-based user ID) via the env var. For production apps with user accounts, pass an explicit cloud instance:
185
268
 
186
269
  ```tsx
270
+ import { useMemo } from "react";
187
271
  import { useAuth } from "@clerk/nextjs";
188
272
  import { AssistantCloud } from "@assistant-ui/react";
189
273
  import { useChatRuntime } from "@assistant-ui/react-ai-sdk";
@@ -49,7 +49,7 @@ NEXT_PUBLIC_ASSISTANT_BASE_URL=https://proj-[YOUR-ID].assistant-api.com
49
49
 
50
50
  ### Install Dependencies
51
51
 
52
- <InstallCommand npm={["@assistant-ui/cloud-ai-sdk", "assistant-cloud", "@ai-sdk/react", "ai"]} />
52
+ <InstallCommand npm={["@assistant-ui/cloud-ai-sdk", "@ai-sdk/react", "ai"]} />
53
53
 
54
54
  </Step>
55
55
 
@@ -151,7 +151,7 @@ const chat = useCloudChat({ threads: myThreads });
151
151
  | `options.threads` | `UseThreadsResult` | External thread management from `useThreads()`. Use when you need thread operations in a separate component or custom thread options like `includeArchived` |
152
152
  | `options.onSyncError` | `(error: Error) => void` | Callback invoked when a sync error occurs |
153
153
 
154
- All other [AI SDK `useChat` options](https://sdk.vercel.ai/docs/reference/ai-sdk-ui/use-chat) are also accepted.
154
+ A subset of [AI SDK `useChat` options](https://sdk.vercel.ai/docs/reference/ai-sdk-ui/use-chat) are also accepted (those defined on `ChatInit`). Some options available on `useChat` such as `experimental_throttle` and `resume` are not supported.
155
155
 
156
156
  **Returns:** `UseCloudChatResult`
157
157
 
@@ -169,12 +169,15 @@ Plus all other properties from AI SDK's [`UseChatHelpers`](https://sdk.vercel.ai
169
169
 
170
170
  | Value | Type | Description |
171
171
  |-------|------|-------------|
172
+ | `threads.cloud` | `AssistantCloud` | The cloud instance used for thread operations |
172
173
  | `threads.threads` | `CloudThread[]` | Active threads sorted by recency |
173
174
  | `threads.threadId` | `string \| null` | Current thread ID (`null` for a new unsaved chat) |
174
175
  | `threads.selectThread` | `(id: string \| null) => void` | Switch threads or pass `null` for a new chat |
175
176
  | `threads.isLoading` | `boolean` | `true` during initial load or refresh |
176
177
  | `threads.error` | `Error \| null` | Last error, if any |
177
178
  | `threads.refresh` | `() => Promise<boolean>` | Re-fetch the thread list |
179
+ | `threads.get` | `(id: string) => Promise<CloudThread \| null>` | Fetch a single thread by ID |
180
+ | `threads.create` | `(options?: \{ externalId?: string \}) => Promise<CloudThread \| null>` | Create a new thread |
178
181
  | `threads.delete` | `(id: string) => Promise<boolean>` | Delete a thread |
179
182
  | `threads.rename` | `(id: string, title: string) => Promise<boolean>` | Rename a thread |
180
183
  | `threads.archive` | `(id: string) => Promise<boolean>` | Archive a thread |
@@ -206,22 +209,27 @@ The `useCloudChat` hook automatically reports run telemetry to Assistant Cloud a
206
209
 
207
210
  **Automatically captured:**
208
211
  - `status` — `"completed"` or `"incomplete"` based on response content
209
- - `tool_calls` — Tool invocations with name, arguments, results, and source (MCP, frontend, or backend)
212
+ - `tool_calls` — Tool invocations with name, arguments, and results. MCP tool calls are explicitly tagged with `tool_source: "mcp"`
210
213
  - `total_steps` — Number of reasoning/tool steps in the response
211
214
  - `output_text` — Full response text (truncated at 50K characters)
212
215
 
213
216
  **Requires route configuration:**
214
217
  - `model_id` — The model used for the response
215
218
  - `input_tokens` / `output_tokens` — Token usage statistics
219
+ - `reasoning_tokens` — Tokens used for chain-of-thought reasoning (e.g. o1/o3 models)
220
+ - `cached_input_tokens` — Input tokens served from the provider's prompt cache
216
221
 
217
222
  To capture model and usage data, configure the `messageMetadata` callback in your AI SDK route:
218
223
 
219
224
  ```tsx title="app/api/chat/route.ts"
220
225
  import { streamText } from "ai";
226
+ import { openai } from "@ai-sdk/openai";
221
227
 
222
228
  export async function POST(req: Request) {
229
+ const { messages } = await req.json();
230
+
223
231
  const result = streamText({
224
- model: openai("gpt-5-mini"),
232
+ model: openai("gpt-4o-mini"),
225
233
  messages,
226
234
  });
227
235
 
@@ -244,7 +252,7 @@ export async function POST(req: Request) {
244
252
  ```
245
253
 
246
254
  <Callout type="info">
247
- The standalone hook does not capture `duration_ms`, per-step breakdowns (`steps`), custom `metadata` pass-through, or `"error"` status. These require the full runtime integration available via [`useChatRuntime`](/docs/cloud/ai-sdk-assistant-ui).
255
+ The standalone hook captures message metadata when it is JSON-serializable, but it does not capture `duration_ms`, per-step breakdowns (`steps`), or `"error"` status. Those require the full runtime integration available via [`useChatRuntime`](/docs/cloud/ai-sdk-assistant-ui).
248
256
  </Callout>
249
257
 
250
258
  ### Customizing Reports
@@ -254,6 +262,7 @@ Use the `beforeReport` hook to enrich or filter telemetry:
254
262
  ```tsx
255
263
  const cloud = new AssistantCloud({
256
264
  baseUrl: process.env.NEXT_PUBLIC_ASSISTANT_BASE_URL!,
265
+ anonymous: true,
257
266
  telemetry: {
258
267
  beforeReport: (report) => ({
259
268
  ...report,
@@ -265,11 +274,92 @@ const cloud = new AssistantCloud({
265
274
 
266
275
  Return `null` from `beforeReport` to skip reporting a specific run. To disable telemetry entirely, pass `telemetry: false`.
267
276
 
277
+ ### Sub-Agent Model Tracking
278
+
279
+ In multi-agent setups where tool calls delegate to a different model (e.g., the main run uses GPT but a tool invokes Gemini), you can track the delegated model's usage by passing sampling call data through `messageMetadata`.
280
+
281
+ **Step 1: Collect sampling data on the server**
282
+
283
+ Use `createSamplingCollector` and `wrapSamplingHandler` from `assistant-cloud` to capture LLM calls made during tool execution:
284
+
285
+ ```ts title="app/api/chat/route.ts"
286
+ import { streamText } from "ai";
287
+ import { openai } from "@ai-sdk/openai";
288
+ import {
289
+ createSamplingCollector,
290
+ wrapSamplingHandler,
291
+ } from "assistant-cloud";
292
+
293
+ export async function POST(req: Request) {
294
+ const { messages } = await req.json();
295
+
296
+ // Collect sub-agent sampling calls per tool call
297
+ const samplingCalls: Record<string, SamplingCallData[]> = {};
298
+
299
+ const result = streamText({
300
+ model: openai("gpt-4o"),
301
+ messages,
302
+ tools: {
303
+ delegate_to_gemini: tool({
304
+ parameters: z.object({ task: z.string() }),
305
+ execute: async ({ task }, { toolCallId }) => {
306
+ const collector = createSamplingCollector();
307
+ // Your sub-agent logic that calls another model
308
+ const result = await runSubAgent(task, {
309
+ onSamplingCall: collector.collect,
310
+ });
311
+ samplingCalls[toolCallId] = collector.getCalls();
312
+ return result;
313
+ },
314
+ }),
315
+ },
316
+ });
317
+
318
+ return result.toUIMessageStreamResponse({
319
+ messageMetadata: ({ part }) => {
320
+ if (part.type === "finish") {
321
+ return {
322
+ usage: part.totalUsage,
323
+ samplingCalls, // attach collected sampling data
324
+ };
325
+ }
326
+ if (part.type === "finish-step") {
327
+ return { modelId: part.response.modelId };
328
+ }
329
+ return undefined;
330
+ },
331
+ });
332
+ }
333
+ ```
334
+
335
+ **Step 2: That's it.** The telemetry reporter automatically reads `samplingCalls` from message metadata and attaches the data to matching tool calls in the report. The Cloud dashboard will show each delegated model in the model distribution chart with its own token and cost breakdown.
336
+
337
+ <Callout type="info">
338
+ For MCP tools that use the sampling protocol, `wrapSamplingHandler` can wrap the MCP client's sampling handler directly to capture all nested LLM calls transparently.
339
+ </Callout>
340
+
341
+ <Callout type="tip">
342
+ **On older versions** that don't yet read `samplingCalls` from metadata, use `beforeReport` to inject the data manually:
343
+
344
+ ```ts
345
+ telemetry: {
346
+ beforeReport: (report) => ({
347
+ ...report,
348
+ tool_calls: report.tool_calls?.map((tc) => ({
349
+ ...tc,
350
+ sampling_calls: samplingCalls[tc.tool_call_id],
351
+ })),
352
+ }),
353
+ }
354
+ ```
355
+ </Callout>
356
+
268
357
  ## Authentication
269
358
 
270
359
  The example above uses anonymous mode (browser session-based user ID) via the env var. For production apps with user accounts, pass an explicit cloud instance:
271
360
 
272
361
  ```tsx
362
+ import { useMemo } from "react";
273
363
  import { useAuth } from "@clerk/nextjs";
274
364
  import { AssistantCloud } from "assistant-cloud";
275
365
  import { useCloudChat } from "@assistant-ui/cloud-ai-sdk";
@@ -74,7 +74,7 @@ import {
74
74
  AssistantRuntimeProvider,
75
75
  } from "@assistant-ui/react";
76
76
  import { useLangGraphRuntime } from "@assistant-ui/react-langgraph";
77
- import { createThread, getThreadState, sendMessage } from "@/lib/chatApi";
77
+ import { createThread, deleteThread, getThreadState, sendMessage } from "@/lib/chatApi";
78
78
  import { LangChainMessage } from "@assistant-ui/react-langgraph";
79
79
  import { useMemo } from "react";
80
80
 
@@ -114,6 +114,9 @@ export function MyRuntimeProvider({
114
114
  (state.values as { messages?: LangChainMessage[] }).messages ?? [],
115
115
  };
116
116
  },
117
+ delete: async (externalId) => {
118
+ await deleteThread(externalId);
119
+ },
117
120
  });
118
121
 
119
122
  return (
@@ -136,7 +139,7 @@ import {
136
139
  AssistantRuntimeProvider,
137
140
  } from "@assistant-ui/react";
138
141
  import { useLangGraphRuntime } from "@assistant-ui/react-langgraph";
139
- import { createThread, getThreadState, sendMessage } from "@/lib/chatApi";
142
+ import { createThread, deleteThread, getThreadState, sendMessage } from "@/lib/chatApi";
140
143
  import { LangChainMessage } from "@assistant-ui/react-langgraph";
141
144
  import { useAuth } from "@clerk/nextjs";
142
145
  import { useMemo } from "react";
@@ -179,6 +182,9 @@ export function MyRuntimeProvider({
179
182
  (state.values as { messages?: LangChainMessage[] }).messages ?? [],
180
183
  };
181
184
  },
185
+ delete: async (externalId) => {
186
+ await deleteThread(externalId);
187
+ },
182
188
  });
183
189
 
184
190
  return (
@@ -199,7 +205,11 @@ export function MyRuntimeProvider({
199
205
  </Tabs>
200
206
 
201
207
  <Callout type="info">
202
- The `useLangGraphRuntime` hook now directly accepts `cloud`, `create`, and `load` parameters for simplified thread management. The runtime handles thread lifecycle internally.
208
+ The `useLangGraphRuntime` hook accepts `cloud`, `create`, `load`, and `delete` parameters for simplified thread management. The runtime handles the thread lifecycle internally.
209
+
210
+ - **`create`**: Called when creating a new thread. Returns `{ externalId }` with your backend's thread ID.
211
+ - **`load`**: Called when switching to an existing thread. Returns the thread's messages (and optionally interrupts).
212
+ - **`delete`**: Called when deleting a thread. Receives the thread's `externalId`. When provided, users can delete threads from the thread list UI.
203
213
  </Callout>
204
214
 
205
215
  </Step>
@@ -0,0 +1,41 @@
1
+ ---
2
+ title: Adapters
3
+ description: Title generation adapters for React Ink.
4
+ ---
5
+
6
+ Adapters customize runtime behavior. They can be passed as options to `useLocalRuntime` or `useRemoteThreadListRuntime`.
7
+
8
+ ## RemoteThreadListAdapter
9
+
10
+ Title generation is configured via the `generateTitle` method on `RemoteThreadListAdapter`. See the [Custom Backend](/docs/ink/custom-backend) page for a full example.
11
+
12
+ ```tsx
13
+ import type { RemoteThreadListAdapter } from "@assistant-ui/react-ink";
14
+ import { createAssistantStream } from "assistant-stream";
15
+
16
+ const myAdapter: RemoteThreadListAdapter = {
17
+ // ... other methods ...
18
+
19
+ async generateTitle(remoteId, unstable_messages) {
20
+ return createAssistantStream(async (controller) => {
21
+ const res = await fetch(`/api/threads/${remoteId}/title`, {
22
+ method: "POST",
23
+ headers: { "Content-Type": "application/json" },
24
+ body: JSON.stringify({ messages: unstable_messages }),
25
+ });
26
+ const { title } = await res.json();
27
+ controller.appendText(title);
28
+ });
29
+ },
30
+ };
31
+ ```
32
+
33
+ ## Which option to choose?
34
+
35
+ | | ChatModelAdapter + `useLocalRuntime` | RemoteThreadListAdapter + `useRemoteThreadListRuntime` |
36
+ |---|---|---|
37
+ | **Thread storage** | In-memory | Your backend |
38
+ | **Message storage** | In-memory | In-memory (can add history adapter for server-side) |
39
+ | **Cross-session persistence** | No | Yes |
40
+ | **Setup complexity** | Minimal | Moderate |
41
+ | **Best for** | CLI tools, demos, prototypes | Production apps with persistence |
@@ -0,0 +1,203 @@
1
+ ---
2
+ title: Custom Backend
3
+ description: Connect your terminal app to your own backend API.
4
+ ---
5
+
6
+ By default, `useLocalRuntime` manages threads and messages in memory. You can connect to your own backend in two ways depending on your needs.
7
+
8
+ ## Option 1: ChatModelAdapter only
9
+
10
+ The simplest approach — keep thread management local, but send messages to your backend for inference.
11
+
12
+ ```tsx title="adapters/my-chat-adapter.ts"
13
+ import type { ChatModelAdapter } from "@assistant-ui/react-ink";
14
+
15
+ export const myChatAdapter: ChatModelAdapter = {
16
+ async *run({ messages, abortSignal }) {
17
+ const response = await fetch("https://my-api.com/chat", {
18
+ method: "POST",
19
+ headers: { "Content-Type": "application/json" },
20
+ body: JSON.stringify({ messages }),
21
+ signal: abortSignal,
22
+ });
23
+
24
+ const reader = response.body?.getReader();
25
+ if (!reader) throw new Error("No response body");
26
+
27
+ const decoder = new TextDecoder();
28
+ let fullText = "";
29
+
30
+ while (true) {
31
+ const { done, value } = await reader.read();
32
+ if (done) break;
33
+
34
+ const chunk = decoder.decode(value, { stream: true });
35
+ fullText += chunk;
36
+ yield { content: [{ type: "text", text: fullText }] };
37
+ }
38
+ },
39
+ };
40
+ ```
41
+
42
+ ```tsx title="app.tsx"
43
+ import { useLocalRuntime, AssistantRuntimeProvider } from "@assistant-ui/react-ink";
44
+ import { myChatAdapter } from "./adapters/my-chat-adapter.js";
45
+
46
+ export function App() {
47
+ const runtime = useLocalRuntime(myChatAdapter);
48
+ return (
49
+ <AssistantRuntimeProvider runtime={runtime}>
50
+ {/* your chat UI */}
51
+ </AssistantRuntimeProvider>
52
+ );
53
+ }
54
+ ```
55
+
56
+ This gives you:
57
+
58
+ - Streaming chat responses from your API
59
+ - In-memory thread list (lost on process exit)
60
+ - Multi-thread support
61
+
62
+ ## Option 2: Full backend thread management
63
+
64
+ When you want your backend to own thread state (e.g. for persistence across sessions, team sharing, or server-side history), implement a `RemoteThreadListAdapter`.
65
+
66
+ <Steps>
67
+ <Step>
68
+
69
+ ### Implement the adapter
70
+
71
+ ```tsx title="adapters/my-thread-list-adapter.ts"
72
+ import type { RemoteThreadListAdapter } from "@assistant-ui/react-ink";
73
+ import { createAssistantStream } from "assistant-stream";
74
+
75
+ const API_BASE = "https://my-api.com";
76
+
77
+ export const myThreadListAdapter: RemoteThreadListAdapter = {
78
+ async list() {
79
+ const res = await fetch(`${API_BASE}/threads`);
80
+ const threads = await res.json();
81
+ return {
82
+ threads: threads.map((t: any) => ({
83
+ remoteId: t.id,
84
+ status: t.archived ? "archived" : "regular",
85
+ title: t.title,
86
+ })),
87
+ };
88
+ },
89
+
90
+ async initialize(localId) {
91
+ const res = await fetch(`${API_BASE}/threads`, {
92
+ method: "POST",
93
+ headers: { "Content-Type": "application/json" },
94
+ body: JSON.stringify({ localId }),
95
+ });
96
+ const { id } = await res.json();
97
+ return { remoteId: id, externalId: undefined };
98
+ },
99
+
100
+ async rename(remoteId, title) {
101
+ await fetch(`${API_BASE}/threads/${remoteId}`, {
102
+ method: "PATCH",
103
+ headers: { "Content-Type": "application/json" },
104
+ body: JSON.stringify({ title }),
105
+ });
106
+ },
107
+
108
+ async archive(remoteId) {
109
+ await fetch(`${API_BASE}/threads/${remoteId}/archive`, {
110
+ method: "POST",
111
+ });
112
+ },
113
+
114
+ async unarchive(remoteId) {
115
+ await fetch(`${API_BASE}/threads/${remoteId}/unarchive`, {
116
+ method: "POST",
117
+ });
118
+ },
119
+
120
+ async delete(remoteId) {
121
+ await fetch(`${API_BASE}/threads/${remoteId}`, { method: "DELETE" });
122
+ },
123
+
124
+ async fetch(remoteId) {
125
+ const res = await fetch(`${API_BASE}/threads/${remoteId}`);
126
+ const t = await res.json();
127
+ return {
128
+ remoteId: t.id,
129
+ status: t.archived ? "archived" : "regular",
130
+ title: t.title,
131
+ };
132
+ },
133
+
134
+ async generateTitle(remoteId, unstable_messages) {
135
+ return createAssistantStream(async (controller) => {
136
+ const res = await fetch(`${API_BASE}/threads/${remoteId}/title`, {
137
+ method: "POST",
138
+ headers: { "Content-Type": "application/json" },
139
+ body: JSON.stringify({ messages: unstable_messages }),
140
+ });
141
+ const { title } = await res.json();
142
+ controller.appendText(title);
143
+ });
144
+ },
145
+ };
146
+ ```
147
+
148
+ </Step>
149
+ <Step>
150
+
151
+ ### Compose the runtime
152
+
153
+ ```tsx title="app.tsx"
154
+ import {
155
+ useLocalRuntime,
156
+ useRemoteThreadListRuntime,
157
+ AssistantRuntimeProvider,
158
+ } from "@assistant-ui/react-ink";
159
+ import { myChatAdapter } from "./adapters/my-chat-adapter.js";
160
+ import { myThreadListAdapter } from "./adapters/my-thread-list-adapter.js";
161
+
162
+ function useAppRuntime() {
163
+ return useRemoteThreadListRuntime({
164
+ runtimeHook: () => useLocalRuntime(myChatAdapter),
165
+ adapter: myThreadListAdapter,
166
+ });
167
+ }
168
+
169
+ export function App() {
170
+ const runtime = useAppRuntime();
171
+ return (
172
+ <AssistantRuntimeProvider runtime={runtime}>
173
+ {/* your chat UI */}
174
+ </AssistantRuntimeProvider>
175
+ );
176
+ }
177
+ ```
178
+
179
+ </Step>
180
+ </Steps>
181
+
182
+ ## Adapter methods
183
+
184
+ | Method | Description |
185
+ |--------|-------------|
186
+ | `list()` | Return all threads on mount |
187
+ | `initialize(localId)` | Create a thread server-side, return `{ remoteId }` |
188
+ | `rename(remoteId, title)` | Persist title changes |
189
+ | `archive(remoteId)` | Mark thread as archived |
190
+ | `unarchive(remoteId)` | Restore archived thread |
191
+ | `delete(remoteId)` | Permanently remove thread |
192
+ | `fetch(remoteId)` | Fetch single thread metadata |
193
+ | `generateTitle(remoteId, unstable_messages)` | Return an `AssistantStream` with the generated title |
194
+
195
+ ## Which option to choose?
196
+
197
+ | | Option 1: ChatModelAdapter | Option 2: RemoteThreadListAdapter |
198
+ |---|---|---|
199
+ | **Thread storage** | In-memory (process lifetime) | Your backend |
200
+ | **Message storage** | In-memory | In-memory (can add history adapter for server-side) |
201
+ | **Cross-session persistence** | No | Yes |
202
+ | **Setup complexity** | Minimal | Moderate |
203
+ | **Best for** | CLI tools, demos, prototypes | Production apps with persistence |