@mcp-b/embedded-agent 0.0.7 → 0.0.8-beta.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,286 +2,330 @@
2
2
 
3
3
  React components for embedding an AI agent UI with MCP (Model Context Protocol) tool support and voice mode.
4
4
 
5
- ## Architecture Overview
5
+ ## Table of Contents
6
+
7
+ - [Quick Start](#quick-start)
8
+ - [Architecture Overview](#architecture-overview)
9
+ - [Core API](#core-api)
10
+ - [Widgets](#widgets)
11
+ - [Voice Mode](#voice-mode)
12
+ - [MCP Tools](#mcp-tools)
13
+ - [Legacy API](#legacy-api)
14
+ - [Development](#development)
15
+ - [Design Documents](#design-documents)
6
16
 
7
- The package follows a **core/widgets** architecture:
17
+ ## Quick Start
8
18
 
9
- - **Core** (`src/core/`): Data layer - providers and hooks, no UI
10
- - **Widgets** (`src/widgets/`): UI layer - independent components that consume core
19
+ ### Option 1: Web Component (Easiest)
11
20
 
12
- ```
13
- src/
14
- ├── core/ # DATA LAYER
15
- │ ├── providers/
16
- │ │ ├── AgentProvider.tsx # Unified provider (START HERE)
17
- │ │ └── index.ts
18
- │ ├── hooks/
19
- │ │ ├── useAgent.ts # Main facade hook (START HERE)
20
- │ │ └── index.ts
21
- │ └── index.ts
22
-
23
- ├── widgets/ # UI LAYER
24
- │ ├── pill/ # Compact floating UI
25
- │ │ └── index.ts # Exports AgentPill, PillContainer, etc.
26
- │ ├── modal/ # Full chat modal
27
- │ │ └── index.ts # Exports AssistantModal, Thread, etc.
28
- │ ├── shared/ # Shared components
29
- │ │ └── index.ts # Exports ActionList, SummaryBlock, etc.
30
- │ └── index.ts
31
-
32
- ├── components/ # IMPLEMENTATION (internal)
33
- │ ├── pill/ # Pill component implementations
34
- │ │ ├── AgentPill.tsx # Main pill component
35
- │ │ ├── PillContainer.tsx # Morphing container
36
- │ │ ├── PillVoice.tsx # Voice mode UI
37
- │ │ ├── ActionList.tsx # Action display
38
- │ │ └── ...
39
- │ └── ... # Other component implementations
40
-
41
- ├── hooks/ # INDIVIDUAL HOOKS (internal)
42
- │ ├── useActions.ts # Derives actions from tool calls
43
- │ ├── useVoiceActions.ts # Voice mode action tracking
44
- │ ├── useVoiceSummary.ts # Voice session summary
45
- │ ├── useVoiceMode.ts # Voice mode state machine
46
- │ └── ...
47
-
48
- ├── providers/ # INDIVIDUAL PROVIDERS (internal)
49
- │ ├── MCPToolsProvider.tsx # MCP tool registry
50
- │ ├── VoiceModeProvider.tsx # Voice mode context
51
- │ └── VoiceMCPBridge.tsx # Connects MCP tools to voice
52
-
53
- ├── services/ # EXTERNAL SERVICES
54
- │ └── realtime/ # OpenAI Realtime API via WebRTC
55
- │ ├── openai-realtime-service.ts
56
- │ ├── webrtc-manager.ts
57
- │ └── ...
58
-
59
- ├── lib/ # UTILITIES
60
- │ ├── constants.ts # Magic numbers live here
61
- │ ├── utils.ts # cn(), etc.
62
- │ └── ...
63
-
64
- └── index.ts # PUBLIC EXPORTS
21
+ ```html
22
+ <script src="@mcp-b/embedded-agent/standalone"></script>
23
+ <webmcp-agent
24
+ app-id="your-app-id"
25
+ api-base="https://your-worker.workers.dev"
26
+ view-mode="pill"
27
+ ></webmcp-agent>
65
28
  ```
66
29
 
67
- ## Quick Start
30
+ ### Option 2: React Component
31
+
32
+ ```tsx
33
+ import { EmbeddedAgent } from '@mcp-b/embedded-agent'
34
+ import '@mcp-b/embedded-agent/styles'
35
+
36
+ function App() {
37
+ return (
38
+ <EmbeddedAgent
39
+ appId="your-app-id"
40
+ apiBase="https://your-worker.workers.dev"
41
+ viewMode="pill"
42
+ />
43
+ )
44
+ }
45
+ ```
68
46
 
69
- ### Recommended: AgentProvider + Widget
47
+ ### Option 3: Composable (Recommended for Custom UIs)
70
48
 
71
49
  ```tsx
72
- import { AgentPill, AgentProvider } from '@mcp-b/embedded-agent'
50
+ import { AgentProvider, AgentPill } from '@mcp-b/embedded-agent'
51
+ import '@mcp-b/embedded-agent/styles'
73
52
 
74
53
  function App() {
75
- return (
76
- <AgentProvider apiBase="https://your-worker.workers.dev">
77
- <AgentPill position="bottom-center" showVoiceButton />
78
- </AgentProvider>
79
- )
54
+ return (
55
+ <AgentProvider apiBase="https://your-worker.workers.dev">
56
+ <AgentPill position="bottom-center" showVoiceButton />
57
+ </AgentProvider>
58
+ )
80
59
  }
81
60
  ```
82
61
 
83
- ### Custom UI with useAgent
62
+ ### Option 4: Fully Custom UI
84
63
 
85
64
  ```tsx
86
65
  import { AgentProvider, useAgent } from '@mcp-b/embedded-agent'
87
66
 
88
67
  function CustomUI() {
89
- const agent = useAgent()
90
-
91
- return (
92
- <div>
93
- {agent.isRunning && <p>Processing...</p>}
94
- {agent.activeActions.map((action) => (
95
- <div key={action.id}>{action.label}</div>
96
- ))}
97
- {agent.voice?.isActive && <button onClick={agent.voice.stop}>End Voice</button>}
98
- </div>
99
- )
68
+ const agent = useAgent()
69
+
70
+ return (
71
+ <div>
72
+ {agent.isRunning && <p>Processing...</p>}
73
+ {agent.activeActions.map((action) => (
74
+ <div key={action.id}>{action.label}</div>
75
+ ))}
76
+ {agent.voice?.isActive && (
77
+ <button onClick={agent.voice.stop}>End Voice</button>
78
+ )}
79
+ </div>
80
+ )
100
81
  }
101
82
 
102
83
  function App() {
103
- return (
104
- <AgentProvider apiBase="https://...">
105
- <CustomUI />
106
- </AgentProvider>
107
- )
84
+ return (
85
+ <AgentProvider apiBase="https://...">
86
+ <CustomUI />
87
+ </AgentProvider>
88
+ )
108
89
  }
109
90
  ```
110
91
 
111
- ## Core API
92
+ ## Architecture Overview
112
93
 
113
- ### AgentProvider
94
+ The package follows a **core/widgets** architecture with strict separation between data and UI layers:
114
95
 
115
- **File:** `src/core/providers/AgentProvider.tsx`
96
+ ```
97
+ src/
98
+ ├── core/ # DATA LAYER (no UI)
99
+ │ ├── providers/
100
+ │ │ └── AgentProvider.tsx # Unified provider
101
+ │ └── hooks/
102
+ │ └── useAgent.ts # Main facade hook
103
+
104
+ ├── widgets/ # UI LAYER
105
+ │ ├── pill/ # Compact floating UI
106
+ │ ├── modal/ # Full chat modal
107
+ │ └── shared/ # Shared components
108
+
109
+ ├── providers/ # Individual providers
110
+ │ ├── MCPToolsProvider.tsx # MCP tool registry
111
+ │ ├── VoiceModeProvider.tsx # Voice mode context
112
+ │ └── VoiceMCPBridge.tsx # MCP-to-voice bridge
113
+
114
+ ├── hooks/ # Individual hooks
115
+ │ ├── useActions.ts # Derive actions from tool calls
116
+ │ ├── useVoiceMode.ts # Voice session state
117
+ │ └── ...
118
+
119
+ ├── services/realtime/ # OpenAI Realtime API
120
+ │ ├── openai-realtime-service.ts
121
+ │ ├── webrtc-manager.ts
122
+ │ └── ...
123
+
124
+ └── components/ # Internal implementations
125
+ ```
116
126
 
117
- Unified provider that sets up:
127
+ For detailed architecture information, see the [Design Documents](#design-documents).
118
128
 
119
- - MCP tool registry (`MCPToolsProvider`)
120
- - Voice mode bridge (`VoiceMCPBridge`)
121
- - Chat runtime (`AssistantRuntimeProvider` from @assistant-ui/react)
129
+ ## Core API
130
+
131
+ ### AgentProvider
132
+
133
+ Unified provider that sets up all agent infrastructure.
122
134
 
123
135
  ```tsx
124
136
  interface AgentProviderProps {
125
- children: ReactNode
126
- apiBase?: string // Backend URL
127
- tokenEndpoint?: string // Voice token endpoint (defaults to {apiBase}/api/realtime/session)
128
- autoConnectLocal?: boolean // Auto-connect to local MCP source (default: true)
129
- onToolsChange?: (tools: ToolWithSource[]) => void
130
- onVoiceError?: (error: string) => void
131
- onVoiceConnect?: () => void
132
- onVoiceDisconnect?: (duration: number) => void
137
+ children: ReactNode
138
+ apiBase?: string // Backend URL
139
+ tokenEndpoint?: string // Voice token endpoint (defaults to {apiBase}/api/realtime/session)
140
+ autoConnectLocal?: boolean // Auto-connect to local MCP (default: true)
141
+ onToolsChange?: (tools: ToolWithSource[]) => void
142
+ onVoiceError?: (error: string) => void
143
+ onVoiceConnect?: () => void
144
+ onVoiceDisconnect?: (duration: number) => void
133
145
  }
134
146
  ```
135
147
 
136
- ### useAgent Hook
148
+ **What it provides:**
149
+ - MCP tool registry (`MCPToolsProvider`)
150
+ - Voice mode with MCP tools (`VoiceMCPBridge`)
151
+ - Chat runtime (`AssistantRuntimeProvider` from @assistant-ui/react)
137
152
 
138
- **File:** `src/core/hooks/useAgent.ts`
153
+ ### useAgent Hook
139
154
 
140
- Main facade hook - combines all agent capabilities into one interface.
155
+ Main facade hook combining all agent capabilities.
141
156
 
142
157
  ```tsx
143
- interface AgentState {
144
- // Thread
145
- messages: ReadonlyArray<ThreadMessage>
146
- isRunning: boolean
147
- hasMessages: boolean
148
-
149
- // Actions (from tool calls)
150
- actions: Action[] // All text mode actions
151
- currentAction: Action | null // Currently running
152
- recentActions: Action[] // Last 3 completed
153
-
154
- // Voice mode actions
155
- voiceActions: Action[] // Actions from voice mode
156
- activeActions: Action[] // Voice or text depending on mode
157
-
158
- // Summary
159
- summary: string | null // Latest summary text
160
- voiceSummary: VoiceSummary | null
161
-
162
- // Voice controls (null if not configured)
163
- voice: {
164
- isActive: boolean
165
- isConnecting: boolean
166
- isError: boolean
167
- isMuted: boolean
168
- error?: string
169
- audioLevel?: AudioLevelData
170
- transcript?: TranscriptData
171
- start: () => Promise<void>
172
- stop: () => void
173
- toggleMute: (muted?: boolean) => void
174
- sendMessage: (text: string) => void
175
- } | null
176
-
177
- // MCP tools (null if not in context)
178
- tools: {
179
- list: ToolWithSource[]
180
- call: (name: string, args: Record<string, unknown>) => Promise<CallToolResult>
181
- } | null
182
-
183
- // Derived
184
- isVoiceActive: boolean
185
- }
158
+ const agent = useAgent()
159
+
160
+ // Thread state
161
+ agent.messages // All messages
162
+ agent.isRunning // Is agent processing?
163
+ agent.hasMessages // Has any messages?
164
+
165
+ // Actions (from tool calls)
166
+ agent.actions // All text mode actions
167
+ agent.currentAction // Currently running action
168
+ agent.recentActions // Last 3 completed
169
+ agent.voiceActions // Voice mode actions
170
+ agent.activeActions // Voice or text (auto-switches)
171
+
172
+ // Summaries
173
+ agent.summary // Latest text summary
174
+ agent.voiceSummary // Voice session summary
175
+
176
+ // Voice controls (null if not configured)
177
+ agent.voice?.isActive
178
+ agent.voice?.isConnecting
179
+ agent.voice?.isMuted
180
+ agent.voice?.start()
181
+ agent.voice?.stop()
182
+ agent.voice?.toggleMute()
183
+ agent.voice?.sendMessage(text)
184
+ agent.voice?.audioLevel // Real-time levels
185
+ agent.voice?.transcript // Current transcript
186
+
187
+ // MCP tools (null if not in context)
188
+ agent.tools?.list
189
+ agent.tools?.call(name, args)
190
+
191
+ // Derived
192
+ agent.isVoiceActive
186
193
  ```
187
194
 
188
195
  ## Widgets
189
196
 
190
197
  ### AgentPill
191
198
 
192
- **File:** `src/components/pill/AgentPill.tsx`
193
-
194
- Compact, morphing floating UI. Shows actions, voice mode, and composer.
199
+ Compact, morphing floating UI with action-first design.
195
200
 
196
201
  ```tsx
197
- interface AgentPillProps {
198
- position?: 'bottom-center' | 'bottom-right'
199
- onOpenHistory?: () => void
200
- showVoiceButton?: boolean
201
- autoCollapse?: boolean // Auto-collapse after 30s inactivity
202
- className?: string
203
- }
202
+ import { AgentPill } from '@mcp-b/embedded-agent'
203
+
204
+ <AgentPill
205
+ position="bottom-center" // or "bottom-right"
206
+ showVoiceButton={true}
207
+ autoCollapse={true} // Collapse after 30s inactivity
208
+ onOpenHistory={() => {}}
209
+ className="custom-class"
210
+ />
204
211
  ```
205
212
 
206
- ### AssistantModal
213
+ **States:**
214
+ - `collapsed` - Tiny bar (ambient)
215
+ - `hovered` - Shows keyboard hint
216
+ - `composing` - User typing
217
+ - `active` - Agent working
218
+ - `expanded` - Shows summary/results
207
219
 
208
- **File:** `src/components/assistant-modal.tsx`
220
+ ### AssistantModal
209
221
 
210
222
  Traditional chat modal with full message thread.
211
223
 
212
224
  ```tsx
213
- // No props - uses context from AgentProvider
214
- <AssistantModal />
225
+ import { AssistantModal } from '@mcp-b/embedded-agent'
226
+
227
+ <AssistantModal /> // Uses context from AgentProvider
215
228
  ```
216
229
 
217
- ## Key Implementation Details
230
+ ### Shared Components
231
+
232
+ For building custom UIs:
233
+
234
+ ```tsx
235
+ import {
236
+ ActionList,
237
+ CurrentActivity,
238
+ SummaryBlock,
239
+ LiveWaveform,
240
+ VoiceIndicator,
241
+ ToolStatusBorder,
242
+ } from '@mcp-b/embedded-agent'
243
+ ```
218
244
 
219
- ### Action Tracking
245
+ ## Voice Mode
220
246
 
221
- **Files:** `src/hooks/useActions.ts`, `src/hooks/useVoiceActions.ts`
247
+ Voice mode uses WebRTC to connect to OpenAI's Realtime API with MCP tool integration.
222
248
 
223
- Actions are derived from tool calls in assistant messages. The `useActions` hook:
249
+ ### How It Works
224
250
 
225
- 1. Subscribes to thread messages via `useThread`
226
- 2. Extracts tool calls from assistant messages
227
- 3. Maps tool calls to Action objects with status (running/success/error)
251
+ 1. **VoiceMCPBridge** converts MCP tools to OpenAI's function format
252
+ 2. **WebRTCManager** establishes peer connection with OpenAI
253
+ 3. **ToolManager** executes tool calls through MCP when the voice session requests them
254
+ 4. **AudioAnalyzer** provides real-time audio levels for visualization
255
+
256
+ ### Requirements
257
+
258
+ - Backend endpoint at `{apiBase}/api/realtime/session` that returns ephemeral tokens
259
+ - Browser with WebRTC and `getUserMedia` support
260
+
261
+ ### Checking Support
262
+
263
+ ```tsx
264
+ const agent = useAgent()
228
265
 
229
- Voice actions work similarly but track tool calls from the realtime voice API.
266
+ if (agent.voice) {
267
+ // Voice is available
268
+ await agent.voice.start()
269
+ }
270
+ ```
230
271
 
231
- ### Voice Mode
272
+ ## MCP Tools
232
273
 
233
- **Files:**
274
+ ### How Tools Are Registered
234
275
 
235
- - `src/hooks/useVoiceMode.ts` - State machine for voice sessions
236
- - `src/services/realtime/` - OpenAI Realtime API integration
237
- - `src/providers/VoiceMCPBridge.tsx` - Connects MCP tools to voice mode
276
+ 1. `MCPToolsProvider` manages connections to MCP sources
277
+ 2. `MCPToolRegistry` bridges MCP tools to the chat runtime
278
+ 3. Tools are tagged with `_sourceId` for auto-routing
238
279
 
239
- Voice mode uses WebRTC to connect to OpenAI's Realtime API. The VoiceMCPBridge:
280
+ ### Multiple Sources
240
281
 
241
- 1. Watches MCP tools from MCPToolsProvider
242
- 2. Converts them to RegisteredTool format
243
- 3. Provides tool executor that calls through MCP
282
+ ```tsx
283
+ const { addSource, tools, callTool } = useMCPTools()
244
284
 
245
- ### MCP Tools
285
+ // Add remote server
286
+ await addSource('remote', {
287
+ type: 'http',
288
+ url: 'http://localhost:8888/mcp'
289
+ })
246
290
 
247
- **File:** `src/providers/MCPToolsProvider.tsx`
291
+ // Tools from all sources are aggregated
292
+ console.log(tools) // [...localTools, ...remoteTools]
248
293
 
249
- Manages MCP tool sources (local tab, remote servers). Key exports:
294
+ // Calls auto-route to correct source
295
+ await callTool('some_tool', { arg: 'value' })
296
+ ```
250
297
 
251
- - `useMCPTools()` - Required context hook
252
- - `useOptionalMCPTools()` - Returns null if not in provider
298
+ ### Prompts
253
299
 
254
- ### Constants
300
+ MCP prompts are also supported:
255
301
 
256
- **File:** `src/lib/constants.ts`
302
+ ```tsx
303
+ const { prompts, getPrompt } = useMCPTools()
257
304
 
258
- All magic numbers are centralized here:
305
+ // List available prompts
306
+ console.log(prompts)
259
307
 
260
- - `VOICE_ACTIONS_RETENTION_MS` (3000ms) - How long voice actions persist
261
- - `VOICE_SUMMARY_RETENTION_MS` (30000ms) - How long voice summary persists
262
- - `TOOL_CALL_DISPLAY_DURATION_MS` (2000ms) - Tool status display time
263
- - etc.
308
+ // Get expanded prompt content
309
+ const result = await getPrompt('my-prompt', { arg: 'value' })
310
+ ```
264
311
 
265
312
  ## Legacy API
266
313
 
267
- The `EmbeddedAgent` component is preserved for backward compatibility but wraps the new architecture internally.
314
+ The `EmbeddedAgent` component wraps the new architecture for backward compatibility:
268
315
 
269
316
  ```tsx
270
- // Legacy (still works)
271
317
  import { EmbeddedAgent } from '@mcp-b/embedded-agent'
272
318
 
273
319
  <EmbeddedAgent
274
320
  appId="your-app"
275
321
  apiBase="https://..."
276
- viewMode="pill"
322
+ viewMode="pill" // or "modal"
323
+ autoConnectLocal={true}
324
+ onToolsChange={(tools) => {}}
325
+ onVoiceError={(error) => {}}
326
+ onVoiceConnect={() => {}}
327
+ onVoiceDisconnect={(duration) => {}}
277
328
  />
278
-
279
- // New (recommended)
280
- import { AgentProvider, AgentPill } from '@mcp-b/embedded-agent'
281
-
282
- <AgentProvider apiBase="https://...">
283
- <AgentPill />
284
- </AgentProvider>
285
329
  ```
286
330
 
287
331
  ## Development
@@ -303,28 +347,77 @@ pnpm test
303
347
  pnpm play
304
348
  ```
305
349
 
350
+ ## Design Documents
351
+
352
+ For detailed architecture and implementation information:
353
+
354
+ - **[High-Level Design](./docs/HIGH_LEVEL_DESIGN.md)** - Architecture overview, data flow, design principles
355
+ - **[Low-Level Design](./docs/LOW_LEVEL_DESIGN.md)** - Implementation details, file organization, patterns
356
+
306
357
  ## Exports Reference
307
358
 
308
- ### From `@mcp-b/embedded-agent`
359
+ ### Main Entry (`@mcp-b/embedded-agent`)
309
360
 
310
- **Core:**
361
+ ```typescript
362
+ // Main component
363
+ export { EmbeddedAgent, registerWebMCPAgent }
364
+ export type { EmbeddedAgentProps, AgentViewMode }
365
+ ```
366
+
367
+ ### Core (`@mcp-b/embedded-agent/core`) *
368
+
369
+ ```typescript
370
+ // Provider
371
+ export { AgentProvider }
372
+ export type { AgentProviderProps }
311
373
 
312
- - `AgentProvider`, `AgentProviderProps`
313
- - `useAgent`, `AgentState`, `AgentVoice`, `AgentTools`
314
- - `useActions`, `useCurrentAction`, `useRecentActions`, `Action`
315
- - `useVoiceActions`, `useVoiceSummary`, `VoiceSummary`
316
- - `useMCPTools`, `useOptionalMCPTools`, `MCPToolsContextValue`
374
+ // Main hook
375
+ export { useAgent }
376
+ export type { AgentState, AgentVoice, AgentTools }
317
377
 
318
- **Widgets:**
378
+ // Action hooks
379
+ export { useActions, useCurrentAction, useRecentActions, humanizeToolName }
380
+ export type { Action }
319
381
 
320
- - `AgentPill`, `AgentPillProps`, `PillPosition`
321
- - `AssistantModal`
322
- - `ActionList`, `SummaryBlock`, `CurrentActivity`
323
- - `PillContainer`, `PillComposer`, `PillVoice`
324
- - `LiveWaveform`, `VoiceIndicator`, `ToolStatusBorder`
382
+ // Voice hooks
383
+ export { useVoiceActions, useVoiceSummary, useHasVoiceSummary, useVoiceMode }
384
+ export type { VoiceSummary, UseVoiceModeOptions, UseVoiceModeReturn }
325
385
 
326
- **Legacy:**
386
+ // Prompt hooks
387
+ export { usePrompts }
388
+ export type { UsePromptsResult }
327
389
 
328
- - `EmbeddedAgent`, `EmbeddedAgentProps`
390
+ // Provider access
391
+ export { useMCPTools, useOptionalMCPTools, useOptionalVoiceModeContext }
392
+ export type { MCPToolsContextValue, VoiceModeContextValue }
393
+
394
+ // Constants
395
+ export {
396
+ VOICE_ACTIONS_RETENTION_MS,
397
+ VOICE_SUMMARY_RETENTION_MS,
398
+ TOOL_CALL_DISPLAY_DURATION_MS,
399
+ TOOL_CALL_ERROR_DISPLAY_DURATION_MS,
400
+ }
401
+ ```
402
+
403
+ ### Widgets (`@mcp-b/embedded-agent/widgets`) *
404
+
405
+ ```typescript
406
+ // Pill widget
407
+ export { AgentPill, PillContainer, PillComposer, PillVoice, HistorySidebar }
408
+ export { useVoiceBorderStatus, PromptBadge, PromptSuggestions }
409
+ export type { AgentPillProps, PillPosition, PillMode, Conversation }
410
+ export type { PromptBadgeProps, PromptSuggestionsProps }
411
+
412
+ // Modal widget
413
+ export { AssistantModal, Thread, Composer, ThreadWithVoice }
414
+ export { UserMessage, AssistantMessage }
415
+
416
+ // Shared components
417
+ export { ActionList, ActionSummary, CurrentActivity, ActionStatusIcon }
418
+ export { IdleIndicator, SummaryBlock, WelcomeMessage, ThreadContent }
419
+ export { useLatestSummary, LiveWaveform, VoiceIndicator }
420
+ export { ToolStatusBorder, PillMarkdown }
421
+ ```
329
422
 
330
- See `src/index.ts` for the complete export list.
423
+ \* *These are internal module paths available for advanced usage. The main entry point re-exports commonly used items.*