osborn 0.1.6 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -1,5 +1,5 @@
1
1
  # LLM Provider: 'openai' or 'gemini'
2
- LLM_PROVIDER=openai
2
+ LLM_PROVIDER=gemini
3
3
 
4
4
  # LiveKit
5
5
  LIVEKIT_URL=wss://your-project.livekit.cloud
@@ -12,3 +12,10 @@ OPENAI_API_KEY=sk-...
12
12
  # Google AI (for Gemini Live - FREE during preview!)
13
13
  # Get your key at: https://aistudio.google.com/apikey
14
14
  GOOGLE_API_KEY=AIzaSy...
15
+
16
+ # Anthropic (required for Claude Agent SDK + Fast Brain)
17
+ ANTHROPIC_API_KEY=sk-ant-...
18
+
19
+ # Smithery (cloud-hosted MCP servers - YouTube, GitHub, etc.)
20
+ # Get your key at: https://smithery.ai
21
+ # SMITHERY_API_KEY=your-smithery-api-key
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Bridge LLM Module - Creates LLM instances for pipelined voice sessions
3
+ *
4
+ * In pipelined mode, we use a separate LLM (Gemini or GPT-4o) as the
5
+ * "conversation manager" that handles voice I/O and routes to Claude Code.
6
+ */
7
+ import * as google from '@livekit/agents-plugin-google';
8
+ import * as openai from '@livekit/agents-plugin-openai';
9
/**
 * Configuration accepted by `createBridgeLLM`.
 */
export interface BridgeLLMConfig {
    /** Which bridge model family to instantiate. */
    provider: 'gemini-pro' | 'gemini-flash' | 'gpt-4o' | 'gpt-4o-mini';
    /** Optional model identifier; when absent the factory uses a per-provider default. */
    model?: string;
}
13
/**
 * Build the bridge LLM used by pipelined voice sessions.
 *
 * Supported `config.provider` values:
 * - `gemini-pro`: Gemini 2.5 Pro (smart, good reasoning)
 * - `gemini-flash`: Gemini 2.0 Flash (faster, cheaper)
 * - `gpt-4o`: GPT-4o (alternative if OpenAI preferred)
 * - `gpt-4o-mini`: GPT-4o Mini (faster, cheaper)
 *
 * @param config - Provider selection plus an optional model override.
 * @returns A Google or OpenAI plugin LLM instance, depending on the provider.
 */
export declare function createBridgeLLM(config: BridgeLLMConfig): google.LLM | openai.LLM;
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Bridge LLM Module - Creates LLM instances for pipelined voice sessions
3
+ *
4
+ * In pipelined mode, we use a separate LLM (Gemini or GPT-4o) as the
5
+ * "conversation manager" that handles voice I/O and routes to Claude Code.
6
+ */
7
+ import * as google from '@livekit/agents-plugin-google';
8
+ import * as openai from '@livekit/agents-plugin-openai';
9
+ /**
10
+ * Create Bridge LLM instance for pipelined voice sessions
11
+ *
12
+ * Options:
13
+ * - gemini-pro: Gemini 2.5 Pro (smart, good reasoning)
14
+ * - gemini-flash: Gemini 2.0 Flash (faster, cheaper)
15
+ * - gpt-4o: GPT-4o (alternative if OpenAI preferred)
16
+ * - gpt-4o-mini: GPT-4o Mini (faster, cheaper)
17
+ */
18
+ export function createBridgeLLM(config) {
19
+ switch (config.provider) {
20
+ case 'gemini-pro':
21
+ return new google.LLM({
22
+ model: config.model || 'gemini-2.5-pro',
23
+ });
24
+ case 'gemini-flash':
25
+ return new google.LLM({
26
+ model: config.model || 'gemini-2.0-flash',
27
+ });
28
+ case 'gpt-4o':
29
+ return new openai.LLM({
30
+ model: config.model || 'gpt-4o',
31
+ });
32
+ case 'gpt-4o-mini':
33
+ return new openai.LLM({
34
+ model: config.model || 'gpt-4o-mini',
35
+ });
36
+ default:
37
+ throw new Error(`Unknown Bridge LLM provider: ${config.provider}`);
38
+ }
39
+ }
@@ -35,6 +35,12 @@ export declare class ClaudeHandler extends EventEmitter {
35
35
  private static readonly ALL_TOOLS;
36
36
  private static readonly PLAN_TOOLS;
37
37
  private static readonly EXECUTE_TOOLS;
38
+ private static readonly MCP_READ_ONLY_PATTERNS;
39
+ /**
40
+ * Check if an MCP tool is safe for read-only/plan mode
41
+ * Returns true if the tool only reads data (doesn't modify external resources)
42
+ */
43
+ private static isMcpToolReadOnly;
38
44
  private agentRole;
39
45
  constructor(options?: ClaudeHandlerOptions);
40
46
  /**
@@ -67,6 +67,30 @@ export class ClaudeHandler extends EventEmitter {
67
67
  ];
68
68
  // Execute mode tools - full access
69
69
  static EXECUTE_TOOLS = ClaudeHandler.ALL_TOOLS;
70
+ // MCP Read-Only patterns - tools that don't modify external resources
71
+ // These patterns match MCP tool names that are safe for read-only/plan mode
72
+ static MCP_READ_ONLY_PATTERNS = [
73
+ // GitHub - read operations only (search, list, get)
74
+ /^mcp__github__(search|list|get)_/,
75
+ // YouTube - all tools are typically read-only
76
+ /^mcp__youtube__/,
77
+ // LiveKit - read operations only (list, get)
78
+ /^mcp__livekit__(list|get)_/,
79
+ // LiveKit docs - all read-only
80
+ /^mcp__livekit-docs__/,
81
+ // Filesystem - read only
82
+ /^mcp__filesystem__read/,
83
+ // Generic patterns for common read operations across any MCP server
84
+ /^mcp__[^_]+__(get|list|search|read|fetch|query|describe|show|find)_/,
85
+ /^mcp__[^_]+__(get|list|search|read|fetch|query|describe|show|find)$/,
86
+ ];
87
+ /**
88
+ * Check if an MCP tool is safe for read-only/plan mode
89
+ * Returns true if the tool only reads data (doesn't modify external resources)
90
+ */
91
+ static isMcpToolReadOnly(toolName) {
92
+ return ClaudeHandler.MCP_READ_ONLY_PATTERNS.some(pattern => pattern.test(toolName));
93
+ }
70
94
  agentRole;
71
95
  constructor(options = {}) {
72
96
  super();
@@ -175,6 +199,24 @@ export class ClaudeHandler extends EventEmitter {
175
199
  const description = this.getToolDescription(toolName, toolInput);
176
200
  // Record start time for duration tracking
177
201
  this.toolStartTimes.set(id, Date.now());
202
+ // Block write MCP operations in plan/read-only mode
203
+ if (this.agentRole === 'plan' && toolName.startsWith('mcp__')) {
204
+ if (!ClaudeHandler.isMcpToolReadOnly(toolName)) {
205
+ console.log(`❌ Blocked write MCP tool in plan mode: ${toolName}`);
206
+ logToolCall({
207
+ timestamp: new Date().toISOString(),
208
+ toolName,
209
+ toolUseId: id,
210
+ input: toolInput,
211
+ status: 'blocked',
212
+ error: 'MCP write operation blocked in read-only mode',
213
+ });
214
+ return {
215
+ decision: 'block',
216
+ reason: 'Write operations are not allowed in read-only mode. Switch to edit mode to use this tool.'
217
+ };
218
+ }
219
+ }
178
220
  // Log tool start (background, non-blocking)
179
221
  logToolCall({
180
222
  timestamp: new Date().toISOString(),
@@ -248,7 +290,7 @@ export class ClaudeHandler extends EventEmitter {
248
290
  duration,
249
291
  });
250
292
  console.log(`✅ Completed: ${toolName} (${duration ? duration + 'ms' : 'unknown duration'})`);
251
- this.emit('tool_result', { name: toolName, output: toolOutput, duration });
293
+ this.emit('tool_result', { name: toolName, input: input?.tool_input || {}, output: toolOutput, duration });
252
294
  return {};
253
295
  }]
254
296
  }]
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Claude LLM Wrapper for LiveKit Agents
3
+ *
4
+ * Wraps the Claude Agent SDK (@anthropic-ai/claude-agent-sdk) to work
5
+ * with LiveKit's AgentSession as an LLM provider.
6
+ *
7
+ * Flow: User speaks → STT → ClaudeLLM (Agent SDK) → TTS → User hears
8
+ */
9
+ import { llm, type APIConnectOptions } from '@livekit/agents';
10
+ import { type McpServerConfig } from '@anthropic-ai/claude-agent-sdk';
11
+ import { EventEmitter } from 'events';
12
/**
 * Constructor options for `ClaudeLLM`. Every field is optional.
 *
 * NOTE(review): the implementation is not visible from this declaration, so
 * the per-field notes below are inferred from names and sibling method docs —
 * confirm against the source before relying on them.
 */
export interface ClaudeLLMOptions {
    /** Presumably the directory the agent operates in — TODO confirm. */
    workingDirectory?: string;
    /** Permission mode; restricted to the three literals listed here. */
    permissionMode?: 'default' | 'acceptEdits' | 'bypassPermissions';
    /** Presumably the tool names the agent is allowed to call — TODO confirm. */
    allowedTools?: string[];
    /** Presumably an externally supplied emitter for events — TODO confirm. */
    eventEmitter?: EventEmitter;
    /** Presumably the initial value for `setResumeSessionId` — TODO confirm. */
    resumeSessionId?: string;
    /** Presumably the initial value for `setContinueSession` — TODO confirm. */
    continueSession?: boolean;
    /** MCP server configurations keyed by server name. */
    mcpServers?: Record<string, McpServerConfig>;
    /** Model identifier; the default is not visible here. */
    model?: string;
}
22
/**
 * Claude LLM - wraps the Claude Agent SDK so it can be used as a LiveKit LLM.
 * Research mode: reads anything, writes only to session workspace.
 */
export declare class ClaudeLLM extends llm.LLM {
    #private;
    constructor(opts?: ClaudeLLMOptions);

    // --- Permission requests ---

    /**
     * Answer the pending permission request.
     * Call this after receiving the 'permission_request' event.
     *
     * @param allow - Whether the request is granted.
     * @param message - Optional message accompanying the decision.
     */
    respondToPermission(allow: boolean, message?: string): void;
    /** Whether a permission request is currently pending. */
    hasPendingPermission(): boolean;
    /** Details of the pending permission request, or `null`. */
    getPendingPermission(): {
        toolName: string;
        input: any;
    } | null;

    // --- MCP servers ---

    /** All currently enabled MCP servers. */
    getMcpServers(): Record<string, McpServerConfig>;
    /** Keys of the enabled MCP servers. */
    getEnabledMcpServerKeys(): string[];
    /** Replace all MCP servers at once. */
    setMcpServers(servers: Record<string, McpServerConfig>): void;
    /** Enable a single MCP server under `key`. */
    enableMcpServer(key: string, config: McpServerConfig): void;
    /** Disable the single MCP server registered under `key`. */
    disableMcpServer(key: string): void;

    // --- Identity and session ---

    label(): string;
    get model(): string;
    get sessionId(): string | null;
    /**
     * Set the session ID used to resume a specific conversation.
     * Call this before sending the first message to resume from a previous session.
     */
    setResumeSessionId(sessionId: string | null): void;
    /**
     * Reset state for a mid-conversation session switch.
     * Clears pending permissions and resets conversation tracking.
     */
    resetForSessionSwitch(): void;
    /** Enable "continue" mode, which resumes the most recent session. */
    setContinueSession(enabled: boolean): void;
    /** Whether this instance is configured to resume a session. */
    get isResumingSession(): boolean;
    get events(): EventEmitter;

    // --- File checkpoints ---

    /**
     * Capture a checkpoint UUID for a potential file rewind.
     * Called internally when receiving user message UUIDs from the SDK.
     */
    captureCheckpoint(checkpointId: string): void;
    /**
     * Get the most recent checkpoint UUID.
     * NOTE(review): the original comment said this rewinds "all file changes
     * back to the beginning", which reads like the contract of
     * `getFirstCheckpoint` — confirm which one is intended.
     */
    getLatestCheckpoint(): string | null;
    /**
     * Get the first checkpoint UUID (initial state).
     * Rewinding to this restores all files to their original state.
     */
    getFirstCheckpoint(): string | null;
    /** All captured checkpoint UUIDs, ordered from oldest to newest. */
    getCheckpoints(): string[];
    /**
     * Clear all captured checkpoints.
     * Call this when starting a new session.
     */
    clearCheckpoints(): void;
    /** Whether checkpoints are available. */
    hasCheckpoints(): boolean;

    // --- LiveKit LLM entry point ---

    chat({ chatCtx, toolCtx, connOptions, }: {
        chatCtx: llm.ChatContext;
        toolCtx?: llm.ToolContext;
        connOptions?: APIConnectOptions;
        parallelToolCalls?: boolean;
        toolChoice?: llm.ToolChoice;
        extraKwargs?: Record<string, unknown>;
    }): llm.LLMStream;
}
125
/**
 * Factory for `ClaudeLLM`.
 *
 * @param opts - Optional construction options.
 * @returns A `ClaudeLLM` instance.
 */
export declare function createClaudeLLM(opts?: ClaudeLLMOptions): ClaudeLLM;