nitrostack 1.0.70 → 1.0.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/package.json +1 -1
  2. package/src/studio/app/api/chat/route.ts +33 -15
  3. package/src/studio/app/auth/callback/page.tsx +6 -6
  4. package/src/studio/app/chat/page.tsx +1124 -415
  5. package/src/studio/app/chat/page.tsx.backup +1046 -187
  6. package/src/studio/app/globals.css +361 -191
  7. package/src/studio/app/health/page.tsx +72 -76
  8. package/src/studio/app/layout.tsx +9 -11
  9. package/src/studio/app/logs/page.tsx +29 -30
  10. package/src/studio/app/page.tsx +134 -230
  11. package/src/studio/app/prompts/page.tsx +115 -97
  12. package/src/studio/app/resources/page.tsx +115 -124
  13. package/src/studio/app/settings/page.tsx +1080 -125
  14. package/src/studio/app/tools/page.tsx +343 -0
  15. package/src/studio/components/EnlargeModal.tsx +76 -65
  16. package/src/studio/components/LogMessage.tsx +5 -5
  17. package/src/studio/components/MarkdownRenderer.tsx +4 -4
  18. package/src/studio/components/Sidebar.tsx +150 -210
  19. package/src/studio/components/SplashScreen.tsx +109 -0
  20. package/src/studio/components/ToolCard.tsx +50 -41
  21. package/src/studio/components/VoiceOrbOverlay.tsx +469 -0
  22. package/src/studio/components/WidgetRenderer.tsx +8 -3
  23. package/src/studio/components/tools/ToolsCanvas.tsx +327 -0
  24. package/src/studio/lib/llm-service.ts +104 -1
  25. package/src/studio/lib/store.ts +36 -21
  26. package/src/studio/lib/types.ts +1 -1
  27. package/src/studio/package-lock.json +3303 -0
  28. package/src/studio/package.json +3 -1
  29. package/src/studio/public/NitroStudio Isotype Color.png +0 -0
  30. package/src/studio/tailwind.config.ts +63 -17
  31. package/templates/typescript-starter/package-lock.json +4112 -0
  32. package/templates/typescript-starter/package.json +2 -3
  33. package/templates/typescript-starter/src/modules/calculator/calculator.tools.ts +100 -5
  34. package/src/studio/app/auth/page.tsx +0 -560
  35. package/src/studio/app/ping/page.tsx +0 -209
@@ -5,23 +5,33 @@ import { useStudioStore } from '@/lib/store';
5
5
  import { api } from '@/lib/api';
6
6
  import { WidgetRenderer } from '@/components/WidgetRenderer';
7
7
  import { MarkdownRenderer } from '@/components/MarkdownRenderer';
8
+ import { VoiceOrbOverlay, MiniVoiceOrb } from '@/components/VoiceOrbOverlay';
8
9
  import type { ChatMessage, Tool, ToolCall, Prompt } from '@/lib/types';
9
10
  import {
10
- Bot,
11
- Settings,
12
- Trash2,
13
- Image as ImageIcon,
14
- Send,
15
- Wrench,
16
- Save,
17
- X,
18
- Sparkles,
19
- FileText,
20
- Play,
21
- ExternalLink,
22
- Info,
23
- MoreVertical
24
- } from 'lucide-react';
11
+ SparklesIcon,
12
+ Cog6ToothIcon,
13
+ TrashIcon,
14
+ PhotoIcon,
15
+ PaperAirplaneIcon,
16
+ WrenchScrewdriverIcon,
17
+ BookmarkIcon,
18
+ XMarkIcon,
19
+ DocumentTextIcon,
20
+ PlayIcon,
21
+ ArrowTopRightOnSquareIcon,
22
+ InformationCircleIcon,
23
+ EllipsisVerticalIcon,
24
+ MicrophoneIcon,
25
+ SpeakerWaveIcon,
26
+ StopIcon
27
+ } from '@heroicons/react/24/outline';
28
+
29
+ // Add type for webkitSpeechRecognition
30
+ declare global {
31
+ interface Window {
32
+ webkitSpeechRecognition: any;
33
+ }
34
+ }
25
35
 
26
36
  export default function ChatPage() {
27
37
  const {
@@ -30,16 +40,17 @@ export default function ChatPage() {
30
40
  clearChat,
31
41
  currentProvider,
32
42
  setCurrentProvider,
33
- currentImage,
34
- setCurrentImage,
43
+ currentFile,
44
+ setCurrentFile,
35
45
  tools,
36
46
  setTools,
47
+ elevenLabsApiKey,
48
+ setElevenLabsApiKey
37
49
  } = useStudioStore();
38
50
 
39
- // Get jwtToken and apiKey dynamically to ensure we always have the latest value
51
+ // ... (existing helper methods)
40
52
  const getAuthTokens = () => {
41
53
  const state = useStudioStore.getState();
42
- // Check both jwtToken and OAuth token (from OAuth tab)
43
54
  const jwtToken = state.jwtToken || state.oauthState?.currentToken;
44
55
  return {
45
56
  jwtToken,
@@ -54,165 +65,329 @@ export default function ChatPage() {
54
65
  const [selectedPrompt, setSelectedPrompt] = useState<Prompt | null>(null);
55
66
  const [promptArgs, setPromptArgs] = useState<Record<string, string>>({});
56
67
  const [fullscreenWidget, setFullscreenWidget] = useState<{ uri: string, data: any } | null>(null);
68
+
69
+ // Language presets for quick selection
70
+ const LANG_PRESETS: Record<string, { model: string; voice: string; input: string; name: string; greeting: string }> = {
71
+ 'en': { model: 'eleven_flash_v2_5', voice: '21m00Tcm4TlvDq8ikWAM', input: 'en-US', name: 'English', greeting: 'Hi! How can I help you today?' },
72
+ 'hi': { model: 'eleven_multilingual_v2', voice: 'C2S5J6WvmHnrQWjUu6Rg', input: 'hi-IN', name: 'Hindi', greeting: 'नमस्ते! मैं आज आपकी कैसे मदद कर सकता हूं?' },
73
+ 'es': { model: 'eleven_multilingual_v2', voice: 'ErXwobaYiN019PkySvjV', input: 'es-ES', name: 'Spanish', greeting: '¡Hola! ¿Cómo puedo ayudarte hoy?' },
74
+ 'fr': { model: 'eleven_multilingual_v2', voice: 'CwhRBWXzGAHq8TQ4Fs17', input: 'fr-FR', name: 'French', greeting: 'Bonjour! Comment puis-je vous aider aujourd\'hui?' },
75
+ 'de': { model: 'eleven_multilingual_v2', voice: 'EXAVITQu4vr4xnSDxMaL', input: 'de-DE', name: 'German', greeting: 'Hallo! Wie kann ich Ihnen heute helfen?' },
76
+ 'ja': { model: 'eleven_multilingual_v2', voice: 'MF3mGyEYCl7XYWbV9V6O', input: 'ja-JP', name: 'Japanese', greeting: 'こんにちは!今日はどのようにお手伝いできますか?' },
77
+ 'zh': { model: 'eleven_multilingual_v2', voice: 'TxGEqnHWrfWFTfGW9XjX', input: 'zh-CN', name: 'Chinese', greeting: '你好!我今天能帮你什么?' },
78
+ };
79
+
80
+ // Voice Mode State
81
+ type LLMState = 'idle' | 'listening' | 'thinking' | 'speaking';
82
+ const [llmState, setLlmState] = useState<LLMState>('idle');
83
+ const [voiceModeEnabled, setVoiceModeEnabled] = useState(false);
84
+ const [voiceOverlayOpen, setVoiceOverlayOpen] = useState(false);
85
+ const [spokenText, setSpokenText] = useState('');
86
+ const [voiceDisplayMode, setVoiceDisplayMode] = useState<'voice-only' | 'voice-chat'>('voice-only');
87
+ const [showVoiceSettings, setShowVoiceSettings] = useState(false);
88
+
89
+ // Voice Configuration - load from localStorage
90
+ const [voiceModel, setVoiceModel] = useState(() => {
91
+ if (typeof window !== 'undefined') {
92
+ return localStorage.getItem('voice_model') || 'eleven_multilingual_v2';
93
+ }
94
+ return 'eleven_multilingual_v2';
95
+ });
96
+ const [outputLanguage, setOutputLanguage] = useState(() => {
97
+ if (typeof window !== 'undefined') {
98
+ return localStorage.getItem('output_language') || 'en';
99
+ }
100
+ return 'en';
101
+ });
102
+ const [inputLanguage, setInputLanguage] = useState(() => {
103
+ if (typeof window !== 'undefined') {
104
+ return localStorage.getItem('input_language') || 'en-US';
105
+ }
106
+ return 'en-US';
107
+ });
108
+ const [voiceId, setVoiceId] = useState(() => {
109
+ if (typeof window !== 'undefined') {
110
+ return localStorage.getItem('voice_id') || '21m00Tcm4TlvDq8ikWAM';
111
+ }
112
+ return '21m00Tcm4TlvDq8ikWAM';
113
+ });
114
+
115
+ // Dynamic API data
116
+ interface ElevenLabsModel {
117
+ model_id: string;
118
+ name: string;
119
+ languages?: { language_id: string; name: string }[];
120
+ }
121
+ interface ElevenLabsVoice {
122
+ voice_id: string;
123
+ name: string;
124
+ labels?: { accent?: string; language?: string;[key: string]: string | undefined };
125
+ category?: string;
126
+ }
127
+ const [availableModels, setAvailableModels] = useState<ElevenLabsModel[]>([]);
128
+ const [availableVoices, setAvailableVoices] = useState<ElevenLabsVoice[]>([]);
129
+ const [loadingVoiceData, setLoadingVoiceData] = useState(false);
130
+
131
+ const audioRef = useRef<HTMLAudioElement | null>(null);
132
+ const hasSpokenGreeting = useRef(false); // Prevent double greeting
133
+
57
134
  const messagesEndRef = useRef<HTMLDivElement>(null);
58
135
  const fileInputRef = useRef<HTMLInputElement>(null);
59
136
  const textareaRef = useRef<HTMLTextAreaElement>(null);
60
137
  const initialToolExecuted = useRef(false);
61
138
 
139
+ // Fetch ElevenLabs models when settings opens
62
140
  useEffect(() => {
63
- loadTools();
64
- loadPrompts();
141
+ if ((!showVoiceSettings && !showSettings) || !elevenLabsApiKey) return;
65
142
 
66
- // Check if there's a suggested message from localStorage
67
- if (typeof window !== 'undefined') {
68
- const chatInput = window.localStorage.getItem('chatInput');
69
- if (chatInput) {
70
- setInputValue(chatInput);
71
- window.localStorage.removeItem('chatInput');
72
- // Focus after a short delay to ensure component is mounted
73
- setTimeout(() => textareaRef.current?.focus(), 100);
143
+ const fetchModels = async () => {
144
+ try {
145
+ const modelsRes = await fetch('https://api.elevenlabs.io/v1/models', {
146
+ headers: { 'xi-api-key': elevenLabsApiKey }
147
+ });
148
+ if (modelsRes.ok) {
149
+ const modelsData = await modelsRes.json();
150
+ setAvailableModels(modelsData);
151
+ }
152
+ } catch (err) {
153
+ console.error('Failed to fetch ElevenLabs models:', err);
74
154
  }
75
- }
76
- }, []);
155
+ };
77
156
 
78
- useEffect(() => {
79
- if (tools.length > 0 && !initialToolExecuted.current) {
80
- checkAndRunInitialTool();
81
- }
82
- }, [tools]);
157
+ fetchModels();
158
+ }, [showVoiceSettings, showSettings, elevenLabsApiKey]);
83
159
 
160
+ // Fetch voices when settings opens or output language changes
84
161
  useEffect(() => {
85
- messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
86
- }, [chatMessages]);
162
+ if ((!showVoiceSettings && !showSettings) || !elevenLabsApiKey) return;
87
163
 
88
- // Auto-focus textarea on mount and after sending
89
- useEffect(() => {
90
- textareaRef.current?.focus();
91
- }, [chatMessages, loading]);
164
+ const fetchVoices = async () => {
165
+ setLoadingVoiceData(true);
166
+ try {
167
+ // Map output language to ElevenLabs language code
168
+ const langMap: Record<string, string> = {
169
+ 'en': 'en', 'hi': 'hi', 'es': 'es', 'fr': 'fr', 'de': 'de',
170
+ 'ja': 'ja', 'ko': 'ko', 'zh': 'zh', 'pt': 'pt', 'it': 'it'
171
+ };
172
+ const langCode = langMap[outputLanguage] || 'en';
92
173
 
93
- // Auto-resize textarea based on content
94
- useEffect(() => {
95
- const textarea = textareaRef.current;
96
- if (textarea) {
97
- textarea.style.height = '44px'; // Reset to min height
98
- const scrollHeight = textarea.scrollHeight;
99
- textarea.style.height = Math.min(scrollHeight, 200) + 'px'; // Max 200px
100
- }
101
- }, [inputValue]);
174
+ // Fetch user's own voices
175
+ const userVoicesRes = await fetch('https://api.elevenlabs.io/v1/voices', {
176
+ headers: { 'xi-api-key': elevenLabsApiKey }
177
+ });
178
+ let userVoices: ElevenLabsVoice[] = [];
179
+ if (userVoicesRes.ok) {
180
+ const data = await userVoicesRes.json();
181
+ userVoices = data.voices || [];
182
+ }
102
183
 
103
- // Listen for widget fullscreen requests
104
- useEffect(() => {
105
- const handleFullscreenRequest = (event: CustomEvent) => {
106
- const { uri, data } = event.detail;
107
- setFullscreenWidget({ uri, data });
184
+ // Fetch shared voices filtered by language
185
+ const sharedVoicesRes = await fetch(
186
+ `https://api.elevenlabs.io/v1/shared-voices?language=${langCode}&page_size=50`,
187
+ { headers: { 'xi-api-key': elevenLabsApiKey } }
188
+ );
189
+ let sharedVoices: ElevenLabsVoice[] = [];
190
+ if (sharedVoicesRes.ok) {
191
+ const data = await sharedVoicesRes.json();
192
+ sharedVoices = (data.voices || []).map((v: any) => ({
193
+ voice_id: v.voice_id,
194
+ name: v.name,
195
+ labels: { accent: v.accent || v.language },
196
+ category: 'shared'
197
+ }));
198
+ }
199
+
200
+ // Combine: user voices first, then shared voices
201
+ setAvailableVoices([...userVoices, ...sharedVoices]);
202
+ } catch (err) {
203
+ console.error('Failed to fetch ElevenLabs voices:', err);
204
+ } finally {
205
+ setLoadingVoiceData(false);
206
+ }
108
207
  };
109
208
 
110
- window.addEventListener('widget-fullscreen-request', handleFullscreenRequest as EventListener);
111
- return () => window.removeEventListener('widget-fullscreen-request', handleFullscreenRequest as EventListener);
112
- }, []);
209
+ fetchVoices();
210
+ }, [showVoiceSettings, elevenLabsApiKey, outputLanguage]);
113
211
 
114
- // Listen for widget tool call requests
115
- useEffect(() => {
116
- let isProcessingToolCall = false;
212
+ // Note: Speech recognition is now handled by VoiceOrbOverlay component
117
213
 
118
- const handleToolCall = async (event: any) => {
119
- // Prevent multiple simultaneous calls
120
- if (isProcessingToolCall) {
121
- console.log('⏭️ Skipping duplicate tool call');
122
- return;
214
+ // Text-to-Speech logic for new messages (when in voice mode or overlay is open)
215
+ useEffect(() => {
216
+ // Only trigger TTS if voice mode is enabled OR overlay is open
217
+ if ((!voiceModeEnabled && !voiceOverlayOpen) || !elevenLabsApiKey || chatMessages.length === 0) return;
218
+
219
+ const lastMessage = chatMessages[chatMessages.length - 1];
220
+ if (lastMessage.role === 'assistant' && lastMessage.content) {
221
+ // Stop any current audio
222
+ if (audioRef.current) {
223
+ audioRef.current.pause();
224
+ audioRef.current = null;
123
225
  }
226
+ // Set the text being spoken for overlay display
227
+ const voiceText = convertToVoiceFriendlyText(lastMessage.content);
228
+ setSpokenText(voiceText);
229
+ playTextToSpeech(voiceText);
230
+ }
231
+ }, [chatMessages, voiceModeEnabled, voiceOverlayOpen, elevenLabsApiKey]);
124
232
 
125
- const { toolName, toolArgs } = event.detail;
126
- console.log('📞 Chat received tool call from widget:', toolName, toolArgs);
233
+ // Convert markdown content to voice-friendly, conversational text
234
+ // Optimized for minimal TTS token usage
235
+ const convertToVoiceFriendlyText = (text: string): string => {
236
+ if (!text) return '';
127
237
 
128
- isProcessingToolCall = true;
238
+ let result = text;
129
239
 
130
- try {
131
- // Get current state directly from store to avoid stale closure
132
- const currentMessages = useStudioStore.getState().chatMessages;
133
- const currentProv = useStudioStore.getState().currentProvider;
240
+ // Remove code blocks entirely (not suitable for voice)
241
+ result = result.replace(/```[\s\S]*?```/g, 'I\'ve included code in the chat.');
242
+ result = result.replace(/`[^`]+`/g, '');
134
243
 
135
- // Directly send the tool call message without showing in input
136
- const toolCallMessage = `Use the ${toolName} tool with these arguments: ${JSON.stringify(toolArgs)}`;
244
+ // Remove tables
245
+ result = result.replace(/\|[\s\S]*?\|/g, '');
246
+ if (text.includes('|')) {
247
+ result = result + ' Check the chat for table details.';
248
+ }
137
249
 
138
- // Add user message
139
- const userMessage: ChatMessage = {
140
- role: 'user',
141
- content: toolCallMessage,
142
- };
143
- addChatMessage(userMessage);
144
-
145
- // Call LLM
146
- setLoading(true);
147
- try {
148
- const { jwtToken, mcpApiKey } = getAuthTokens();
149
- const apiKey = localStorage.getItem(`${currentProv}_api_key`);
150
- const response = await api.chat({
151
- provider: currentProv,
152
- messages: [...currentMessages, userMessage],
153
- apiKey: apiKey || '',
154
- jwtToken: jwtToken || undefined,
155
- mcpApiKey: mcpApiKey || undefined,
156
- });
157
-
158
- // Handle tool calls (same as handleSend)
159
- if (response.toolCalls && response.toolResults) {
160
- // Attach results to tool calls for widget rendering
161
- const toolCallsWithResults = response.toolCalls.map((tc: any, i: any) => {
162
- const toolResult = response.toolResults[i];
163
- let parsedResult;
164
- if (toolResult.content) {
165
- try {
166
- parsedResult = JSON.parse(toolResult.content);
167
- } catch (e) {
168
- parsedResult = { raw: toolResult.content };
169
- }
170
- }
171
- return { ...tc, result: parsedResult };
172
- });
250
+ // Remove markdown bold/italic
251
+ result = result.replace(/\*\*([^*]+)\*\*/g, '$1');
252
+ result = result.replace(/\*([^*]+)\*/g, '$1');
253
+ result = result.replace(/__([^_]+)__/g, '$1');
254
+ result = result.replace(/_([^_]+)_/g, '$1');
173
255
 
174
- if (response.message) {
175
- response.message.toolCalls = toolCallsWithResults;
176
- addChatMessage(response.message);
177
- }
256
+ // Remove markdown headers
257
+ result = result.replace(/^#{1,6}\s+/gm, '');
178
258
 
179
- // Add tool results
180
- const toolResultMessages: ChatMessage[] = [];
181
- for (const result of response.toolResults) {
182
- addChatMessage(result);
183
- toolResultMessages.push(result);
184
- }
259
+ // Remove markdown links, keep text
260
+ result = result.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
185
261
 
186
- // Continue conversation
187
- const messagesForContinuation = [
188
- ...currentMessages,
189
- userMessage,
190
- response.message!,
191
- ...toolResultMessages,
192
- ];
193
-
194
- // Call continueChatWithToolResults
195
- await continueChatWithToolResults(apiKey || '', messagesForContinuation);
196
- } else if (response.message) {
197
- addChatMessage(response.message);
198
- }
262
+ // Handle bullet lists - summarize aggressively
263
+ const bulletMatches = result.match(/^[-*]\s+.+$/gm);
264
+ if (bulletMatches && bulletMatches.length > 3) {
265
+ // Get first 2 clean items
266
+ const first2 = bulletMatches.slice(0, 2).map(item =>
267
+ item.replace(/^[-*]\s+/, '').replace(/\*\*/g, '').replace(/\s*\([A-Z]{2,4}\)\s*/g, '').trim()
268
+ );
269
+ const count = bulletMatches.length;
270
+
271
+ // Replace entire list with summary
272
+ const listPattern = /((?:^[-*]\s+.+$\n?)+)/gm;
273
+ result = result.replace(listPattern, `I found ${count} items, including ${first2[0]} and ${first2[1]}. `);
274
+ } else if (bulletMatches) {
275
+ // For short lists, just mention count and first item
276
+ const first = bulletMatches[0].replace(/^[-*]\s+/, '').replace(/\*\*/g, '').trim();
277
+ result = result.replace(/((?:^[-*]\s+.+$\n?)+)/gm, `${bulletMatches.length} options: ${first} and others. `);
278
+ }
199
279
 
200
- setLoading(false);
201
- } catch (error) {
202
- console.error('Tool call failed:', error);
203
- setLoading(false);
204
- }
205
- } finally {
206
- // Reset flag after a short delay to allow next call
207
- setTimeout(() => {
208
- isProcessingToolCall = false;
209
- }, 1000);
280
+ // Remove numbered lists, summarize
281
+ const numberedMatches = result.match(/^\d+\.\s+.+$/gm);
282
+ if (numberedMatches && numberedMatches.length > 3) {
283
+ const first = numberedMatches[0].replace(/^\d+\.\s+/, '').trim();
284
+ result = result.replace(/((?:^\d+\.\s+.+$\n?)+)/gm, `${numberedMatches.length} steps, starting with: ${first}. `);
285
+ } else {
286
+ result = result.replace(/^\d+\.\s+/gm, '');
287
+ }
288
+
289
+ // Remove parenthetical codes like (LON), (STN) for voice
290
+ result = result.replace(/\s*\([A-Z]{2,4}\)\s*/g, ' ');
291
+
292
+ // Clean up multiple newlines and spaces
293
+ result = result.replace(/\n{2,}/g, '. ');
294
+ result = result.replace(/\n/g, ', ');
295
+ result = result.replace(/\s{2,}/g, ' ');
296
+
297
+ // Hard limit: 80 words max for voice response
298
+ const words = result.split(/\s+/).filter(w => w.length > 0);
299
+ if (words.length > 80) {
300
+ result = words.slice(0, 80).join(' ') + '. Would you like more details?';
301
+ }
302
+
303
+ // Clean up any remaining artifacts
304
+ result = result.replace(/,\s*,/g, ',');
305
+ result = result.replace(/\.\s*\./g, '.');
306
+ result = result.replace(/,\s*\./g, '.');
307
+ result = result.trim();
308
+
309
+ return result;
310
+ };
311
+
312
+ // Note: toggleRecording removed - VoiceOrbOverlay handles speech recognition
313
+
314
+ const playTextToSpeech = async (text: string) => {
315
+ console.log('🔊 playTextToSpeech called with:', text?.substring(0, 50));
316
+ console.log('🎤 Using voiceId:', voiceId);
317
+ console.log('🎤 Using voiceModel:', voiceModel);
318
+
319
+ if (!elevenLabsApiKey) {
320
+ console.error('❌ No ElevenLabs API key configured');
321
+ return;
322
+ }
323
+
324
+ try {
325
+ setLlmState('speaking');
326
+
327
+ const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream`, {
328
+ method: 'POST',
329
+ headers: {
330
+ 'Content-Type': 'application/json',
331
+ 'xi-api-key': elevenLabsApiKey,
332
+ },
333
+ body: JSON.stringify({
334
+ text,
335
+ model_id: voiceModel,
336
+ voice_settings: {
337
+ stability: 0.5,
338
+ similarity_boost: 0.75,
339
+ },
340
+ }),
341
+ });
342
+
343
+ console.log('📡 ElevenLabs response status:', response.status);
344
+
345
+ if (!response.ok) {
346
+ const errorText = await response.text();
347
+ console.error('❌ ElevenLabs API error:', errorText);
348
+ throw new Error(`TTS failed: ${response.status} - ${errorText}`);
210
349
  }
211
- };
212
350
 
213
- window.addEventListener('widget-tool-call', handleToolCall);
214
- return () => window.removeEventListener('widget-tool-call', handleToolCall);
215
- }, []); // Empty dependency array - only register once
351
+ const blob = await response.blob();
352
+ console.log('🎵 Audio blob size:', blob.size, 'bytes');
353
+
354
+ const url = URL.createObjectURL(blob);
355
+ const audio = new Audio(url);
356
+
357
+ audio.onended = () => {
358
+ console.log('🔊 Audio playback ended');
359
+ setLlmState('listening'); // Resume listening after speaking
360
+ URL.revokeObjectURL(url);
361
+ };
362
+
363
+ audio.onerror = (e) => {
364
+ console.error('❌ Audio playback error:', e);
365
+ setLlmState('idle');
366
+ URL.revokeObjectURL(url);
367
+ };
368
+
369
+ audioRef.current = audio;
370
+
371
+ try {
372
+ await audio.play();
373
+ console.log('▶️ Audio playing');
374
+ } catch (playError) {
375
+ console.error('❌ Audio play failed (autoplay policy?):', playError);
376
+ setLlmState('idle');
377
+ }
378
+ } catch (error) {
379
+ console.error('❌ TTS Error:', error);
380
+ setLlmState('idle');
381
+ }
382
+ };
383
+
384
+ const stopSpeaking = () => {
385
+ if (audioRef.current) {
386
+ audioRef.current.pause();
387
+ audioRef.current = null;
388
+ }
389
+ setLlmState('idle');
390
+ };
216
391
 
217
392
  const loadTools = async () => {
218
393
  try {
@@ -362,7 +537,7 @@ export default function ChatPage() {
362
537
  }
363
538
  };
364
539
 
365
- const handleImageUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
540
+ const handleFileUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
366
541
  const file = e.target.files?.[0];
367
542
  if (!file) return;
368
543
 
@@ -373,7 +548,7 @@ export default function ChatPage() {
373
548
 
374
549
  const reader = new FileReader();
375
550
  reader.onload = (event) => {
376
- setCurrentImage({
551
+ setCurrentFile({
377
552
  data: event.target?.result as string,
378
553
  type: file.type,
379
554
  name: file.name,
@@ -382,8 +557,11 @@ export default function ChatPage() {
382
557
  reader.readAsDataURL(file);
383
558
  };
384
559
 
385
- const handleSend = async () => {
386
- if (!inputValue.trim() && !currentImage) return;
560
+ const handleSend = async (directMessage?: string) => {
561
+ // Use direct message if provided (from voice mode), otherwise use inputValue
562
+ const messageText = directMessage || inputValue;
563
+
564
+ if (!messageText.trim() && !currentFile) return;
387
565
 
388
566
  const apiKey = localStorage.getItem(`${currentProvider}_api_key`);
389
567
  if (!apiKey) {
@@ -394,16 +572,16 @@ export default function ChatPage() {
394
572
 
395
573
  const userMessage: ChatMessage = {
396
574
  role: 'user',
397
- content: inputValue,
575
+ content: messageText,
398
576
  };
399
577
 
400
- if (currentImage) {
401
- userMessage.image = currentImage;
578
+ if (currentFile) {
579
+ userMessage.file = currentFile;
402
580
  }
403
581
 
404
582
  addChatMessage(userMessage);
405
583
  setInputValue('');
406
- setCurrentImage(null);
584
+ setCurrentFile(null);
407
585
  setLoading(true);
408
586
 
409
587
  try {
@@ -425,9 +603,9 @@ export default function ChatPage() {
425
603
  }
426
604
 
427
605
  // Skip image property for now (not supported by OpenAI chat completions)
428
- // if (msg.image) {
429
- // cleaned.image = msg.image;
430
- // }
606
+ if (msg.file) {
607
+ cleaned.file = msg.file;
608
+ }
431
609
 
432
610
  return cleaned;
433
611
  });
@@ -435,14 +613,29 @@ export default function ChatPage() {
435
613
  // Get fresh auth tokens from store
436
614
  const { jwtToken, mcpApiKey } = getAuthTokens();
437
615
 
438
- console.log('Sending messages to API:', cleanedMessages);
616
+ // Add language instruction for voice mode (if non-English)
617
+ let messagesForApi = cleanedMessages;
618
+ if (voiceModeEnabled && outputLanguage !== 'en') {
619
+ const langNames: Record<string, string> = {
620
+ 'hi': 'Hindi', 'es': 'Spanish', 'fr': 'French', 'de': 'German',
621
+ 'ja': 'Japanese', 'ko': 'Korean', 'zh': 'Chinese', 'pt': 'Portuguese', 'it': 'Italian'
622
+ };
623
+ const langName = langNames[outputLanguage] || outputLanguage;
624
+ const langInstruction = {
625
+ role: 'system',
626
+ content: `IMPORTANT: The user is using voice mode with ${langName} language. You MUST respond in ${langName}. Keep responses concise for voice output.`
627
+ };
628
+ messagesForApi = [langInstruction, ...cleanedMessages];
629
+ }
630
+
631
+ console.log('Sending messages to API:', messagesForApi);
439
632
  console.log('Auth tokens:', { hasJwtToken: !!jwtToken, hasMcpApiKey: !!mcpApiKey });
440
633
  console.log('Original messages:', messagesToSend);
441
- console.log('Cleaned messages JSON:', JSON.stringify(cleanedMessages));
634
+ console.log('Voice mode:', voiceModeEnabled, 'Output language:', outputLanguage);
442
635
 
443
636
  const response = await api.chat({
444
637
  provider: currentProvider,
445
- messages: cleanedMessages,
638
+ messages: messagesForApi,
446
639
  apiKey, // LLM API key (OpenAI/Gemini)
447
640
  jwtToken: jwtToken || undefined,
448
641
  mcpApiKey: mcpApiKey || undefined, // MCP server API key
@@ -451,7 +644,7 @@ export default function ChatPage() {
451
644
  // Handle tool calls FIRST (before adding the message)
452
645
  if (response.toolCalls && response.toolResults) {
453
646
  // Attach results to tool calls for widget rendering
454
- const toolCallsWithResults = response.toolCalls.map((tc, i) => {
647
+ const toolCallsWithResults = response.toolCalls.map((tc: ToolCall, i: number) => {
455
648
  const toolResult = response.toolResults[i];
456
649
 
457
650
  // Parse the result content
@@ -536,7 +729,7 @@ export default function ChatPage() {
536
729
  }
537
730
  };
538
731
 
539
- const continueChatWithToolResults = async (apiKey: string, messages?: Message[]) => {
732
+ const continueChatWithToolResults = async (apiKey: string, messages?: ChatMessage[]) => {
540
733
  try {
541
734
  // Use provided messages or fall back to store (for recursive calls)
542
735
  const messagesToUse = messages || chatMessages;
@@ -579,7 +772,7 @@ export default function ChatPage() {
579
772
 
580
773
  // Recursive tool calls
581
774
  if (response.toolCalls && response.toolResults) {
582
- const newToolResults: Message[] = [];
775
+ const newToolResults: ChatMessage[] = [];
583
776
  for (const result of response.toolResults) {
584
777
  addChatMessage(result);
585
778
  newToolResults.push(result);
@@ -614,174 +807,369 @@ export default function ChatPage() {
614
807
 
615
808
  return (
616
809
  <div className="fixed inset-0 flex flex-col" style={{ left: 'var(--sidebar-width, 15rem)', backgroundColor: '#0a0a0a' }}>
617
- {/* Sticky Header */}
618
- <div className="sticky top-0 z-10 border-b border-border/50 px-3 sm:px-6 py-3 flex flex-col sm:flex-row items-start sm:items-center justify-between bg-card/80 backdrop-blur-md shadow-sm gap-3 sm:gap-0">
810
+ {/* Minimal Professional Header */}
811
+ <div className="sticky top-0 z-10 border-b border-border/50 px-3 sm:px-6 py-4 flex flex-col sm:flex-row items-start sm:items-center justify-between bg-card/50 backdrop-blur-sm gap-3 sm:gap-0">
619
812
  <div className="flex items-center gap-3">
620
- <div className="w-8 h-8 rounded-lg bg-gradient-to-br from-primary to-amber-500 flex items-center justify-center shadow-md">
621
- <Bot className="w-5 h-5 text-white" strokeWidth={2.5} />
622
- </div>
623
- <div>
624
- <h1 className="text-lg font-bold text-foreground">AI Chat</h1>
625
- </div>
813
+ <h1 className="text-lg font-semibold text-foreground">AI Chat</h1>
814
+
815
+ {/* Professional Voice Banner - shows when voice mode active */}
816
+ {voiceModeEnabled && (
817
+ <button
818
+ onClick={() => setVoiceOverlayOpen(true)}
819
+ className="flex items-center gap-3 bg-zinc-800/90 rounded-full px-4 py-2 hover:bg-zinc-700/90 transition-colors"
820
+ >
821
+ {/* Metallic Orb */}
822
+ <div
823
+ className="w-7 h-7 rounded-full flex-shrink-0"
824
+ style={{
825
+ background: 'conic-gradient(from 0deg, #9ca3af, #374151, #9ca3af, #374151, #9ca3af)',
826
+ boxShadow: 'inset 0 2px 4px rgba(255,255,255,0.1), 0 2px 8px rgba(0,0,0,0.3)'
827
+ }}
828
+ />
829
+ {/* State Text */}
830
+ <span className="text-sm text-zinc-300">
831
+ {llmState === 'listening' && 'Listening'}
832
+ {llmState === 'thinking' && 'Processing'}
833
+ {llmState === 'speaking' && 'Speaking'}
834
+ {llmState === 'idle' && 'Ready'}
835
+ </span>
836
+ </button>
837
+ )}
626
838
  </div>
627
839
 
628
840
  <div className="flex items-center gap-2 w-full sm:w-auto">
629
- <select
630
- value={currentProvider}
631
- onChange={(e) => setCurrentProvider(e.target.value as 'openai' | 'gemini')}
632
- className="input text-sm px-3 py-1.5 w-full sm:w-28 flex-1 sm:flex-none"
633
- >
634
- <option value="gemini">Gemini</option>
635
- <option value="openai">OpenAI</option>
636
- </select>
841
+ {/* Voice Output Toggle */}
842
+ {elevenLabsApiKey && (
843
+ <button
844
+ onClick={() => {
845
+ if (llmState === 'speaking') stopSpeaking();
846
+ setVoiceModeEnabled(!voiceModeEnabled);
847
+ }}
848
+ className={`h-8 w-8 rounded-lg flex items-center justify-center transition-all flex-shrink-0 ${voiceModeEnabled
849
+ ? 'bg-primary/20 text-primary ring-1 ring-primary/50'
850
+ : 'bg-muted/50 text-muted-foreground hover:text-foreground'
851
+ }`}
852
+ title={voiceModeEnabled ? "Disable Voice Output" : "Enable Voice Output"}
853
+ >
854
+ {llmState === 'speaking' ? <SpeakerWaveIcon className="h-4 w-4 animate-pulse" /> : <MicrophoneIcon className="h-4 w-4" />}
855
+ </button>
856
+ )}
857
+
858
+
637
859
  <button
638
860
  onClick={() => setShowSettings(!showSettings)}
639
- className={`w-8 h-8 rounded-lg flex items-center justify-center transition-all flex-shrink-0 ${showSettings
861
+ className={`h-8 w-8 rounded-lg flex items-center justify-center transition-all flex-shrink-0 ${showSettings
640
862
  ? 'bg-primary/10 text-primary ring-1 ring-primary/30'
641
863
  : 'bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground'
642
864
  }`}
643
865
  title="Settings"
644
866
  >
645
- <Settings className="w-4 h-4" />
867
+ <Cog6ToothIcon className="h-4 w-4" />
646
868
  </button>
647
869
  <button
648
870
  onClick={clearChat}
649
- className="w-8 h-8 rounded-lg flex items-center justify-center bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground transition-all flex-shrink-0"
871
+ className="h-8 w-8 rounded-lg flex items-center justify-center bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground transition-all flex-shrink-0"
650
872
  title="Clear chat"
651
873
  >
652
- <Trash2 className="w-4 h-4" />
874
+ <TrashIcon className="h-4 w-4" />
653
875
  </button>
654
876
  </div>
655
877
  </div>
656
878
 
657
- {/* Enhanced Settings Panel */}
879
+ {/* Enhanced Settings Side Drawer - Animated from Right */}
658
880
  {showSettings && (
659
- <div className="border-b border-border/50 px-3 sm:px-6 py-4 sm:py-5 bg-muted/20 backdrop-blur-md shadow-sm">
660
- <div className="max-w-4xl mx-auto">
661
- <div className="flex items-start justify-between mb-4">
662
- <div>
663
- <h3 className="text-sm font-semibold text-foreground flex items-center gap-2">
664
- <Settings className="w-4 h-4" />
665
- API Configuration
666
- </h3>
667
- <p className="text-xs text-muted-foreground mt-1">Configure your AI provider API keys to enable chat functionality</p>
881
+ <div
882
+ className="fixed inset-0 z-50 bg-black/50 backdrop-blur-sm transition-opacity"
883
+ onClick={() => setShowSettings(false)}
884
+ >
885
+ <div
886
+ className="absolute right-0 top-0 h-full w-[400px] bg-card/95 backdrop-blur-xl border-l border-border shadow-2xl animate-slide-in-right overflow-y-auto"
887
+ onClick={(e) => e.stopPropagation()}
888
+ >
889
+ <div className="p-6">
890
+ <div className="flex items-center justify-between mb-8">
891
+ <div>
892
+ <h2 className="text-xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">Settings</h2>
893
+ <p className="text-sm text-muted-foreground mt-1">Configure your workspace</p>
894
+ </div>
895
+ <button
896
+ onClick={() => setShowSettings(false)}
897
+ className="p-2 rounded-full hover:bg-muted/50 transition-colors"
898
+ >
899
+ <XMarkIcon className="w-5 h-5 text-muted-foreground" />
900
+ </button>
668
901
  </div>
669
- </div>
670
902
 
671
- <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
672
- {/* OpenAI Section */}
673
- <div className="card p-4">
674
- <div className="flex items-center justify-between mb-3">
675
- <label className="text-xs font-semibold text-foreground flex items-center gap-2">
676
- <div className="w-6 h-6 rounded bg-green-500/10 flex items-center justify-center">
677
- <span className="text-xs font-bold text-green-600">AI</span>
903
+ <div className="space-y-8">
904
+ {/* AI Provider Selection */}
905
+ <section>
906
+ <h3 className="text-sm font-semibold text-foreground uppercase tracking-wider mb-4 flex items-center gap-2">
907
+ <SparklesIcon className="w-4 h-4 text-primary" />
908
+ AI Model
909
+ </h3>
910
+ <div className="card p-1">
911
+ <div className="grid grid-cols-2 p-1 gap-1 bg-muted/30 rounded-lg">
912
+ <button
913
+ onClick={() => setCurrentProvider('gemini')}
914
+ className={`flex items-center justify-center gap-2 py-2.5 rounded-md text-sm font-medium transition-all ${currentProvider === 'gemini'
915
+ ? 'bg-background shadow-sm text-foreground ring-1 ring-border'
916
+ : 'text-muted-foreground hover:text-foreground'
917
+ }`}
918
+ >
919
+ <div className="w-4 h-4 rounded-sm bg-blue-500/20 flex items-center justify-center">
920
+ <span className="text-[10px] font-bold text-blue-600">G</span>
921
+ </div>
922
+ Gemini
923
+ </button>
924
+ <button
925
+ onClick={() => setCurrentProvider('openai')}
926
+ className={`flex items-center justify-center gap-2 py-2.5 rounded-md text-sm font-medium transition-all ${currentProvider === 'openai'
927
+ ? 'bg-background shadow-sm text-foreground ring-1 ring-border'
928
+ : 'text-muted-foreground hover:text-foreground'
929
+ }`}
930
+ >
931
+ <div className="w-4 h-4 rounded-sm bg-green-500/20 flex items-center justify-center">
932
+ <span className="text-[10px] font-bold text-green-600">AI</span>
933
+ </div>
934
+ OpenAI
935
+ </button>
678
936
  </div>
679
- OpenAI API Key
680
- </label>
681
- <a
682
- href="https://platform.openai.com/api-keys"
683
- target="_blank"
684
- rel="noopener noreferrer"
685
- className="text-xs text-primary hover:text-primary/80 flex items-center gap-1 transition-colors"
686
- >
687
- Get Key <ExternalLink className="w-3 h-3" />
688
- </a>
689
- </div>
690
- <div className="flex gap-2 mb-3">
691
- <input
692
- id="openai-api-key"
693
- type="password"
694
- className="input flex-1 text-sm py-2"
695
- placeholder="sk-proj-..."
696
- />
697
- <button onClick={() => saveApiKey('openai')} className="btn btn-primary text-xs px-4 py-2">
698
- <Save className="w-3 h-3 mr-1" />
699
- Save
700
- </button>
701
- </div>
702
- <div className="flex items-start gap-2 p-2 bg-blue-500/5 rounded-lg border border-blue-500/10">
703
- <Info className="w-3 h-3 text-blue-500 mt-0.5 flex-shrink-0" />
704
- <div className="text-xs text-muted-foreground">
705
- <p className="mb-1">
706
- <strong>How to get:</strong> Sign up at{' '}
707
- <a href="https://platform.openai.com/signup" target="_blank" rel="noopener noreferrer" className="text-primary hover:underline">
708
- OpenAI Platform
709
- </a>
710
- , navigate to API Keys, and create a new secret key.
937
+ <p className="p-3 text-xs text-muted-foreground border-t border-border/50 mt-1">
938
+ {currentProvider === 'gemini' ? 'Google Gemini Pro 1.5 - Great for general reasoning and large context.' : 'GPT-4o - Best in class reasoning and code generation.'}
711
939
  </p>
712
- <a
713
- href="https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key"
714
- target="_blank"
715
- rel="noopener noreferrer"
716
- className="text-primary hover:underline inline-flex items-center gap-1"
717
- >
718
- View Guide <ExternalLink className="w-2.5 h-2.5" />
719
- </a>
720
940
  </div>
721
- </div>
722
- </div>
941
+ </section>
942
+
943
+ <hr className="border-border/50" />
944
+
945
+ {/* API Keys Configuration */}
946
+ <section>
947
+ <h3 className="text-sm font-semibold text-foreground uppercase tracking-wider mb-4 flex items-center gap-2">
948
+ <Cog6ToothIcon className="w-4 h-4 text-primary" />
949
+ API Credentials
950
+ </h3>
951
+
952
+ <div className="space-y-4">
953
+ {/* OpenAI Section */}
954
+ <div className="card p-4 card-hover">
955
+ <div className="flex items-center justify-between mb-3">
956
+ <label className="text-sm font-medium text-foreground flex items-center gap-2">
957
+ <div className="w-6 h-6 rounded bg-green-500/10 flex items-center justify-center">
958
+ <span className="text-xs font-bold text-green-600">AI</span>
959
+ </div>
960
+ OpenAI
961
+ </label>
962
+ <a
963
+ href="https://platform.openai.com/api-keys"
964
+ target="_blank"
965
+ rel="noopener noreferrer"
966
+ className="text-xs text-primary hover:text-primary/80 flex items-center gap-1"
967
+ >
968
+ Get Key <ArrowTopRightOnSquareIcon className="w-3 h-3" />
969
+ </a>
970
+ </div>
971
+ <div className="flex gap-2">
972
+ <input
973
+ id="openai-api-key"
974
+ type="password"
975
+ className="input flex-1 text-sm bg-background/50"
976
+ placeholder="sk-proj-..."
977
+ defaultValue={localStorage.getItem('openai_api_key') || ''}
978
+ />
979
+ <button onClick={() => saveApiKey('openai')} className="btn btn-primary btn-sm px-4">
980
+ Save
981
+ </button>
982
+ </div>
983
+ </div>
723
984
 
724
- {/* Gemini Section */}
725
- <div className="card p-4">
726
- <div className="flex items-center justify-between mb-3">
727
- <label className="text-xs font-semibold text-foreground flex items-center gap-2">
728
- <div className="w-6 h-6 rounded bg-blue-500/10 flex items-center justify-center">
729
- <span className="text-xs font-bold text-blue-600">G</span>
985
+ {/* Gemini Section */}
986
+ <div className="card p-4 card-hover">
987
+ <div className="flex items-center justify-between mb-3">
988
+ <label className="text-sm font-medium text-foreground flex items-center gap-2">
989
+ <div className="w-6 h-6 rounded bg-blue-500/10 flex items-center justify-center">
990
+ <span className="text-xs font-bold text-blue-600">G</span>
991
+ </div>
992
+ Gemini
993
+ </label>
994
+ <a
995
+ href="https://aistudio.google.com/app/apikey"
996
+ target="_blank"
997
+ rel="noopener noreferrer"
998
+ className="text-xs text-primary hover:text-primary/80 flex items-center gap-1"
999
+ >
1000
+ Get Key <ArrowTopRightOnSquareIcon className="w-3 h-3" />
1001
+ </a>
1002
+ </div>
1003
+ <div className="flex gap-2">
1004
+ <input
1005
+ id="gemini-api-key"
1006
+ type="password"
1007
+ className="input flex-1 text-sm bg-background/50"
1008
+ placeholder="AIza..."
1009
+ defaultValue={localStorage.getItem('gemini_api_key') || ''}
1010
+ />
1011
+ <button onClick={() => saveApiKey('gemini')} className="btn btn-primary btn-sm px-4">
1012
+ Save
1013
+ </button>
1014
+ </div>
730
1015
  </div>
731
- Gemini API Key
732
- </label>
733
- <a
734
- href="https://aistudio.google.com/app/apikey"
735
- target="_blank"
736
- rel="noopener noreferrer"
737
- className="text-xs text-primary hover:text-primary/80 flex items-center gap-1 transition-colors"
738
- >
739
- Get Key <ExternalLink className="w-3 h-3" />
740
- </a>
741
- </div>
742
- <div className="flex gap-2 mb-3">
743
- <input
744
- id="gemini-api-key"
745
- type="password"
746
- className="input flex-1 text-sm py-2"
747
- placeholder="AIza..."
748
- />
749
- <button onClick={() => saveApiKey('gemini')} className="btn btn-primary text-xs px-4 py-2">
750
- <Save className="w-3 h-3 mr-1" />
751
- Save
752
- </button>
753
- </div>
754
- <div className="flex items-start gap-2 p-2 bg-blue-500/5 rounded-lg border border-blue-500/10">
755
- <Info className="w-3 h-3 text-blue-500 mt-0.5 flex-shrink-0" />
756
- <div className="text-xs text-muted-foreground">
757
- <p className="mb-1">
758
- <strong>How to get:</strong> Visit{' '}
759
- <a href="https://aistudio.google.com" target="_blank" rel="noopener noreferrer" className="text-primary hover:underline">
760
- Google AI Studio
761
- </a>
762
- , sign in with your Google account, and click "Get API key".
763
- </p>
764
- <a
765
- href="https://ai.google.dev/gemini-api/docs/api-key"
766
- target="_blank"
767
- rel="noopener noreferrer"
768
- className="text-primary hover:underline inline-flex items-center gap-1"
769
- >
770
- View Guide <ExternalLink className="w-2.5 h-2.5" />
771
- </a>
772
1016
  </div>
773
- </div>
774
- </div>
775
- </div>
1017
+ </section>
776
1018
 
777
- {/* Security Notice */}
778
- <div className="mt-4 p-3 bg-amber-500/5 rounded-lg border border-amber-500/10">
779
- <div className="flex items-start gap-2">
780
- <Info className="w-4 h-4 text-amber-500 mt-0.5 flex-shrink-0" />
781
- <div className="text-xs text-muted-foreground">
782
- <strong className="text-foreground">Security Note:</strong> Your API keys are stored locally in your browser and never sent to our servers.
783
- Keep them confidential and avoid sharing them publicly.
1019
+ <hr className="border-border/50" />
1020
+
1021
+ {/* Voice Configuration - Inline (Matches Global Settings) */}
1022
+ <section className="space-y-4 pt-4 border-t border-border">
1023
+ <div className="flex items-center justify-between">
1024
+ <label className="text-xs font-bold text-muted-foreground uppercase tracking-wider flex items-center gap-2">
1025
+ <MicrophoneIcon className="w-3 h-3" /> Voice Integration
1026
+ </label>
1027
+ {elevenLabsApiKey && <span className="text-[10px] bg-purple-500/10 text-purple-600 px-2 py-0.5 rounded-full font-medium border border-purple-500/20">Enabled</span>}
1028
+ </div>
1029
+
1030
+ <div className="bg-muted/10 rounded-xl border border-border p-4 space-y-4">
1031
+ {/* API Key Input */}
1032
+ <div>
1033
+ <label className="block text-xs font-medium text-foreground mb-1.5 flex items-center justify-between">
1034
+ <span>ElevenLabs API Key</span>
1035
+ <a
1036
+ href="https://elevenlabs.io/api"
1037
+ target="_blank"
1038
+ rel="noopener noreferrer"
1039
+ className="text-[10px] text-primary hover:underline flex items-center gap-1"
1040
+ >
1041
+ Get Key <ArrowTopRightOnSquareIcon className="w-2.5 h-2.5" />
1042
+ </a>
1043
+ </label>
1044
+ <div className="relative">
1045
+ <input
1046
+ type="password"
1047
+ value={elevenLabsApiKey || ''}
1048
+ onChange={(e) => setElevenLabsApiKey(e.target.value || null)}
1049
+ className="input w-full font-mono text-xs bg-background/50"
1050
+ placeholder={elevenLabsApiKey ? "••••••••••••••••" : "Paste your xi-api-key here"}
1051
+ />
1052
+ {elevenLabsApiKey && (
1053
+ <button
1054
+ onClick={() => setElevenLabsApiKey(null)}
1055
+ className="absolute right-2 top-1.5 text-[10px] text-destructive hover:underline"
1056
+ >
1057
+ Clear
1058
+ </button>
1059
+ )}
1060
+ </div>
1061
+ </div>
1062
+
1063
+ {/* Inline Configuration (Only if Key is set) */}
1064
+ {elevenLabsApiKey ? (
1065
+ <div className="space-y-3 animate-fade-in pt-2 border-t border-border/50">
1066
+ {/* TTS Model */}
1067
+ <div>
1068
+ <label className="block text-xs font-medium text-foreground mb-1.5">Voice Model</label>
1069
+ <select
1070
+ value={voiceModel}
1071
+ onChange={(e) => {
1072
+ setVoiceModel(e.target.value);
1073
+ localStorage.setItem('voice_model', e.target.value);
1074
+ }}
1075
+ className="input w-full text-xs bg-background/50"
1076
+ >
1077
+ {availableModels.length > 0 ? (
1078
+ availableModels.filter(m => m.model_id.includes('eleven')).map(model => (
1079
+ <option key={model.model_id} value={model.model_id}>
1080
+ {model.name}
1081
+ </option>
1082
+ ))
1083
+ ) : (
1084
+ <>
1085
+ <option value="eleven_multilingual_v2">Multilingual v2</option>
1086
+ <option value="eleven_flash_v2_5">Flash v2.5</option>
1087
+ <option value="eleven_turbo_v2_5">Turbo v2.5</option>
1088
+ </>
1089
+ )}
1090
+ </select>
1091
+ </div>
1092
+
1093
+ {/* Voice Selection */}
1094
+ <div>
1095
+ <label className="block text-xs font-medium text-foreground mb-1.5">Voice Character</label>
1096
+ <select
1097
+ value={voiceId}
1098
+ onChange={(e) => {
1099
+ setVoiceId(e.target.value);
1100
+ localStorage.setItem('voice_id', e.target.value);
1101
+ }}
1102
+ className="input w-full text-xs bg-background/50"
1103
+ >
1104
+ {availableVoices.length > 0 ? (
1105
+ availableVoices.map(voice => (
1106
+ <option key={voice.voice_id} value={voice.voice_id}>
1107
+ {voice.name} {voice.labels?.accent ? `(${voice.labels.accent})` : ''}
1108
+ </option>
1109
+ ))
1110
+ ) : (
1111
+ <>
1112
+ <option value="21m00Tcm4TlvDq8ikWAM">Rachel (English)</option>
1113
+ <option value="EXAVITQu4vr4xnSDxMaL">Bella (English)</option>
1114
+ </>
1115
+ )}
1116
+ </select>
1117
+ </div>
1118
+
1119
+ <div className="grid grid-cols-2 gap-2">
1120
+ {/* Output Language */}
1121
+ <div>
1122
+ <label className="block text-xs font-medium text-foreground mb-1.5">Output Lang</label>
1123
+ <select
1124
+ value={outputLanguage}
1125
+ onChange={(e) => {
1126
+ setOutputLanguage(e.target.value);
1127
+ localStorage.setItem('output_language', e.target.value);
1128
+ }}
1129
+ className="input w-full text-xs bg-background/50"
1130
+ >
1131
+ {Object.entries(LANG_PRESETS).map(([code, preset]) => (
1132
+ <option key={code} value={code}>{preset.name}</option>
1133
+ ))}
1134
+ </select>
1135
+ </div>
1136
+
1137
+ {/* Input Language */}
1138
+ <div>
1139
+ <label className="block text-xs font-medium text-foreground mb-1.5">Input Lang</label>
1140
+ <select
1141
+ value={inputLanguage}
1142
+ onChange={(e) => {
1143
+ setInputLanguage(e.target.value);
1144
+ localStorage.setItem('input_language', e.target.value);
1145
+ }}
1146
+ className="input w-full text-xs bg-background/50"
1147
+ >
1148
+ <option value="en-US">English (US)</option>
1149
+ <option value="en-GB">English (UK)</option>
1150
+ <option value="hi-IN">Hindi</option>
1151
+ <option value="es-ES">Spanish</option>
1152
+ <option value="fr-FR">French</option>
1153
+ <option value="de-DE">German</option>
1154
+ <option value="ja-JP">Japanese</option>
1155
+ </select>
1156
+ </div>
1157
+ </div>
1158
+ </div>
1159
+ ) : (
1160
+ <div className="p-3 bg-muted/30 rounded-lg border border-dashed border-border text-center">
1161
+ <p className="text-xs text-muted-foreground">Add API key to unlock premium voice capabilities.</p>
1162
+ </div>
1163
+ )}
1164
+ </div>
1165
+ </section>
1166
+
1167
+ <div className="pt-4">
1168
+ <p className="text-[10px] text-muted-foreground/50 text-center">
1169
+ NitroStudio v1.0.0 • Local Environment
1170
+ </p>
784
1171
  </div>
1172
+
785
1173
  </div>
786
1174
  </div>
787
1175
  </div>
@@ -794,21 +1182,104 @@ export default function ChatPage() {
794
1182
  {chatMessages.length === 0 && !loading ? (
795
1183
  /* Welcome Screen */
796
1184
  <div className="flex flex-col items-center justify-center min-h-[calc(100vh-300px)] animate-fade-in">
797
- <div className="w-16 h-16 rounded-2xl bg-gradient-to-br from-primary to-amber-500 flex items-center justify-center shadow-xl mb-6">
798
- <Bot className="w-10 h-10 text-white" strokeWidth={2.5} />
799
- </div>
800
1185
 
801
- <h2 className="text-3xl font-bold text-foreground mb-3">Welcome to NitroStudio</h2>
802
- <p className="text-muted-foreground text-center max-w-md mb-8">
803
- Your AI-powered development environment for Model Context Protocol (MCP) servers.
804
- Start a conversation or try a prompt below.
805
- </p>
1186
+ {/* Voice Mode UI - Only when ElevenLabs key is set */}
1187
+ {elevenLabsApiKey ? (
1188
+ <div className="flex flex-col items-center">
1189
+ {/* Custom Voice Orb - Same as VoiceOrbOverlay */}
1190
+ <button
1191
+ onClick={() => {
1192
+ // Apply language preset
1193
+ const preset = LANG_PRESETS[outputLanguage] || LANG_PRESETS['en'];
1194
+ setVoiceModel(preset.model);
1195
+ setVoiceId(preset.voice);
1196
+ setInputLanguage(preset.input);
1197
+ // Start voice mode
1198
+ setVoiceOverlayOpen(true);
1199
+ setVoiceModeEnabled(true);
1200
+ }}
1201
+ className="group relative w-44 h-44 rounded-full mb-6 cursor-pointer transition-transform duration-500 hover:scale-105"
1202
+ >
1203
+ {/* Rotating gradient ring - EXACT from VoiceOrbOverlay idle state */}
1204
+ <div
1205
+ className="absolute inset-0 rounded-full"
1206
+ style={{
1207
+ background: 'conic-gradient(from 0deg, #475569, #64748b, #475569)',
1208
+ padding: '3px',
1209
+ borderRadius: '50%'
1210
+ }}
1211
+ >
1212
+ {/* Inner orb */}
1213
+ <div
1214
+ className="w-full h-full rounded-full bg-[#0a0a0a] flex items-center justify-center"
1215
+ style={{
1216
+ boxShadow: '0 0 30px 5px rgba(71, 85, 105, 0.15)'
1217
+ }}
1218
+ >
1219
+ {/* Center gradient - EXACT from VoiceOrbOverlay idle state */}
1220
+ <div
1221
+ className="w-32 h-32 rounded-full flex items-center justify-center"
1222
+ style={{
1223
+ background: 'radial-gradient(circle, #64748b 0%, #0a0a0a 60%)'
1224
+ }}
1225
+ >
1226
+ {/* Small Mic Icon */}
1227
+ <svg className="w-10 h-10 text-slate-400/70 group-hover:text-slate-300 transition-colors" fill="none" stroke="currentColor" viewBox="0 0 24 24">
1228
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M12 1a3 3 0 00-3 3v8a3 3 0 006 0V4a3 3 0 00-3-3z" />
1229
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M19 10v2a7 7 0 01-14 0v-2M12 19v4M8 23h8" />
1230
+ </svg>
1231
+ </div>
1232
+ </div>
1233
+ </div>
1234
+ </button>
1235
+
1236
+ {/* Language Dropdown */}
1237
+ <select
1238
+ value={outputLanguage}
1239
+ onChange={(e) => {
1240
+ const lang = e.target.value;
1241
+ const preset = LANG_PRESETS[lang];
1242
+ if (preset) {
1243
+ setOutputLanguage(lang);
1244
+ setInputLanguage(preset.input);
1245
+ setVoiceModel(preset.model);
1246
+ setVoiceId(preset.voice);
1247
+ // Save to localStorage
1248
+ localStorage.setItem('output_language', lang);
1249
+ localStorage.setItem('input_language', preset.input);
1250
+ localStorage.setItem('voice_model', preset.model);
1251
+ localStorage.setItem('voice_id', preset.voice);
1252
+ }
1253
+ }}
1254
+ className="bg-muted/50 border border-border rounded-xl px-6 py-2.5 text-sm focus:outline-none focus:ring-2 focus:ring-primary/50 mb-4"
1255
+ >
1256
+ {Object.entries(LANG_PRESETS).map(([code, preset]) => (
1257
+ <option key={code} value={code}>{preset.name}</option>
1258
+ ))}
1259
+ </select>
1260
+
1261
+ <p className="text-sm text-muted-foreground/80 mb-8">Click to start voice conversation</p>
1262
+ </div>
1263
+ ) : (
1264
+ /* Traditional Welcome - Only when no ElevenLabs key */
1265
+ <>
1266
+ <div className="w-16 h-16 rounded bg-gradient-to-br from-primary to-secondary flex items-center justify-center shadow-xl mb-6">
1267
+ <SparklesIcon className="h-10 w-10 text-white" />
1268
+ </div>
1269
+
1270
+ <h2 className="text-3xl font-bold text-foreground mb-3">Welcome to NitroStudio</h2>
1271
+ <p className="text-muted-foreground text-center max-w-md mb-8">
1272
+ Your AI-powered development environment for Model Context Protocol (MCP) servers.
1273
+ Start a conversation or try a prompt below.
1274
+ </p>
1275
+ </>
1276
+ )}
806
1277
 
807
1278
  {/* Prompts Overview */}
808
1279
  {prompts.length > 0 && (
809
1280
  <div className="w-full max-w-2xl">
810
1281
  <div className="flex items-center gap-2 mb-4">
811
- <Sparkles className="w-5 h-5 text-primary" />
1282
+ <SparklesIcon className="h-5 w-5 text-primary" />
812
1283
  <h3 className="text-lg font-semibold text-foreground">Available Prompts</h3>
813
1284
  <span className="text-sm text-muted-foreground">({prompts.length})</span>
814
1285
  </div>
@@ -824,8 +1295,8 @@ export default function ChatPage() {
824
1295
  className="card card-hover p-4 text-left group transition-all hover:scale-[1.02]"
825
1296
  >
826
1297
  <div className="flex items-start gap-3">
827
- <div className="w-8 h-8 rounded-lg bg-primary/10 flex items-center justify-center group-hover:bg-primary/20 transition-colors flex-shrink-0">
828
- <FileText className="w-4 h-4 text-primary" />
1298
+ <div className="h-8 w-8 rounded-lg bg-primary/10 flex items-center justify-center group-hover:bg-primary/20 transition-colors flex-shrink-0">
1299
+ <DocumentTextIcon className="h-4 w-4 text-primary" />
829
1300
  </div>
830
1301
  <div className="flex-1 min-w-0">
831
1302
  <h4 className="font-semibold text-foreground text-sm mb-1 truncate">
@@ -884,10 +1355,10 @@ export default function ChatPage() {
884
1355
  ))}
885
1356
  {loading && (
886
1357
  <div className="flex gap-4 items-start animate-fade-in">
887
- <div className="w-8 h-8 rounded-full bg-gradient-to-br from-primary to-amber-500 flex items-center justify-center flex-shrink-0 shadow-md">
888
- <Bot className="w-5 h-5 text-white" strokeWidth={2.5} />
1358
+ <div className="h-8 w-8 rounded-full bg-gradient-to-br from-primary to-secondary flex items-center justify-center flex-shrink-0 shadow-md">
1359
+ <SparklesIcon className="h-5 w-5 text-white" />
889
1360
  </div>
890
- <div className="flex-1 bg-card/50 backdrop-blur-sm rounded-2xl px-5 py-4 border border-border/50">
1361
+ <div className="flex-1 bg-card/50 backdrop-blur-sm rounded px-5 py-4 border border-border/50">
891
1362
  <div className="flex items-center gap-2">
892
1363
  <div className="flex gap-1">
893
1364
  <span className="w-2 h-2 bg-primary rounded-full animate-bounce" style={{ animationDelay: '0s' }}></span>
@@ -905,77 +1376,273 @@ export default function ChatPage() {
905
1376
  </div>
906
1377
  </div>
907
1378
 
908
- {/* ChatGPT-style Input Area - Fixed at bottom */}
909
- <div className="sticky bottom-0 border-t border-border/50 bg-background/95 backdrop-blur-md shadow-[0_-2px_10px_rgba(0,0,0,0.1)]">
910
- <div className="max-w-5xl mx-auto px-3 sm:px-4 py-3 sm:py-4">
911
- {currentImage && (
912
- <div className="mb-3 p-3 bg-card rounded-xl flex items-start gap-3 border border-border/50 animate-fade-in">
913
- <img
914
- src={currentImage.data}
915
- alt={currentImage.name}
916
- className="w-20 h-20 object-cover rounded-lg border border-border"
917
- />
918
- <div className="flex-1 min-w-0">
919
- <p className="text-sm font-medium text-foreground truncate">{currentImage.name}</p>
920
- <p className="text-xs text-muted-foreground">{currentImage.type}</p>
921
- </div>
922
- <button
923
- onClick={() => setCurrentImage(null)}
924
- className="w-7 h-7 rounded-lg flex items-center justify-center bg-muted/50 hover:bg-muted text-muted-foreground hover:text-foreground transition-all flex-shrink-0"
925
- >
926
- <X className="w-4 h-4" />
1379
+ {/* Sleek Professional Input Area */}
1380
+ <div className="sticky bottom-0 bg-gradient-to-t from-background via-background to-transparent pt-6 pb-4">
1381
+ <div className="max-w-3xl mx-auto px-4">
1382
+ {/* Current file preview */}
1383
+ {currentFile && (
1384
+ <div className="mb-2 flex items-center gap-2 text-xs text-muted-foreground bg-muted/50 rounded-lg px-3 py-2">
1385
+ <PhotoIcon className="w-4 h-4" />
1386
+ <span className="truncate">{currentFile.name}</span>
1387
+ <button onClick={() => setCurrentFile(null)} className="ml-auto hover:text-foreground">
1388
+ <XMarkIcon className="w-4 h-4" />
927
1389
  </button>
928
1390
  </div>
929
1391
  )}
930
- <div className="flex items-center gap-2">
931
- <input
932
- type="file"
933
- ref={fileInputRef}
934
- onChange={handleImageUpload}
935
- accept="image/*"
936
- className="hidden"
937
- />
938
- <button
939
- onClick={() => fileInputRef.current?.click()}
940
- className="h-11 w-11 rounded-xl flex items-center justify-center bg-muted/50 hover:bg-muted text-muted-foreground hover:text-foreground transition-all flex-shrink-0"
941
- title="Upload image"
942
- >
943
- <ImageIcon className="w-5 h-5" />
944
- </button>
945
- <div className="flex-1 relative flex items-center">
1392
+
1393
+ {/* Main Input Container */}
1394
+ <div className={`
1395
+ relative rounded-2xl border transition-all duration-300
1396
+ ${loading ? 'opacity-60' : ''}
1397
+ ${llmState !== 'idle' ? 'border-primary/50 shadow-lg shadow-primary/5' : 'border-border/60 hover:border-border focus-within:border-primary/30'}
1398
+ bg-card/80 backdrop-blur-sm
1399
+ `}>
1400
+ <div className="flex items-end gap-1 p-2">
1401
+ {/* File attachment dropdown */}
1402
+ <div className="relative group">
1403
+ <input
1404
+ type="file"
1405
+ ref={fileInputRef}
1406
+ onChange={handleFileUpload}
1407
+ accept="image/*,.pdf,.txt,.md,.json,.csv,.docx"
1408
+ className="hidden"
1409
+ />
1410
+ <button
1411
+ onClick={() => fileInputRef.current?.click()}
1412
+ className="p-2 rounded-xl text-muted-foreground hover:text-foreground hover:bg-muted/80 transition-all"
1413
+ title="Attach file"
1414
+ >
1415
+ <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
1416
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M12 4v16m8-8H4" />
1417
+ </svg>
1418
+ </button>
1419
+ </div>
1420
+
1421
+ {/* Text Input */}
946
1422
  <textarea
947
1423
  ref={textareaRef}
948
1424
  value={inputValue}
949
1425
  onChange={(e) => setInputValue(e.target.value)}
950
1426
  onKeyDown={(e) => {
951
- // Send on Enter, new line on Shift+Enter
952
1427
  if (e.key === 'Enter' && !e.shiftKey) {
953
1428
  e.preventDefault();
954
1429
  handleSend();
955
1430
  }
956
1431
  }}
957
- placeholder="Message NitroStudio... (Shift + Enter for new line)"
958
- className="w-full px-4 py-3 rounded-xl bg-card border border-border/50 focus:border-primary/50 focus:ring-2 focus:ring-primary/20 resize-none text-sm text-foreground placeholder:text-muted-foreground transition-all outline-none"
1432
+ placeholder="Message..."
1433
+ className="flex-1 bg-transparent border-0 focus:ring-0 resize-none py-2 px-1 text-sm min-h-[40px] max-h-[120px] placeholder:text-muted-foreground/50"
959
1434
  rows={1}
960
- style={{
961
- minHeight: '44px',
962
- maxHeight: '200px',
963
- overflow: 'hidden',
964
- }}
1435
+ disabled={loading}
965
1436
  />
1437
+
1438
+ {/* Right side buttons */}
1439
+ <div className="flex items-center gap-1">
1440
+ {/* Voice mode button */}
1441
+ {elevenLabsApiKey && (
1442
+ <button
1443
+ onClick={() => {
1444
+ setVoiceModeEnabled(true);
1445
+ setVoiceOverlayOpen(true);
1446
+ }}
1447
+ className={`p-2 rounded-xl transition-all ${voiceModeEnabled
1448
+ ? 'text-primary bg-primary/10'
1449
+ : 'text-muted-foreground hover:text-foreground hover:bg-muted/80'
1450
+ }`}
1451
+ title="Voice mode"
1452
+ >
1453
+ <MicrophoneIcon className="w-5 h-5" />
1454
+ </button>
1455
+ )}
1456
+
1457
+ {/* Send button */}
1458
+ <button
1459
+ onClick={() => handleSend()}
1460
+ disabled={loading || (!inputValue.trim() && !currentFile)}
1461
+ className={`p-2 rounded-xl transition-all ${inputValue.trim() || currentFile
1462
+ ? 'bg-primary text-primary-foreground hover:bg-primary/90'
1463
+ : 'text-muted-foreground/50 cursor-not-allowed'
1464
+ }`}
1465
+ title="Send"
1466
+ >
1467
+ <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
1468
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 12h14M12 5l7 7-7 7" />
1469
+ </svg>
1470
+ </button>
1471
+ </div>
1472
+ </div>
1473
+ </div>
1474
+
1475
+ {/* Minimal footer hint */}
1476
+ <p className="text-[10px] text-muted-foreground/40 text-center mt-2">
1477
+ Press Enter to send, Shift+Enter for new line
1478
+ </p>
1479
+ </div>
1480
+ </div>
1481
+
1482
+ {/* Voice Settings Modal - z-60 to be above voice overlay (z-50) */}
1483
+ {showVoiceSettings && (
1484
+ <div
1485
+ className="fixed inset-0 z-[60] flex items-center justify-center bg-black/80 backdrop-blur-sm"
1486
+ onClick={() => setShowVoiceSettings(false)}
1487
+ >
1488
+ <div
1489
+ className="bg-card border border-border rounded-2xl p-6 w-[450px] max-h-[85vh] overflow-auto shadow-2xl"
1490
+ onClick={(e) => e.stopPropagation()}
1491
+ >
1492
+ <div className="flex items-center justify-between mb-6">
1493
+ <h2 className="text-lg font-semibold">Voice Settings</h2>
1494
+ <button
1495
+ onClick={() => setShowVoiceSettings(false)}
1496
+ className="p-1 rounded-lg hover:bg-muted transition-colors"
1497
+ >
1498
+ <XMarkIcon className="w-5 h-5" />
1499
+ </button>
966
1500
  </div>
1501
+
1502
+ {loadingVoiceData ? (
1503
+ <div className="flex items-center justify-center py-8">
1504
+ <div className="w-6 h-6 border-2 border-primary border-t-transparent rounded-full animate-spin" />
1505
+ <span className="ml-3 text-muted-foreground">Loading voice options...</span>
1506
+ </div>
1507
+ ) : (
1508
+ <div className="space-y-5">
1509
+ {/* Model Selection */}
1510
+ <div>
1511
+ <label className="block text-sm font-medium text-muted-foreground mb-2">
1512
+ TTS Model
1513
+ </label>
1514
+ <select
1515
+ value={voiceModel}
1516
+ onChange={(e) => setVoiceModel(e.target.value)}
1517
+ className="w-full bg-muted/50 border border-border rounded-xl px-3 py-2.5 text-sm focus:outline-none focus:ring-2 focus:ring-primary/50"
1518
+ >
1519
+ {availableModels.length > 0 ? (
1520
+ availableModels.filter(m => m.model_id.includes('eleven')).map(model => (
1521
+ <option key={model.model_id} value={model.model_id}>
1522
+ {model.name}
1523
+ </option>
1524
+ ))
1525
+ ) : (
1526
+ <>
1527
+ <option value="eleven_multilingual_v2">Multilingual v2</option>
1528
+ <option value="eleven_flash_v2_5">Flash v2.5</option>
1529
+ <option value="eleven_turbo_v2_5">Turbo v2.5</option>
1530
+ </>
1531
+ )}
1532
+ </select>
1533
+ </div>
1534
+
1535
+ {/* Output Language */}
1536
+ <div>
1537
+ <label className="block text-sm font-medium text-muted-foreground mb-2">
1538
+ Output Language (TTS)
1539
+ </label>
1540
+ <select
1541
+ value={outputLanguage}
1542
+ onChange={(e) => setOutputLanguage(e.target.value)}
1543
+ className="w-full bg-muted/50 border border-border rounded-xl px-3 py-2.5 text-sm focus:outline-none focus:ring-2 focus:ring-primary/50"
1544
+ >
1545
+ {/* Get languages from selected model if available */}
1546
+ {(() => {
1547
+ const selectedModel = availableModels.find(m => m.model_id === voiceModel);
1548
+ if (selectedModel?.languages && selectedModel.languages.length > 0) {
1549
+ return selectedModel.languages.map(lang => (
1550
+ <option key={lang.language_id} value={lang.language_id}>
1551
+ {lang.name}
1552
+ </option>
1553
+ ));
1554
+ }
1555
+ return (
1556
+ <>
1557
+ <option value="en">English</option>
1558
+ <option value="hi">Hindi</option>
1559
+ <option value="es">Spanish</option>
1560
+ <option value="fr">French</option>
1561
+ <option value="de">German</option>
1562
+ <option value="ja">Japanese</option>
1563
+ <option value="ko">Korean</option>
1564
+ <option value="zh">Chinese</option>
1565
+ <option value="pt">Portuguese</option>
1566
+ <option value="it">Italian</option>
1567
+ </>
1568
+ );
1569
+ })()}
1570
+ </select>
1571
+ </div>
1572
+
1573
+ {/* Voice Character - pre-filtered by language from API */}
1574
+ <div>
1575
+ <label className="block text-sm font-medium text-muted-foreground mb-2">
1576
+ Voice Character
1577
+ </label>
1578
+ <select
1579
+ value={voiceId}
1580
+ onChange={(e) => setVoiceId(e.target.value)}
1581
+ className="w-full bg-muted/50 border border-border rounded-xl px-3 py-2.5 text-sm focus:outline-none focus:ring-2 focus:ring-primary/50"
1582
+ >
1583
+ {availableVoices.length > 0 ? (
1584
+ availableVoices.map(voice => (
1585
+ <option key={voice.voice_id} value={voice.voice_id}>
1586
+ {voice.name} {voice.labels?.accent ? `(${voice.labels.accent})` : voice.category === 'shared' ? '(Shared)' : ''}
1587
+ </option>
1588
+ ))
1589
+ ) : (
1590
+ <>
1591
+ <option value="21m00Tcm4TlvDq8ikWAM">Rachel (English)</option>
1592
+ <option value="EXAVITQu4vr4xnSDxMaL">Bella (English)</option>
1593
+ </>
1594
+ )}
1595
+ </select>
1596
+ <p className="text-xs text-muted-foreground/60 mt-1">
1597
+ {loadingVoiceData ? 'Loading voices...' : `${availableVoices.length} voices for ${outputLanguage.toUpperCase()}`}
1598
+ </p>
1599
+ </div>
1600
+
1601
+ {/* Input Language (Speech Recognition) */}
1602
+ <div>
1603
+ <label className="block text-sm font-medium text-muted-foreground mb-2">
1604
+ Input Language (Speech Recognition)
1605
+ </label>
1606
+ <select
1607
+ value={inputLanguage}
1608
+ onChange={(e) => setInputLanguage(e.target.value)}
1609
+ className="w-full bg-muted/50 border border-border rounded-xl px-3 py-2.5 text-sm focus:outline-none focus:ring-2 focus:ring-primary/50"
1610
+ >
1611
+ <option value="en-US">English (US)</option>
1612
+ <option value="en-GB">English (UK)</option>
1613
+ <option value="hi-IN">Hindi</option>
1614
+ <option value="es-ES">Spanish</option>
1615
+ <option value="fr-FR">French</option>
1616
+ <option value="de-DE">German</option>
1617
+ <option value="ja-JP">Japanese</option>
1618
+ <option value="ko-KR">Korean</option>
1619
+ <option value="zh-CN">Chinese (Mandarin)</option>
1620
+ <option value="pt-BR">Portuguese (Brazil)</option>
1621
+ <option value="it-IT">Italian</option>
1622
+ </select>
1623
+ <p className="text-xs text-muted-foreground/60 mt-1">
1624
+ Language for voice input (what you speak)
1625
+ </p>
1626
+ </div>
1627
+ </div>
1628
+ )}
1629
+
967
1630
  <button
968
- onClick={handleSend}
969
- disabled={loading || (!inputValue.trim() && !currentImage)}
970
- className="h-11 w-11 rounded-xl flex items-center justify-center bg-gradient-to-br from-primary to-amber-500 text-white shadow-lg hover:shadow-xl disabled:opacity-50 disabled:cursor-not-allowed transition-all flex-shrink-0 hover:scale-105 active:scale-95"
971
- title="Send message (Enter)"
1631
+ onClick={() => {
1632
+ // Save to localStorage
1633
+ localStorage.setItem('voice_model', voiceModel);
1634
+ localStorage.setItem('output_language', outputLanguage);
1635
+ localStorage.setItem('input_language', inputLanguage);
1636
+ localStorage.setItem('voice_id', voiceId);
1637
+ setShowVoiceSettings(false);
1638
+ }}
1639
+ className="w-full mt-6 bg-primary text-primary-foreground rounded-xl py-2.5 text-sm font-medium hover:bg-primary/90 transition-colors"
972
1640
  >
973
- <Send className="w-5 h-5" strokeWidth={2.5} />
1641
+ Save Settings
974
1642
  </button>
975
1643
  </div>
976
-
977
1644
  </div>
978
- </div>
1645
+ )}
979
1646
 
980
1647
  {/* Prompt Executor Modal */}
981
1648
  {selectedPrompt && (
@@ -985,13 +1652,13 @@ export default function ChatPage() {
985
1652
  onClick={() => setSelectedPrompt(null)}
986
1653
  >
987
1654
  <div
988
- className="bg-card rounded-2xl p-6 w-[600px] max-h-[80vh] overflow-auto border border-border shadow-2xl animate-scale-in"
1655
+ className="bg-card rounded p-6 w-[600px] max-h-[80vh] overflow-auto border border-border shadow-2xl animate-scale-in"
989
1656
  onClick={(e) => e.stopPropagation()}
990
1657
  >
991
1658
  <div className="flex items-center justify-between mb-4">
992
1659
  <div className="flex items-center gap-3">
993
- <div className="w-10 h-10 rounded-lg bg-primary/10 flex items-center justify-center">
994
- <FileText className="w-5 h-5 text-primary" />
1660
+ <div className="h-10 w-10 rounded-lg bg-primary/10 flex items-center justify-center">
1661
+ <DocumentTextIcon className="h-5 w-5 text-primary" />
995
1662
  </div>
996
1663
  <h2 className="text-xl font-bold text-foreground">{selectedPrompt.name}</h2>
997
1664
  </div>
@@ -999,7 +1666,7 @@ export default function ChatPage() {
999
1666
  onClick={() => setSelectedPrompt(null)}
1000
1667
  className="btn btn-ghost w-10 h-10 p-0"
1001
1668
  >
1002
- <X className="w-5 h-5" />
1669
+ <XMarkIcon className="h-5 w-5" />
1003
1670
  </button>
1004
1671
  </div>
1005
1672
 
@@ -1040,7 +1707,7 @@ export default function ChatPage() {
1040
1707
  onClick={handleExecutePrompt}
1041
1708
  className="btn btn-primary w-full gap-2"
1042
1709
  >
1043
- <Play className="w-4 h-4" />
1710
+ <PlayIcon className="h-4 w-4" />
1044
1711
  Execute Prompt
1045
1712
  </button>
1046
1713
  </div>
@@ -1061,7 +1728,7 @@ export default function ChatPage() {
1061
1728
  className="absolute top-4 right-4 z-60 p-3 rounded-lg bg-white/10 hover:bg-white/20 backdrop-blur-sm border border-white/20 transition-all"
1062
1729
  title="Exit fullscreen"
1063
1730
  >
1064
- <X className="w-6 h-6 text-white" />
1731
+ <XMarkIcon className="w-6 h-6 text-white" />
1065
1732
  </button>
1066
1733
 
1067
1734
  {/* Widget Container */}
@@ -1072,43 +1739,104 @@ export default function ChatPage() {
1072
1739
  </div>
1073
1740
  </div>
1074
1741
  )}
1742
+
1743
+ {/* Voice Mode Overlay */}
1744
+ <VoiceOrbOverlay
1745
+ isOpen={voiceOverlayOpen}
1746
+ onClose={() => {
1747
+ setVoiceOverlayOpen(false);
1748
+ setVoiceModeEnabled(false);
1749
+ setSpokenText('');
1750
+ // Stop any playing audio
1751
+ if (audioRef.current) {
1752
+ audioRef.current.pause();
1753
+ audioRef.current = null;
1754
+ }
1755
+ setLlmState('idle');
1756
+ // Reset greeting flag so greeting plays on next open
1757
+ hasSpokenGreeting.current = false;
1758
+ }}
1759
+ onSendMessage={(text) => {
1760
+ console.log('📤 onSendMessage called with:', text);
1761
+ setLlmState('thinking');
1762
+ handleSend(text);
1763
+ }}
1764
+ onGreet={() => {
1765
+ // Only greet once per session to prevent overlap
1766
+ if (hasSpokenGreeting.current) {
1767
+ setLlmState('listening');
1768
+ return;
1769
+ }
1770
+ hasSpokenGreeting.current = true;
1771
+ // Use localized greeting based on output language
1772
+ const preset = LANG_PRESETS[outputLanguage] || LANG_PRESETS['en'];
1773
+ const greeting = preset.greeting;
1774
+ console.log('👋 onGreet called - playing welcome message in', preset.name);
1775
+ setSpokenText(greeting);
1776
+ setVoiceModeEnabled(true);
1777
+ playTextToSpeech(greeting);
1778
+ }}
1779
+ elevenLabsApiKey={elevenLabsApiKey || ''}
1780
+ llmState={llmState}
1781
+ spokenText={spokenText}
1782
+ displayMode={voiceDisplayMode}
1783
+ onDisplayModeChange={(mode) => {
1784
+ setVoiceDisplayMode(mode);
1785
+ if (mode === 'voice-chat') {
1786
+ setVoiceOverlayOpen(false);
1787
+ }
1788
+ }}
1789
+ onSettingsClick={() => setShowVoiceSettings(true)}
1790
+ inputLanguage={inputLanguage}
1791
+ voiceModeActive={voiceModeEnabled}
1792
+ onInterrupt={() => {
1793
+ // Talk-to-interrupt: stop TTS and switch to listening
1794
+ if (audioRef.current) {
1795
+ audioRef.current.pause();
1796
+ audioRef.current = null;
1797
+ }
1798
+ setSpokenText('');
1799
+ setLlmState('listening');
1800
+ }}
1801
+ />
1075
1802
  </div>
1076
1803
  );
1077
1804
  }
1078
1805
 
1079
1806
  function ChatMessageComponent({ message, tools }: { message: ChatMessage; tools: Tool[] }) {
1080
- if (message.role === 'tool') return null; // Don't render tool messages directly
1081
-
1807
+ if (message.role === 'tool') return null;
1082
1808
  const isUser = message.role === 'user';
1083
1809
 
1084
1810
  return (
1085
1811
  <div className="flex gap-4 items-start animate-fade-in group">
1086
- {/* Avatar */}
1087
1812
  {!isUser && (
1088
- <div className="w-8 h-8 rounded-full bg-gradient-to-br from-primary to-amber-500 flex items-center justify-center flex-shrink-0 shadow-md group-hover:shadow-lg transition-shadow">
1089
- <Bot className="w-5 h-5 text-white" strokeWidth={2.5} />
1813
+ <div className="h-8 w-8 rounded-full bg-gradient-to-br from-primary to-secondary flex items-center justify-center flex-shrink-0 shadow-md group-hover:shadow-lg transition-shadow">
1814
+ <SparklesIcon className="h-5 w-5 text-white" />
1090
1815
  </div>
1091
1816
  )}
1092
1817
  {isUser && (
1093
- <div className="w-8 h-8 rounded-full bg-gradient-to-br from-slate-600 to-slate-700 flex items-center justify-center flex-shrink-0 shadow-md group-hover:shadow-lg transition-shadow">
1818
+ <div className="h-8 w-8 rounded-full bg-gradient-to-br from-slate-600 to-slate-700 flex items-center justify-center flex-shrink-0 shadow-md group-hover:shadow-lg transition-shadow">
1094
1819
  <span className="text-white text-sm font-bold">You</span>
1095
1820
  </div>
1096
1821
  )}
1097
-
1098
- {/* Message Content */}
1099
1822
  <div className="flex-1 min-w-0">
1100
- {/* Image if present */}
1101
- {message.image && (
1102
- <div className="mb-3 rounded-xl overflow-hidden border border-border/50 shadow-sm">
1103
- <img
1104
- src={message.image.data}
1105
- alt={message.image.name}
1106
- className="max-w-full"
1107
- />
1823
+ {message.file && (
1824
+ <div className="mb-3 rounded-xl overflow-hidden border border-border/50 shadow-sm max-w-sm">
1825
+ {message.file.type.startsWith('image/') ? (
1826
+ <img src={message.file.data} alt={message.file.name} className="max-w-full" />
1827
+ ) : (
1828
+ <div className="p-4 bg-muted/30 flex items-center gap-3">
1829
+ <div className="h-10 w-10 rounded-lg bg-primary/10 flex items-center justify-center">
1830
+ <DocumentTextIcon className="h-5 w-5 text-primary" />
1831
+ </div>
1832
+ <div className="flex-1 min-w-0">
1833
+ <p className="text-sm font-medium text-foreground truncate">{message.file.name}</p>
1834
+ <p className="text-xs text-muted-foreground">{message.file.type}</p>
1835
+ </div>
1836
+ </div>
1837
+ )}
1108
1838
  </div>
1109
1839
  )}
1110
-
1111
- {/* Text content with markdown rendering */}
1112
1840
  {message.content && (
1113
1841
  <div className="text-sm leading-relaxed mb-4">
1114
1842
  {isUser ? (
@@ -1118,12 +1846,10 @@ function ChatMessageComponent({ message, tools }: { message: ChatMessage; tools:
1118
1846
  )}
1119
1847
  </div>
1120
1848
  )}
1121
-
1122
- {/* Tool Calls - ChatGPT-style cards */}
1123
1849
  {message.toolCalls && message.toolCalls.length > 0 && (
1124
1850
  <div className="space-y-3">
1125
- {message.toolCalls.map((toolCall) => (
1126
- <ToolCallComponent key={toolCall.id} toolCall={toolCall} tools={tools} />
1851
+ {message.toolCalls.map((tc: ToolCall) => (
1852
+ <ToolCallComponent key={tc.id} toolCall={tc} tools={tools} />
1127
1853
  ))}
1128
1854
  </div>
1129
1855
  )}
@@ -1136,55 +1862,39 @@ function ToolCallComponent({ toolCall, tools }: { toolCall: ToolCall; tools: Too
1136
1862
  const [showArgs, setShowArgs] = useState(false);
1137
1863
  const tool = tools.find((t) => t.name === toolCall.name);
1138
1864
 
1139
- // Get widget URI from multiple possible sources
1140
1865
  const componentUri =
1141
1866
  tool?.widget?.route ||
1142
1867
  tool?.outputTemplate ||
1143
1868
  tool?._meta?.['openai/outputTemplate'] ||
1144
1869
  tool?._meta?.['ui/template'];
1145
1870
 
1146
- // Get result data from toolCall and unwrap if needed
1147
1871
  let widgetData = toolCall.result || toolCall.arguments;
1148
1872
 
1149
- // Unwrap if response was wrapped by TransformInterceptor
1150
- // Check if it has the interceptor's structure: { success, data, metadata }
1151
1873
  if (widgetData && typeof widgetData === 'object' &&
1152
1874
  widgetData.success !== undefined && widgetData.data !== undefined) {
1153
- widgetData = widgetData.data; // Return the unwrapped data
1875
+ widgetData = widgetData.data;
1154
1876
  }
1155
1877
 
1156
- console.log('ToolCallComponent:', {
1157
- toolName: toolCall.name,
1158
- componentUri,
1159
- hasData: !!widgetData,
1160
- tool
1161
- });
1162
-
1163
1878
  return (
1164
1879
  <div className="relative group/widget">
1165
- {/* Widget - No frame, just the widget */}
1166
1880
  {componentUri && widgetData && (
1167
1881
  <div className="rounded-lg overflow-hidden max-w-5xl">
1168
1882
  <WidgetRenderer uri={componentUri} data={widgetData} className="widget-in-chat" />
1169
1883
  </div>
1170
1884
  )}
1171
-
1172
- {/* 3-dots menu button - positioned absolutely in top-right */}
1173
1885
  <button
1174
1886
  onClick={() => setShowArgs(!showArgs)}
1175
1887
  className="absolute top-2 right-2 w-8 h-8 rounded-lg flex items-center justify-center bg-background/80 backdrop-blur-sm border border-border/50 hover:bg-background hover:border-border transition-all opacity-0 group-hover/widget:opacity-100 shadow-sm z-10"
1176
1888
  title="View tool details"
1177
1889
  >
1178
- <MoreVertical className="w-4 h-4 text-muted-foreground" />
1890
+ <EllipsisVerticalIcon className="h-4 w-4 text-muted-foreground" />
1179
1891
  </button>
1180
-
1181
- {/* Arguments Modal/Dropdown - appears when 3-dots clicked */}
1182
1892
  {showArgs && (
1183
1893
  <div className="absolute top-12 right-2 w-96 max-w-[calc(100%-1rem)] bg-card rounded-xl border border-border shadow-2xl p-4 animate-fade-in z-20">
1184
1894
  <div className="flex items-center justify-between mb-3">
1185
1895
  <div className="flex items-center gap-2">
1186
1896
  <div className="w-6 h-6 rounded-md bg-primary/10 flex items-center justify-center">
1187
- <Wrench className="w-3.5 h-3.5 text-primary" />
1897
+ <WrenchScrewdriverIcon className="w-3.5 h-3.5 text-primary" />
1188
1898
  </div>
1189
1899
  <span className="font-semibold text-sm text-foreground">{toolCall.name}</span>
1190
1900
  </div>
@@ -1192,7 +1902,7 @@ function ToolCallComponent({ toolCall, tools }: { toolCall: ToolCall; tools: Too
1192
1902
  onClick={() => setShowArgs(false)}
1193
1903
  className="w-6 h-6 rounded-md flex items-center justify-center hover:bg-muted transition-colors"
1194
1904
  >
1195
- <X className="w-4 h-4 text-muted-foreground" />
1905
+ <XMarkIcon className="h-4 w-4 text-muted-foreground" />
1196
1906
  </button>
1197
1907
  </div>
1198
1908
  <div>
@@ -1206,4 +1916,3 @@ function ToolCallComponent({ toolCall, tools }: { toolCall: ToolCall; tools: Too
1206
1916
  </div>
1207
1917
  );
1208
1918
  }
1209
-