amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
gaia/talk/sdk.py CHANGED
@@ -1,538 +1,538 @@
1
- #!/usr/bin/env python3
2
- # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
3
- # SPDX-License-Identifier: MIT
4
-
5
- """
6
- Gaia Talk SDK - Unified voice and text chat integration
7
- """
8
-
9
- import logging
10
- from dataclasses import dataclass
11
- from enum import Enum
12
- from typing import Any, AsyncGenerator, Callable, Dict, Optional
13
-
14
- from gaia.audio.audio_client import AudioClient
15
- from gaia.chat.sdk import ChatConfig, ChatSDK
16
- from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME
17
- from gaia.logger import get_logger
18
-
19
-
20
- class TalkMode(Enum):
21
- """Talk mode options."""
22
-
23
- TEXT_ONLY = "text_only"
24
- VOICE_ONLY = "voice_only"
25
- VOICE_AND_TEXT = "voice_and_text"
26
-
27
-
28
- @dataclass
29
- class TalkConfig:
30
- """Configuration for TalkSDK."""
31
-
32
- # Voice-specific settings
33
- whisper_model_size: str = "base"
34
- audio_device_index: Optional[int] = None # Use default input device
35
- silence_threshold: float = 0.5
36
- enable_tts: bool = True
37
- mode: TalkMode = TalkMode.VOICE_AND_TEXT
38
-
39
- # Chat settings (from ChatConfig)
40
- model: str = DEFAULT_MODEL_NAME
41
- max_tokens: int = 512
42
- system_prompt: Optional[str] = None
43
- max_history_length: int = 4 # Number of conversation pairs to keep
44
- assistant_name: str = "gaia"
45
-
46
- # General settings
47
- use_claude: bool = False # Use Claude API
48
- use_chatgpt: bool = False # Use ChatGPT/OpenAI API
49
- show_stats: bool = False
50
- logging_level: str = "INFO"
51
-
52
- # RAG settings (optional - for document Q&A)
53
- rag_documents: Optional[list] = None # PDF documents to index
54
-
55
-
56
- @dataclass
57
- class TalkResponse:
58
- """Response from talk operations."""
59
-
60
- text: str
61
- stats: Optional[Dict[str, Any]] = None
62
- is_complete: bool = True
63
-
64
-
65
- class TalkSDK:
66
- """
67
- Gaia Talk SDK - Unified voice and text chat integration.
68
-
69
- This SDK provides a simple interface for integrating Gaia's voice and text
70
- chat capabilities into applications.
71
-
72
- Example usage:
73
- ```python
74
- from gaia.talk.sdk import TalkSDK, TalkConfig
75
-
76
- # Create SDK instance
77
- config = TalkConfig(enable_tts=True, show_stats=True)
78
- talk = TalkSDK(config)
79
-
80
- # Text chat
81
- response = await talk.chat("Hello, how are you?")
82
- print(response.text)
83
-
84
- # Streaming chat
85
- async for chunk in talk.chat_stream("Tell me a story"):
86
- print(chunk.text, end="", flush=True)
87
-
88
- # Voice chat with document Q&A
89
- talk_rag = TalkSDK(TalkConfig(enable_tts=True))
90
- talk_rag.enable_rag(documents=["manual.pdf", "guide.pdf"])
91
- await talk_rag.start_voice_session()
92
-
93
- # Voice chat with callback
94
- def on_voice_input(text):
95
- print(f"User said: {text}")
96
-
97
- await talk.start_voice_session(on_voice_input)
98
- ```
99
- """
100
-
101
- def __init__(self, config: Optional[TalkConfig] = None):
102
- """
103
- Initialize the TalkSDK.
104
-
105
- Args:
106
- config: Configuration options. If None, uses defaults.
107
- """
108
- self.config = config or TalkConfig()
109
- self.log = get_logger(__name__)
110
- self.log.setLevel(getattr(logging, self.config.logging_level))
111
-
112
- # Initialize ChatSDK for text generation with conversation history
113
- chat_config = ChatConfig(
114
- model=self.config.model,
115
- max_tokens=self.config.max_tokens,
116
- system_prompt=self.config.system_prompt,
117
- max_history_length=self.config.max_history_length,
118
- assistant_name=self.config.assistant_name,
119
- show_stats=self.config.show_stats,
120
- logging_level=self.config.logging_level,
121
- use_claude=self.config.use_claude,
122
- use_chatgpt=self.config.use_chatgpt,
123
- )
124
- self.chat_sdk = ChatSDK(chat_config)
125
-
126
- # Initialize AudioClient with configuration (for voice features)
127
- self.audio_client = AudioClient(
128
- whisper_model_size=self.config.whisper_model_size,
129
- audio_device_index=self.config.audio_device_index,
130
- silence_threshold=self.config.silence_threshold,
131
- enable_tts=self.config.enable_tts,
132
- logging_level=self.config.logging_level,
133
- use_claude=self.config.use_claude,
134
- use_chatgpt=self.config.use_chatgpt,
135
- system_prompt=self.config.system_prompt,
136
- )
137
-
138
- self.show_stats = self.config.show_stats
139
- self._voice_session_active = False
140
-
141
- # Enable RAG if documents are provided
142
- if self.config.rag_documents:
143
- self.enable_rag(documents=self.config.rag_documents)
144
-
145
- self.log.info("TalkSDK initialized with ChatSDK integration")
146
-
147
- async def chat(self, message: str) -> TalkResponse:
148
- """
149
- Send a text message and get a complete response.
150
-
151
- Args:
152
- message: The message to send
153
-
154
- Returns:
155
- TalkResponse with the complete response
156
- """
157
- try:
158
- # Use ChatSDK for text generation (with conversation history)
159
- chat_response = self.chat_sdk.send(message)
160
-
161
- stats = None
162
- if self.show_stats:
163
- stats = chat_response.stats or self.get_stats()
164
-
165
- return TalkResponse(text=chat_response.text, stats=stats, is_complete=True)
166
-
167
- except Exception as e:
168
- self.log.error(f"Error in chat: {e}")
169
- raise
170
-
171
- async def chat_stream(self, message: str) -> AsyncGenerator[TalkResponse, None]:
172
- """
173
- Send a text message and get a streaming response.
174
-
175
- Args:
176
- message: The message to send
177
-
178
- Yields:
179
- TalkResponse chunks as they arrive
180
- """
181
- try:
182
- # Use ChatSDK for streaming text generation (with conversation history)
183
- for chat_chunk in self.chat_sdk.send_stream(message):
184
- if not chat_chunk.is_complete:
185
- yield TalkResponse(text=chat_chunk.text, is_complete=False)
186
- else:
187
- # Final chunk with stats
188
- stats = chat_chunk.stats if self.show_stats else None
189
- yield TalkResponse(text="", stats=stats, is_complete=True)
190
-
191
- except Exception as e:
192
- self.log.error(f"Error in chat_stream: {e}")
193
- raise
194
-
195
- async def process_voice_input(self, text: str) -> TalkResponse:
196
- """
197
- Process voice input text through the complete voice pipeline.
198
-
199
- This includes TTS output if enabled.
200
-
201
- Args:
202
- text: The transcribed voice input
203
-
204
- Returns:
205
- TalkResponse with the processed response
206
- """
207
- try:
208
- # Use ChatSDK to generate response (with conversation history)
209
- chat_response = self.chat_sdk.send(text)
210
-
211
- # If TTS is enabled, speak the response
212
- if self.config.enable_tts and getattr(self.audio_client, "tts", None):
213
- await self.audio_client.speak_text(chat_response.text)
214
-
215
- stats = None
216
- if self.show_stats:
217
- stats = chat_response.stats or self.get_stats()
218
-
219
- return TalkResponse(text=chat_response.text, stats=stats, is_complete=True)
220
-
221
- except Exception as e:
222
- self.log.error(f"Error processing voice input: {e}")
223
- raise
224
-
225
- async def start_voice_session(
226
- self,
227
- on_voice_input: Optional[Callable[[str], None]] = None,
228
- ) -> None:
229
- """
230
- Start an interactive voice session.
231
-
232
- Args:
233
- on_voice_input: Optional callback called when voice input is detected
234
- """
235
- try:
236
- self._voice_session_active = True
237
-
238
- # Initialize TTS if enabled
239
- self.audio_client.initialize_tts()
240
-
241
- # Create voice processor that uses ChatSDK for responses
242
- async def voice_processor(text: str):
243
- # Call user callback if provided
244
- if on_voice_input:
245
- on_voice_input(text)
246
-
247
- # Use ChatSDK to generate response (with conversation history)
248
- chat_response = self.chat_sdk.send(text)
249
-
250
- # If TTS is enabled, speak the response
251
- if self.config.enable_tts and getattr(self.audio_client, "tts", None):
252
- await self.audio_client.speak_text(chat_response.text)
253
-
254
- # Print the response for user feedback
255
- print(f"{self.config.assistant_name.title()}: {chat_response.text}")
256
-
257
- # Show stats if enabled
258
- if self.show_stats and chat_response.stats:
259
- print(f"Stats: {chat_response.stats}")
260
-
261
- # Start voice chat session with our processor
262
- await self.audio_client.start_voice_chat(voice_processor)
263
-
264
- except KeyboardInterrupt:
265
- self.log.info("Voice session interrupted by user")
266
- except Exception as e:
267
- self.log.error(f"Error in voice session: {e}")
268
- raise
269
- finally:
270
- self._voice_session_active = False
271
- self.log.info("Voice chat session ended")
272
-
273
- async def halt_generation(self) -> None:
274
- """Halt the current LLM generation."""
275
- try:
276
- await self.audio_client.halt_generation()
277
- except Exception as e:
278
- self.log.error(f"Error halting generation: {e}")
279
- raise
280
-
281
- def get_stats(self) -> Dict[str, Any]:
282
- """
283
- Get performance statistics.
284
-
285
- Returns:
286
- Dictionary of performance stats
287
- """
288
- try:
289
- # Get stats from ChatSDK instead of directly from LLMClient
290
- return self.chat_sdk.get_stats()
291
- except Exception as e:
292
- self.log.warning(f"Failed to get stats: {e}")
293
- return {}
294
-
295
- def update_config(self, **kwargs) -> None:
296
- """
297
- Update configuration dynamically.
298
-
299
- Args:
300
- **kwargs: Configuration parameters to update
301
- """
302
- # Update our config
303
- for key, value in kwargs.items():
304
- if hasattr(self.config, key):
305
- setattr(self.config, key, value)
306
-
307
- # Update show_stats
308
- if "show_stats" in kwargs:
309
- self.show_stats = kwargs["show_stats"]
310
-
311
- # Update AudioClient configuration
312
- if "silence_threshold" in kwargs:
313
- self.audio_client.silence_threshold = kwargs["silence_threshold"]
314
-
315
- # Update ChatSDK configuration
316
- chat_updates = {}
317
- if "system_prompt" in kwargs:
318
- chat_updates["system_prompt"] = kwargs["system_prompt"]
319
- # Also update AudioClient's system prompt for consistency
320
- self.audio_client.llm_client.system_prompt = kwargs["system_prompt"]
321
- if "max_tokens" in kwargs:
322
- chat_updates["max_tokens"] = kwargs["max_tokens"]
323
- if "max_history_length" in kwargs:
324
- chat_updates["max_history_length"] = kwargs["max_history_length"]
325
- if "assistant_name" in kwargs:
326
- chat_updates["assistant_name"] = kwargs["assistant_name"]
327
-
328
- if chat_updates:
329
- self.chat_sdk.update_config(**chat_updates)
330
-
331
- def clear_history(self) -> None:
332
- """Clear the conversation history."""
333
- self.chat_sdk.clear_history()
334
- self.log.debug("Conversation history cleared")
335
-
336
- def get_history(self) -> list:
337
- """Get the current conversation history."""
338
- return self.chat_sdk.get_history()
339
-
340
- def get_formatted_history(self) -> list:
341
- """Get the conversation history in structured format."""
342
- return self.chat_sdk.get_formatted_history()
343
-
344
- def enable_rag(self, documents: Optional[list] = None, **rag_kwargs) -> bool:
345
- """
346
- Enable RAG (Retrieval-Augmented Generation) for document-based voice/text chat.
347
-
348
- Args:
349
- documents: List of PDF file paths to index
350
- **rag_kwargs: Additional RAG configuration options
351
-
352
- Returns:
353
- True if RAG was successfully enabled
354
- """
355
- try:
356
- self.chat_sdk.enable_rag(documents=documents, **rag_kwargs)
357
- self.log.info(
358
- f"RAG enabled with {len(documents) if documents else 0} documents"
359
- )
360
- return True
361
- except ImportError:
362
- self.log.warning(
363
- 'RAG dependencies not available. Install with: uv pip install -e ".[rag]"'
364
- )
365
- return False
366
- except Exception as e:
367
- self.log.error(f"Failed to enable RAG: {e}")
368
- return False
369
-
370
- def disable_rag(self) -> None:
371
- """Disable RAG functionality."""
372
- self.chat_sdk.disable_rag()
373
- self.log.info("RAG disabled")
374
-
375
- def add_document(self, document_path: str) -> bool:
376
- """
377
- Add a document to the RAG index.
378
-
379
- Args:
380
- document_path: Path to PDF file to index
381
-
382
- Returns:
383
- True if document was successfully added
384
- """
385
- if not self.chat_sdk.rag_enabled:
386
- self.log.warning("RAG not enabled. Call enable_rag() first.")
387
- return False
388
-
389
- try:
390
- return self.chat_sdk.add_document(document_path)
391
- except Exception as e:
392
- self.log.error(f"Failed to add document {document_path}: {e}")
393
- return False
394
-
395
- @property
396
- def is_voice_session_active(self) -> bool:
397
- """Check if a voice session is currently active."""
398
- return self._voice_session_active
399
-
400
- @property
401
- def audio_devices(self) -> list:
402
- """Get list of available audio input devices."""
403
- try:
404
- from gaia.audio.audio_recorder import AudioRecorder
405
-
406
- recorder = AudioRecorder()
407
- return recorder.list_audio_devices()
408
- except Exception as e:
409
- self.log.error(f"Error listing audio devices: {e}")
410
- return []
411
-
412
-
413
- class SimpleTalk:
414
- """
415
- Ultra-simple interface for quick integration.
416
-
417
- Example usage:
418
- ```python
419
- from gaia.talk.sdk import SimpleTalk
420
-
421
- talk = SimpleTalk()
422
-
423
- # Simple text chat
424
- response = await talk.ask("What's the weather like?")
425
- print(response)
426
-
427
- # Simple voice chat
428
- await talk.voice_chat() # Starts interactive session
429
- ```
430
- """
431
-
432
- def __init__(
433
- self,
434
- system_prompt: Optional[str] = None,
435
- enable_tts: bool = True,
436
- assistant_name: str = "gaia",
437
- ):
438
- """
439
- Initialize SimpleTalk with minimal configuration.
440
-
441
- Args:
442
- system_prompt: Optional system prompt for the AI
443
- enable_tts: Whether to enable text-to-speech
444
- assistant_name: Name to use for the assistant
445
- """
446
- config = TalkConfig(
447
- system_prompt=system_prompt,
448
- enable_tts=enable_tts,
449
- assistant_name=assistant_name,
450
- show_stats=False,
451
- logging_level="WARNING", # Minimal logging
452
- )
453
- self._sdk = TalkSDK(config)
454
-
455
- async def ask(self, question: str) -> str:
456
- """
457
- Ask a question and get a text response.
458
-
459
- Args:
460
- question: The question to ask
461
-
462
- Returns:
463
- The AI's response as a string
464
- """
465
- response = await self._sdk.chat(question)
466
- return response.text
467
-
468
- async def ask_stream(self, question: str):
469
- """
470
- Ask a question and get a streaming response.
471
-
472
- Args:
473
- question: The question to ask
474
-
475
- Yields:
476
- Response chunks as they arrive
477
- """
478
- async for chunk in self._sdk.chat_stream(question):
479
- if not chunk.is_complete:
480
- yield chunk.text
481
-
482
- async def voice_chat(self) -> None:
483
- """Start an interactive voice chat session."""
484
- print("Starting voice chat... Say 'stop' to quit or press Ctrl+C")
485
-
486
- def on_voice_input(text: str):
487
- print(f"You: {text}")
488
-
489
- await self._sdk.start_voice_session(on_voice_input)
490
-
491
- def clear_memory(self) -> None:
492
- """Clear the conversation memory."""
493
- self._sdk.clear_history()
494
-
495
- def get_conversation(self) -> list:
496
- """Get the conversation history in a readable format."""
497
- return self._sdk.get_formatted_history()
498
-
499
-
500
- # Convenience functions for one-off usage
501
- async def quick_chat(
502
- message: str, system_prompt: Optional[str] = None, assistant_name: str = "gaia"
503
- ) -> str:
504
- """
505
- Quick one-off text chat with conversation memory.
506
-
507
- Args:
508
- message: Message to send
509
- system_prompt: Optional system prompt
510
- assistant_name: Name to use for the assistant
511
-
512
- Returns:
513
- AI response
514
- """
515
- config = TalkConfig(
516
- system_prompt=system_prompt,
517
- assistant_name=assistant_name,
518
- enable_tts=False,
519
- logging_level="WARNING",
520
- max_history_length=2, # Small history for quick chat
521
- )
522
- sdk = TalkSDK(config)
523
- response = await sdk.chat(message)
524
- return response.text
525
-
526
-
527
- async def quick_voice_chat(
528
- system_prompt: Optional[str] = None, assistant_name: str = "gaia"
529
- ) -> None:
530
- """
531
- Quick one-off voice chat session with conversation memory.
532
-
533
- Args:
534
- system_prompt: Optional system prompt
535
- assistant_name: Name to use for the assistant
536
- """
537
- simple = SimpleTalk(system_prompt=system_prompt, assistant_name=assistant_name)
538
- await simple.voice_chat()
1
+ #!/usr/bin/env python3
2
+ # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """
6
+ Gaia Talk SDK - Unified voice and text chat integration
7
+ """
8
+
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from enum import Enum
12
+ from typing import Any, AsyncGenerator, Callable, Dict, Optional
13
+
14
+ from gaia.audio.audio_client import AudioClient
15
+ from gaia.chat.sdk import ChatConfig, ChatSDK
16
+ from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME
17
+ from gaia.logger import get_logger
18
+
19
+
20
+ class TalkMode(Enum):
21
+ """Talk mode options."""
22
+
23
+ TEXT_ONLY = "text_only"
24
+ VOICE_ONLY = "voice_only"
25
+ VOICE_AND_TEXT = "voice_and_text"
26
+
27
+
28
+ @dataclass
29
+ class TalkConfig:
30
+ """Configuration for TalkSDK."""
31
+
32
+ # Voice-specific settings
33
+ whisper_model_size: str = "base"
34
+ audio_device_index: Optional[int] = None # Use default input device
35
+ silence_threshold: float = 0.5
36
+ enable_tts: bool = True
37
+ mode: TalkMode = TalkMode.VOICE_AND_TEXT
38
+
39
+ # Chat settings (from ChatConfig)
40
+ model: str = DEFAULT_MODEL_NAME
41
+ max_tokens: int = 512
42
+ system_prompt: Optional[str] = None
43
+ max_history_length: int = 4 # Number of conversation pairs to keep
44
+ assistant_name: str = "gaia"
45
+
46
+ # General settings
47
+ use_claude: bool = False # Use Claude API
48
+ use_chatgpt: bool = False # Use ChatGPT/OpenAI API
49
+ show_stats: bool = False
50
+ logging_level: str = "INFO"
51
+
52
+ # RAG settings (optional - for document Q&A)
53
+ rag_documents: Optional[list] = None # PDF documents to index
54
+
55
+
56
+ @dataclass
57
+ class TalkResponse:
58
+ """Response from talk operations."""
59
+
60
+ text: str
61
+ stats: Optional[Dict[str, Any]] = None
62
+ is_complete: bool = True
63
+
64
+
65
+ class TalkSDK:
66
+ """
67
+ Gaia Talk SDK - Unified voice and text chat integration.
68
+
69
+ This SDK provides a simple interface for integrating Gaia's voice and text
70
+ chat capabilities into applications.
71
+
72
+ Example usage:
73
+ ```python
74
+ from gaia.talk.sdk import TalkSDK, TalkConfig
75
+
76
+ # Create SDK instance
77
+ config = TalkConfig(enable_tts=True, show_stats=True)
78
+ talk = TalkSDK(config)
79
+
80
+ # Text chat
81
+ response = await talk.chat("Hello, how are you?")
82
+ print(response.text)
83
+
84
+ # Streaming chat
85
+ async for chunk in talk.chat_stream("Tell me a story"):
86
+ print(chunk.text, end="", flush=True)
87
+
88
+ # Voice chat with document Q&A
89
+ talk_rag = TalkSDK(TalkConfig(enable_tts=True))
90
+ talk_rag.enable_rag(documents=["manual.pdf", "guide.pdf"])
91
+ await talk_rag.start_voice_session()
92
+
93
+ # Voice chat with callback
94
+ def on_voice_input(text):
95
+ print(f"User said: {text}")
96
+
97
+ await talk.start_voice_session(on_voice_input)
98
+ ```
99
+ """
100
+
101
+ def __init__(self, config: Optional[TalkConfig] = None):
102
+ """
103
+ Initialize the TalkSDK.
104
+
105
+ Args:
106
+ config: Configuration options. If None, uses defaults.
107
+ """
108
+ self.config = config or TalkConfig()
109
+ self.log = get_logger(__name__)
110
+ self.log.setLevel(getattr(logging, self.config.logging_level))
111
+
112
+ # Initialize ChatSDK for text generation with conversation history
113
+ chat_config = ChatConfig(
114
+ model=self.config.model,
115
+ max_tokens=self.config.max_tokens,
116
+ system_prompt=self.config.system_prompt,
117
+ max_history_length=self.config.max_history_length,
118
+ assistant_name=self.config.assistant_name,
119
+ show_stats=self.config.show_stats,
120
+ logging_level=self.config.logging_level,
121
+ use_claude=self.config.use_claude,
122
+ use_chatgpt=self.config.use_chatgpt,
123
+ )
124
+ self.chat_sdk = ChatSDK(chat_config)
125
+
126
+ # Initialize AudioClient with configuration (for voice features)
127
+ self.audio_client = AudioClient(
128
+ whisper_model_size=self.config.whisper_model_size,
129
+ audio_device_index=self.config.audio_device_index,
130
+ silence_threshold=self.config.silence_threshold,
131
+ enable_tts=self.config.enable_tts,
132
+ logging_level=self.config.logging_level,
133
+ use_claude=self.config.use_claude,
134
+ use_chatgpt=self.config.use_chatgpt,
135
+ system_prompt=self.config.system_prompt,
136
+ )
137
+
138
+ self.show_stats = self.config.show_stats
139
+ self._voice_session_active = False
140
+
141
+ # Enable RAG if documents are provided
142
+ if self.config.rag_documents:
143
+ self.enable_rag(documents=self.config.rag_documents)
144
+
145
+ self.log.info("TalkSDK initialized with ChatSDK integration")
146
+
147
+ async def chat(self, message: str) -> TalkResponse:
148
+ """
149
+ Send a text message and get a complete response.
150
+
151
+ Args:
152
+ message: The message to send
153
+
154
+ Returns:
155
+ TalkResponse with the complete response
156
+ """
157
+ try:
158
+ # Use ChatSDK for text generation (with conversation history)
159
+ chat_response = self.chat_sdk.send(message)
160
+
161
+ stats = None
162
+ if self.show_stats:
163
+ stats = chat_response.stats or self.get_stats()
164
+
165
+ return TalkResponse(text=chat_response.text, stats=stats, is_complete=True)
166
+
167
+ except Exception as e:
168
+ self.log.error(f"Error in chat: {e}")
169
+ raise
170
+
171
+ async def chat_stream(self, message: str) -> AsyncGenerator[TalkResponse, None]:
172
+ """
173
+ Send a text message and get a streaming response.
174
+
175
+ Args:
176
+ message: The message to send
177
+
178
+ Yields:
179
+ TalkResponse chunks as they arrive
180
+ """
181
+ try:
182
+ # Use ChatSDK for streaming text generation (with conversation history)
183
+ for chat_chunk in self.chat_sdk.send_stream(message):
184
+ if not chat_chunk.is_complete:
185
+ yield TalkResponse(text=chat_chunk.text, is_complete=False)
186
+ else:
187
+ # Final chunk with stats
188
+ stats = chat_chunk.stats if self.show_stats else None
189
+ yield TalkResponse(text="", stats=stats, is_complete=True)
190
+
191
+ except Exception as e:
192
+ self.log.error(f"Error in chat_stream: {e}")
193
+ raise
194
+
195
+ async def process_voice_input(self, text: str) -> TalkResponse:
196
+ """
197
+ Process voice input text through the complete voice pipeline.
198
+
199
+ This includes TTS output if enabled.
200
+
201
+ Args:
202
+ text: The transcribed voice input
203
+
204
+ Returns:
205
+ TalkResponse with the processed response
206
+ """
207
+ try:
208
+ # Use ChatSDK to generate response (with conversation history)
209
+ chat_response = self.chat_sdk.send(text)
210
+
211
+ # If TTS is enabled, speak the response
212
+ if self.config.enable_tts and getattr(self.audio_client, "tts", None):
213
+ await self.audio_client.speak_text(chat_response.text)
214
+
215
+ stats = None
216
+ if self.show_stats:
217
+ stats = chat_response.stats or self.get_stats()
218
+
219
+ return TalkResponse(text=chat_response.text, stats=stats, is_complete=True)
220
+
221
+ except Exception as e:
222
+ self.log.error(f"Error processing voice input: {e}")
223
+ raise
224
+
225
+ async def start_voice_session(
226
+ self,
227
+ on_voice_input: Optional[Callable[[str], None]] = None,
228
+ ) -> None:
229
+ """
230
+ Start an interactive voice session.
231
+
232
+ Args:
233
+ on_voice_input: Optional callback called when voice input is detected
234
+ """
235
+ try:
236
+ self._voice_session_active = True
237
+
238
+ # Initialize TTS if enabled
239
+ self.audio_client.initialize_tts()
240
+
241
+ # Create voice processor that uses ChatSDK for responses
242
+ async def voice_processor(text: str):
243
+ # Call user callback if provided
244
+ if on_voice_input:
245
+ on_voice_input(text)
246
+
247
+ # Use ChatSDK to generate response (with conversation history)
248
+ chat_response = self.chat_sdk.send(text)
249
+
250
+ # If TTS is enabled, speak the response
251
+ if self.config.enable_tts and getattr(self.audio_client, "tts", None):
252
+ await self.audio_client.speak_text(chat_response.text)
253
+
254
+ # Print the response for user feedback
255
+ print(f"{self.config.assistant_name.title()}: {chat_response.text}")
256
+
257
+ # Show stats if enabled
258
+ if self.show_stats and chat_response.stats:
259
+ print(f"Stats: {chat_response.stats}")
260
+
261
+ # Start voice chat session with our processor
262
+ await self.audio_client.start_voice_chat(voice_processor)
263
+
264
+ except KeyboardInterrupt:
265
+ self.log.info("Voice session interrupted by user")
266
+ except Exception as e:
267
+ self.log.error(f"Error in voice session: {e}")
268
+ raise
269
+ finally:
270
+ self._voice_session_active = False
271
+ self.log.info("Voice chat session ended")
272
+
273
+ async def halt_generation(self) -> None:
274
+ """Halt the current LLM generation."""
275
+ try:
276
+ await self.audio_client.halt_generation()
277
+ except Exception as e:
278
+ self.log.error(f"Error halting generation: {e}")
279
+ raise
280
+
281
+ def get_stats(self) -> Dict[str, Any]:
282
+ """
283
+ Get performance statistics.
284
+
285
+ Returns:
286
+ Dictionary of performance stats
287
+ """
288
+ try:
289
+ # Get stats from ChatSDK instead of directly from LLMClient
290
+ return self.chat_sdk.get_stats()
291
+ except Exception as e:
292
+ self.log.warning(f"Failed to get stats: {e}")
293
+ return {}
294
+
295
+ def update_config(self, **kwargs) -> None:
296
+ """
297
+ Update configuration dynamically.
298
+
299
+ Args:
300
+ **kwargs: Configuration parameters to update
301
+ """
302
+ # Update our config
303
+ for key, value in kwargs.items():
304
+ if hasattr(self.config, key):
305
+ setattr(self.config, key, value)
306
+
307
+ # Update show_stats
308
+ if "show_stats" in kwargs:
309
+ self.show_stats = kwargs["show_stats"]
310
+
311
+ # Update AudioClient configuration
312
+ if "silence_threshold" in kwargs:
313
+ self.audio_client.silence_threshold = kwargs["silence_threshold"]
314
+
315
+ # Update ChatSDK configuration
316
+ chat_updates = {}
317
+ if "system_prompt" in kwargs:
318
+ chat_updates["system_prompt"] = kwargs["system_prompt"]
319
+ # Also update AudioClient's system prompt for consistency
320
+ self.audio_client.llm_client.system_prompt = kwargs["system_prompt"]
321
+ if "max_tokens" in kwargs:
322
+ chat_updates["max_tokens"] = kwargs["max_tokens"]
323
+ if "max_history_length" in kwargs:
324
+ chat_updates["max_history_length"] = kwargs["max_history_length"]
325
+ if "assistant_name" in kwargs:
326
+ chat_updates["assistant_name"] = kwargs["assistant_name"]
327
+
328
+ if chat_updates:
329
+ self.chat_sdk.update_config(**chat_updates)
330
+
331
+ def clear_history(self) -> None:
332
+ """Clear the conversation history."""
333
+ self.chat_sdk.clear_history()
334
+ self.log.debug("Conversation history cleared")
335
+
336
+ def get_history(self) -> list:
337
+ """Get the current conversation history."""
338
+ return self.chat_sdk.get_history()
339
+
340
+ def get_formatted_history(self) -> list:
341
+ """Get the conversation history in structured format."""
342
+ return self.chat_sdk.get_formatted_history()
343
+
344
+ def enable_rag(self, documents: Optional[list] = None, **rag_kwargs) -> bool:
345
+ """
346
+ Enable RAG (Retrieval-Augmented Generation) for document-based voice/text chat.
347
+
348
+ Args:
349
+ documents: List of PDF file paths to index
350
+ **rag_kwargs: Additional RAG configuration options
351
+
352
+ Returns:
353
+ True if RAG was successfully enabled
354
+ """
355
+ try:
356
+ self.chat_sdk.enable_rag(documents=documents, **rag_kwargs)
357
+ self.log.info(
358
+ f"RAG enabled with {len(documents) if documents else 0} documents"
359
+ )
360
+ return True
361
+ except ImportError:
362
+ self.log.warning(
363
+ 'RAG dependencies not available. Install with: uv pip install -e ".[rag]"'
364
+ )
365
+ return False
366
+ except Exception as e:
367
+ self.log.error(f"Failed to enable RAG: {e}")
368
+ return False
369
+
370
+ def disable_rag(self) -> None:
371
+ """Disable RAG functionality."""
372
+ self.chat_sdk.disable_rag()
373
+ self.log.info("RAG disabled")
374
+
375
+ def add_document(self, document_path: str) -> bool:
376
+ """
377
+ Add a document to the RAG index.
378
+
379
+ Args:
380
+ document_path: Path to PDF file to index
381
+
382
+ Returns:
383
+ True if document was successfully added
384
+ """
385
+ if not self.chat_sdk.rag_enabled:
386
+ self.log.warning("RAG not enabled. Call enable_rag() first.")
387
+ return False
388
+
389
+ try:
390
+ return self.chat_sdk.add_document(document_path)
391
+ except Exception as e:
392
+ self.log.error(f"Failed to add document {document_path}: {e}")
393
+ return False
394
+
395
+ @property
396
+ def is_voice_session_active(self) -> bool:
397
+ """Check if a voice session is currently active."""
398
+ return self._voice_session_active
399
+
400
+ @property
401
+ def audio_devices(self) -> list:
402
+ """Get list of available audio input devices."""
403
+ try:
404
+ from gaia.audio.audio_recorder import AudioRecorder
405
+
406
+ recorder = AudioRecorder()
407
+ return recorder.list_audio_devices()
408
+ except Exception as e:
409
+ self.log.error(f"Error listing audio devices: {e}")
410
+ return []
411
+
412
+
413
class SimpleTalk:
    """
    Ultra-simple interface for quick integration.

    Example usage:
        ```python
        from gaia.talk.sdk import SimpleTalk

        talk = SimpleTalk()

        # Simple text chat
        response = await talk.ask("What's the weather like?")
        print(response)

        # Simple voice chat
        await talk.voice_chat()  # Starts interactive session
        ```
    """

    def __init__(
        self,
        system_prompt: Optional[str] = None,
        enable_tts: bool = True,
        assistant_name: str = "gaia",
    ):
        """Create a minimally configured talk interface.

        Args:
            system_prompt: Optional system prompt for the AI.
            enable_tts: Whether to enable text-to-speech.
            assistant_name: Name to use for the assistant.
        """
        self._sdk = TalkSDK(
            TalkConfig(
                system_prompt=system_prompt,
                enable_tts=enable_tts,
                assistant_name=assistant_name,
                show_stats=False,
                logging_level="WARNING",  # keep output quiet
            )
        )

    async def ask(self, question: str) -> str:
        """Ask a question and return the reply as plain text.

        Args:
            question: The question to ask.

        Returns:
            The AI's response as a string.
        """
        return (await self._sdk.chat(question)).text

    async def ask_stream(self, question: str):
        """Ask a question, yielding reply text chunks as they arrive.

        Args:
            question: The question to ask.

        Yields:
            Partial response text chunks (the terminal marker is skipped).
        """
        async for part in self._sdk.chat_stream(question):
            if part.is_complete:
                continue
            yield part.text

    async def voice_chat(self) -> None:
        """Start an interactive voice chat session."""
        print("Starting voice chat... Say 'stop' to quit or press Ctrl+C")

        def _echo_user(text: str):
            # Show the transcription back to the user.
            print(f"You: {text}")

        await self._sdk.start_voice_session(_echo_user)

    def clear_memory(self) -> None:
        """Forget the conversation so far."""
        self._sdk.clear_history()

    def get_conversation(self) -> list:
        """Return the conversation history in a readable format."""
        return self._sdk.get_formatted_history()
498
+
499
+
500
+ # Convenience functions for one-off usage
501
async def quick_chat(
    message: str, system_prompt: Optional[str] = None, assistant_name: str = "gaia"
) -> str:
    """One-off text chat helper (keeps a small conversation memory).

    Args:
        message: Message to send.
        system_prompt: Optional system prompt.
        assistant_name: Name to use for the assistant.

    Returns:
        The AI's reply text.
    """
    sdk = TalkSDK(
        TalkConfig(
            system_prompt=system_prompt,
            assistant_name=assistant_name,
            enable_tts=False,
            logging_level="WARNING",
            max_history_length=2,  # small history for a quick exchange
        )
    )
    return (await sdk.chat(message)).text
525
+
526
+
527
async def quick_voice_chat(
    system_prompt: Optional[str] = None, assistant_name: str = "gaia"
) -> None:
    """One-off interactive voice chat session with conversation memory.

    Args:
        system_prompt: Optional system prompt.
        assistant_name: Name to use for the assistant.
    """
    helper = SimpleTalk(system_prompt=system_prompt, assistant_name=assistant_name)
    await helper.voice_chat()