amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
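One structural change worth calling out from the list above: the monolithic gaia/llm/llm_client.py (723 lines, shown in full below) is removed, while gaia/llm/base_client.py, gaia/llm/exceptions.py, gaia/llm/factory.py, and a new gaia/llm/providers/ package (claude.py, lemonade.py, openai_provider.py) are added. The contents of those new modules are not part of this diff, so the sketch below is only a hypothetical illustration of how the old LLMClient's use_claude/use_openai flag handling could map onto a provider factory; every class and function name in it is invented, not taken from amd-gaia 0.15.2.

# Hypothetical sketch only: the real gaia/llm/factory.py and gaia/llm/providers/
# APIs are not shown in this diff. All names below are invented for illustration.
from typing import Callable, Dict, Protocol


class LLMProvider(Protocol):
    """Minimal interface a backend (Lemonade, Claude, OpenAI) might expose."""

    def generate(self, prompt: str, **kwargs) -> str: ...


class _EchoProvider:
    """Stand-in backend so the sketch runs without a server or API key."""

    def __init__(self, label: str) -> None:
        self.label = label

    def generate(self, prompt: str, **kwargs) -> str:
        return f"[{self.label}] {prompt}"


# Registry keyed by provider name, replacing LLMClient's use_claude/use_openai flags.
_PROVIDERS: Dict[str, Callable[[], LLMProvider]] = {
    "lemonade": lambda: _EchoProvider("lemonade"),
    "claude": lambda: _EchoProvider("claude"),
    "openai": lambda: _EchoProvider("openai"),
}


def create_provider(name: str) -> LLMProvider:
    """Factory entry point; unknown names fail fast instead of silently falling back."""
    try:
        return _PROVIDERS[name]()
    except KeyError as exc:
        raise ValueError(f"Unknown provider: {name}") from exc


if __name__ == "__main__":
    print(create_provider("lemonade").generate("hello"))

A registry-plus-factory shape like this keeps per-provider imports and credentials out of a single constructor, which is one plausible motivation for splitting the file, but the actual design in 0.15.2 may differ.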
gaia/llm/llm_client.py DELETED
@@ -1,723 +0,0 @@
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
- # SPDX-License-Identifier: MIT
-
- # Standard library imports
- import logging
- import os
- import time
- from typing import (
-     Any,
-     Callable,
-     Dict,
-     Iterator,
-     List,
-     Literal,
-     Optional,
-     TypeVar,
-     Union,
- )
-
- import httpx
-
- # Third-party imports
- import requests
- from dotenv import load_dotenv
- from openai import OpenAI
-
- from ..version import LEMONADE_VERSION
-
- # Local imports
- from .lemonade_client import DEFAULT_MODEL_NAME
-
- # Default Lemonade server URL (can be overridden via LEMONADE_BASE_URL env var)
- DEFAULT_LEMONADE_URL = "http://localhost:8000/api/v1"
-
- # Type variable for retry decorator
- T = TypeVar("T")
-
- # Conditional import for Claude
- try:
-     from ..eval.claude import ClaudeClient as AnthropicClaudeClient
-
-     CLAUDE_AVAILABLE = True
- except ImportError:
-     CLAUDE_AVAILABLE = False
-
- # Set up logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
- logger.setLevel(logging.INFO)  # Explicitly set module logger level
-
- # Load environment variables from .env file
- load_dotenv()
-
-
- class LLMClient:
-     def __init__(
-         self,
-         use_claude: bool = False,
-         use_openai: bool = False,
-         system_prompt: Optional[str] = None,
-         base_url: Optional[str] = None,
-         claude_model: str = "claude-sonnet-4-20250514",
-         max_retries: int = 3,
-         retry_base_delay: float = 1.0,
-     ):
-         """
-         Initialize the LLM client.
-
-         Args:
-             use_claude: If True, uses Anthropic Claude API.
-             use_openai: If True, uses OpenAI ChatGPT API.
-             system_prompt: Default system prompt to use for all generation requests.
-             base_url: Base URL for local LLM server (defaults to LEMONADE_BASE_URL env var).
-             claude_model: Claude model to use (e.g., "claude-sonnet-4-20250514").
-             max_retries: Maximum number of retry attempts on connection errors.
-             retry_base_delay: Base delay in seconds for exponential backoff.
-
-         Note: Uses local LLM server by default unless use_claude or use_openai is True.
-         Context size is configured when starting the Lemonade server with --ctx-size parameter.
-         """
-         # Use provided base_url, fall back to env var, then default
-         if base_url is None:
-             base_url = os.getenv("LEMONADE_BASE_URL", DEFAULT_LEMONADE_URL)
-
-         # Normalize base_url to ensure it has the /api/v1 suffix for Lemonade server
-         # This allows users to specify just "http://localhost:8000" for convenience
-         if base_url and not base_url.endswith("/api/v1"):
-             # Remove trailing slash if present
-             base_url = base_url.rstrip("/")
-             # Add /api/v1 if the URL looks like a Lemonade server (localhost or IP with port)
-             # but doesn't already have a path beyond the port
-             from urllib.parse import urlparse
-
-             parsed = urlparse(base_url)
-             # Only add /api/v1 if path is empty or just "/"
-             if not parsed.path or parsed.path == "/":
-                 base_url = f"{base_url}/api/v1"
-                 logger.debug(f"Normalized base_url to: {base_url}")
-
-         # Compute use_local: True if neither claude nor openai is selected
-         use_local = not (use_claude or use_openai)
-
-         logger.debug(
-             f"Initializing LLMClient with use_local={use_local}, use_claude={use_claude}, use_openai={use_openai}, base_url={base_url}"
-         )
-
-         self.use_claude = use_claude
-         self.use_openai = use_openai
-         self.base_url = base_url
-         self.system_prompt = system_prompt
-         self.max_retries = max_retries
-         self.retry_base_delay = retry_base_delay
-
-         if use_local:
-             # Configure timeout for local LLM server
-             # For streaming: timeout between chunks (read timeout)
-             # For non-streaming: total timeout for the entire response
-             self.client = OpenAI(
-                 base_url=base_url,
-                 api_key="None",
-                 timeout=httpx.Timeout(
-                     connect=15.0,  # 15 seconds to establish connection
-                     read=120.0,  # 120 seconds between data chunks (matches Lemonade DEFAULT_REQUEST_TIMEOUT)
-                     write=15.0,  # 15 seconds to send request
-                     pool=15.0,  # 15 seconds to acquire connection from pool
-                 ),
-                 max_retries=0,  # Disable retries to fail fast on connection issues
-             )
-             # Use completions endpoint for pre-formatted prompts (ChatSDK compatibility)
-             # Use chat endpoint when messages array is explicitly provided
-             self.endpoint = "completions"
-             logger.debug("Using Lemonade completions endpoint")
-             self.default_model = DEFAULT_MODEL_NAME
-             self.claude_client = None
-             logger.debug(f"Using local LLM with model={self.default_model}")
-         elif use_claude and CLAUDE_AVAILABLE:
-             # Use Claude API
-             self.claude_client = AnthropicClaudeClient(model=claude_model)
-             self.client = None
-             self.endpoint = "claude"
-             self.default_model = claude_model
-             logger.debug(f"Using Claude API with model={self.default_model}")
-         elif use_claude and not CLAUDE_AVAILABLE:
-             raise ValueError(
-                 "Claude support requested but anthropic library not available. Install with: uv pip install anthropic"
-             )
-         elif use_openai:
-             # Use OpenAI API
-             api_key = os.getenv("OPENAI_API_KEY")
-             if not api_key:
-                 raise ValueError(
-                     "OPENAI_API_KEY not found in environment variables. Please add it to your .env file."
-                 )
-             self.client = OpenAI(api_key=api_key)
-             self.claude_client = None
-             self.endpoint = "openai"
-             self.default_model = "gpt-4o"  # Updated to latest model
-             logger.debug(f"Using OpenAI API with model={self.default_model}")
-         else:
-             # This should not happen with the new logic, but keep as fallback
-             raise ValueError("Invalid LLM provider configuration")
-         if system_prompt:
-             logger.debug(f"System prompt set: {system_prompt[:100]}...")
-
-     def _retry_with_exponential_backoff(
-         self,
-         func: Callable[..., T],
-         *args,
-         **kwargs,
-     ) -> T:
-         """
-         Execute a function with exponential backoff retry on connection errors.
-
-         Args:
-             func: The function to execute
-             *args: Positional arguments for the function
-             **kwargs: Keyword arguments for the function
-
-         Returns:
-             The result of the function call
-
-         Raises:
-             The last exception if all retries are exhausted
-         """
-         delay = self.retry_base_delay
-         max_delay = 60.0
-         exponential_base = 2.0
-
-         for attempt in range(self.max_retries + 1):
-             try:
-                 return func(*args, **kwargs)
-             except (
-                 ConnectionError,
-                 httpx.ConnectError,
-                 httpx.TimeoutException,
-                 httpx.NetworkError,
-                 requests.exceptions.ConnectionError,
-                 requests.exceptions.Timeout,
-             ) as e:
-                 if attempt == self.max_retries:
-                     logger.error(
-                         f"Max retries ({self.max_retries}) reached for {func.__name__}. "
-                         f"Last error: {str(e)}"
-                     )
-                     raise
-
-                 # Calculate next delay with exponential backoff
-                 wait_time = min(delay, max_delay)
-                 logger.warning(
-                     f"Connection error in {func.__name__} (attempt {attempt + 1}/{self.max_retries + 1}): {str(e)}. "
-                     f"Retrying in {wait_time:.1f}s..."
-                 )
-
-                 time.sleep(wait_time)
-                 delay *= exponential_base
-
-     def generate(
-         self,
-         prompt: str,
-         model: Optional[str] = None,
-         endpoint: Optional[Literal["completions", "chat", "claude", "openai"]] = None,
-         system_prompt: Optional[str] = None,
-         stream: bool = False,
-         messages: Optional[List[Dict[str, str]]] = None,
-         **kwargs: Any,
-     ) -> Union[str, Iterator[str]]:
-         """
-         Generate a response from the LLM.
-
-         Args:
-             prompt: The user prompt/query to send to the LLM. For chat endpoint,
-                 if messages is not provided, this is treated as a pre-formatted
-                 prompt string that already contains the full conversation.
-             model: The model to use (defaults to endpoint-appropriate model)
-             endpoint: Override the endpoint to use (completions, chat, claude, or openai)
-             system_prompt: System prompt to use for this specific request (overrides default)
-             stream: If True, returns a generator that yields chunks of the response as they become available
-             messages: Optional list of message dicts with 'role' and 'content' keys.
-                 If provided, these are used directly for chat completions instead of prompt.
-             **kwargs: Additional parameters to pass to the API
-
-         Returns:
-             If stream=False: The complete generated text as a string
-             If stream=True: A generator yielding chunks of the response as they become available
-         """
-         model = model or self.default_model
-         endpoint_to_use = endpoint or self.endpoint
-         logger.debug(
-             f"LLMClient.generate() called with model={model}, endpoint={endpoint_to_use}, stream={stream}"
-         )
-
-         # Use provided system_prompt, fall back to instance default if not provided
-         effective_system_prompt = (
-             system_prompt if system_prompt is not None else self.system_prompt
-         )
-         logger.debug(
-             f"Using system prompt: {effective_system_prompt[:100] if effective_system_prompt else 'None'}..."
-         )
-
-         if endpoint_to_use == "claude":
-             # For Claude API, construct the prompt appropriately
-             if effective_system_prompt:
-                 # Claude handles system prompts differently in messages format
-                 full_prompt = f"System: {effective_system_prompt}\n\nHuman: {prompt}"
-             else:
-                 full_prompt = prompt
-
-             logger.debug(f"Using Claude API with prompt: {full_prompt[:200]}...")
-
-             try:
-                 if stream:
-                     logger.warning(
-                         "Streaming not yet implemented for Claude API, falling back to non-streaming"
-                     )
-
-                 # Use Claude client with retry logic
-                 logger.debug("Making request to Claude API")
-
-                 # Use retry logic for the API call
-                 result = self._retry_with_exponential_backoff(
-                     self.claude_client.get_completion, full_prompt
-                 )
-
-                 # Claude returns a list of content blocks, extract text
-                 if isinstance(result, list) and len(result) > 0:
-                     # Each content block has a 'text' attribute
-                     text_parts = []
-                     for content_block in result:
-                         if hasattr(content_block, "text"):
-                             text_parts.append(content_block.text)
-                         else:
-                             text_parts.append(str(content_block))
-                     result = "".join(text_parts)
-                 elif isinstance(result, str):
-                     pass  # result is already a string
-                 else:
-                     result = str(result)
-
-                 # Check for empty responses
-                 if not result or not result.strip():
-                     logger.warning("Empty response from Claude API")
-
-                 # Debug: log the response structure for troubleshooting
-                 logger.debug(f"Claude response length: {len(result)}")
-                 logger.debug(f"Claude response preview: {result[:300]}...")
-
-                 # Claude sometimes returns valid JSON followed by additional text
-                 # Try to extract just the JSON part if it exists
-                 result = self._clean_claude_response(result)
-
-                 return result
-             except Exception as e:
-                 logger.error(f"Error generating response from Claude API: {str(e)}")
-                 raise
-         elif endpoint_to_use == "completions":
-             # For local LLM with pre-formatted prompts (ChatSDK uses this)
-             # The prompt already contains the full formatted conversation
-             logger.debug(
-                 f"Using completions endpoint: prompt_length={len(prompt)} chars"
-             )
-
-             try:
-                 # Use retry logic for the API call
-                 response = self._retry_with_exponential_backoff(
-                     self.client.completions.create,
-                     model=model,
-                     prompt=prompt,
-                     temperature=0.1,
-                     stream=stream,
-                     **kwargs,
-                 )
-
-                 if stream:
-                     # Return a generator that yields chunks
-                     def stream_generator():
-                         for chunk in response:
-                             if (
-                                 hasattr(chunk.choices[0], "text")
-                                 and chunk.choices[0].text
-                             ):
-                                 yield chunk.choices[0].text
-
-                     return stream_generator()
-                 else:
-                     # Return the complete response
-                     result = response.choices[0].text
-                     if not result or not result.strip():
-                         logger.warning("Empty response from local LLM")
-                     return result
-             except (
-                 httpx.ConnectError,
-                 httpx.TimeoutException,
-                 httpx.NetworkError,
-             ) as e:
-                 logger.error(f"Network error connecting to local LLM server: {str(e)}")
-                 error_msg = f"LLM Server Connection Error: {str(e)}"
-                 raise ConnectionError(error_msg) from e
-             except Exception as e:
-                 error_str = str(e)
-                 logger.error(f"Error generating response from local LLM: {error_str}")
-
-                 if "404" in error_str:
-                     if (
-                         "endpoint" in error_str.lower()
-                         or "not found" in error_str.lower()
-                     ):
-                         raise ConnectionError(
-                             f"API endpoint error: {error_str}\n\n"
-                             f"This may indicate:\n"
-                             f" 1. Lemonade Server version mismatch (try updating to {LEMONADE_VERSION})\n"
-                             f" 2. Model not properly loaded or corrupted\n"
-                         ) from e
-
-                 if "network" in error_str.lower() or "connection" in error_str.lower():
-                     raise ConnectionError(f"LLM Server Error: {error_str}") from e
-                 raise
-         elif endpoint_to_use == "chat":
-             # For local LLM using chat completions format (Lemonade v9+)
-             if messages:
-                 # Use provided messages directly (proper chat history support)
-                 chat_messages = list(messages)
-                 # Prepend system prompt if provided and not already in messages
-                 if effective_system_prompt and (
-                     not chat_messages or chat_messages[0].get("role") != "system"
-                 ):
-                     chat_messages.insert(
-                         0, {"role": "system", "content": effective_system_prompt}
-                     )
-             else:
-                 # Treat prompt as pre-formatted string (legacy ChatSDK support)
-                 # Pass as single user message - the prompt already contains formatted history
-                 chat_messages = []
-                 if effective_system_prompt:
-                     chat_messages.append(
-                         {"role": "system", "content": effective_system_prompt}
-                     )
-                 chat_messages.append({"role": "user", "content": prompt})
-             logger.debug(
-                 f"Using chat completions for local LLM: {len(chat_messages)} messages"
-             )
-
-             try:
-                 # Use retry logic for the API call
-                 response = self._retry_with_exponential_backoff(
-                     self.client.chat.completions.create,
-                     model=model,
-                     messages=chat_messages,
-                     temperature=0.1,
-                     stream=stream,
-                     **kwargs,
-                 )
-
-                 if stream:
-                     # Return a generator that yields chunks
-                     def stream_generator():
-                         for chunk in response:
-                             if (
-                                 hasattr(chunk.choices[0].delta, "content")
-                                 and chunk.choices[0].delta.content
-                             ):
-                                 yield chunk.choices[0].delta.content
-
-                     return stream_generator()
-                 else:
-                     # Return the complete response
-                     result = response.choices[0].message.content
-                     if not result or not result.strip():
-                         logger.warning("Empty response from local LLM")
-                     return result
-             except (
-                 httpx.ConnectError,
-                 httpx.TimeoutException,
-                 httpx.NetworkError,
-             ) as e:
-                 logger.error(f"Network error connecting to local LLM server: {str(e)}")
-                 error_msg = f"LLM Server Connection Error: {str(e)}"
-                 raise ConnectionError(error_msg) from e
-             except Exception as e:
-                 error_str = str(e)
-                 logger.error(f"Error generating response from local LLM: {error_str}")
-
-                 # Check for 404 errors which might indicate endpoint or model issues
-                 if "404" in error_str:
-                     if (
-                         "endpoint" in error_str.lower()
-                         or "not found" in error_str.lower()
-                     ):
-                         raise ConnectionError(
-                             f"API endpoint error: {error_str}\n\n"
-                             f"This may indicate:\n"
-                             f" 1. Lemonade Server version mismatch (try updating to {LEMONADE_VERSION})\n"
-                             f" 2. Model not properly loaded or corrupted\n"
-                         ) from e
-
-                 if "network" in error_str.lower() or "connection" in error_str.lower():
-                     raise ConnectionError(f"LLM Server Error: {error_str}") from e
-                 raise
-         elif endpoint_to_use == "openai":
-             # For OpenAI API, use the messages format
-             messages = []
-             if effective_system_prompt:
-                 messages.append({"role": "system", "content": effective_system_prompt})
-             messages.append({"role": "user", "content": prompt})
-             logger.debug(f"OpenAI API messages: {messages}")
-
-             try:
-                 # Use retry logic for the API call
-                 response = self._retry_with_exponential_backoff(
-                     self.client.chat.completions.create,
-                     model=model,
-                     messages=messages,
-                     stream=stream,
-                     **kwargs,
-                 )
-
-                 if stream:
-                     # Return a generator that yields chunks
-                     def stream_generator():
-                         for chunk in response:
-                             if (
-                                 hasattr(chunk.choices[0].delta, "content")
-                                 and chunk.choices[0].delta.content
-                             ):
-                                 yield chunk.choices[0].delta.content
-
-                     return stream_generator()
-                 else:
-                     # Return the complete response as before
-                     result = response.choices[0].message.content
-                     logger.debug(f"OpenAI API response: {result[:200]}...")
-                     return result
-             except Exception as e:
-                 logger.error(f"Error generating response from OpenAI API: {str(e)}")
-                 raise
-         else:
-             raise ValueError(
-                 f"Unsupported endpoint: {endpoint_to_use}. Supported endpoints: 'completions', 'chat', 'claude', 'openai'."
-             )
-
-     def get_performance_stats(self) -> Dict[str, Any]:
-         """
-         Get performance statistics from the last LLM request.
-
-         Returns:
-             Dictionary containing performance statistics like:
-             - time_to_first_token: Time in seconds until first token is generated
-             - tokens_per_second: Rate of token generation
-             - input_tokens: Number of tokens in the input
-             - output_tokens: Number of tokens in the output
-         """
-         if not self.base_url:
-             # Return empty stats if not using local LLM
-             return {
-                 "time_to_first_token": None,
-                 "tokens_per_second": None,
-                 "input_tokens": None,
-                 "output_tokens": None,
-             }
-
-         try:
-             # Use the Lemonade API v1 stats endpoint
-             # This returns both timing stats and token counts
-             stats_url = f"{self.base_url}/stats"
-             response = requests.get(stats_url)
-
-             if response.status_code == 200:
-                 stats = response.json()
-                 # Remove decode_token_times as it's too verbose
-                 if "decode_token_times" in stats:
-                     del stats["decode_token_times"]
-                 return stats
-             else:
-                 logger.warning(
-                     f"Failed to get stats: {response.status_code} - {response.text}"
-                 )
-                 return {}
-         except Exception as e:
-             logger.warning(f"Error fetching performance stats: {str(e)}")
-             return {}
-
-     def is_generating(self) -> bool:
-         """
-         Check if the local LLM is currently generating.
-
-         Returns:
-             bool: True if generating, False otherwise
-
-         Note:
-             Only available when using local LLM (use_local=True).
-             Returns False for OpenAI API usage.
-         """
-         if not self.base_url:
-             logger.debug("is_generating(): Not using local LLM, returning False")
-             return False
-
-         try:
-             # Check the generating endpoint
-             # Remove /api/v1 suffix to access root-level endpoints
-             base = self.base_url.replace("/api/v1", "")
-             generating_url = f"{base}/generating"
-             response = requests.get(generating_url)
-             if response.status_code == 200:
-                 response_data = response.json()
-                 is_gen = response_data.get("is_generating", False)
-                 logger.debug(f"Generation status check: {is_gen}")
-                 return is_gen
-             else:
-                 logger.warning(
-                     f"Failed to check generation status: {response.status_code} - {response.text}"
-                 )
-                 return False
-         except Exception as e:
-             logger.warning(f"Error checking generation status: {str(e)}")
-             return False
-
-     def halt_generation(self) -> bool:
-         """
-         Halt current generation on the local LLM server.
-
-         Returns:
-             bool: True if halt was successful, False otherwise
-
-         Note:
-             Only available when using local LLM (use_local=True).
-             Does nothing for OpenAI API usage.
-         """
-         if not self.base_url:
-             logger.debug("halt_generation(): Not using local LLM, nothing to halt")
-             return False
-
-         try:
-             # Send halt request
-             # Remove /api/v1 suffix to access root-level endpoints
-             base = self.base_url.replace("/api/v1", "")
-             halt_url = f"{base}/halt"
-             response = requests.get(halt_url)
-             if response.status_code == 200:
-                 logger.debug("Successfully halted current generation")
-                 return True
-             else:
-                 logger.warning(
-                     f"Failed to halt generation: {response.status_code} - {response.text}"
-                 )
-                 return False
-         except Exception as e:
-             logger.warning(f"Error halting generation: {str(e)}")
-             return False
-
-     def _clean_claude_response(self, response: str) -> str:
-         """
-         Extract valid JSON from Claude responses that may contain extra content after the JSON.
-
-         Args:
-             response: The raw response from Claude API
-
-         Returns:
-             Cleaned response with only the JSON portion
-         """
-         import json
-
-         if not response or not response.strip():
-             return response
-
-         # Try to parse as-is first
-         try:
-             json.loads(response.strip())
-             return response.strip()
-         except json.JSONDecodeError:
-             pass
-
-         # Look for JSON object patterns
-         # Find the first { and try to extract a complete JSON object
-         start_idx = response.find("{")
-         if start_idx == -1:
-             # No JSON object found, return as-is
-             return response
-
-         # Find the matching closing brace by counting braces
-         brace_count = 0
-         end_idx = -1
-
-         for i in range(start_idx, len(response)):
-             char = response[i]
-             if char == "{":
-                 brace_count += 1
-             elif char == "}":
-                 brace_count -= 1
-                 if brace_count == 0:
-                     end_idx = i
-                     break
-
-         if end_idx == -1:
-             # No complete JSON object found
-             return response
-
-         # Extract the JSON portion
-         json_portion = response[start_idx : end_idx + 1]
-
-         # Validate that it's valid JSON
-         try:
-             json.loads(json_portion)
-             logger.debug(
-                 f"Extracted JSON from Claude response: {len(json_portion)} chars vs original {len(response)} chars"
-             )
-             return json_portion
-         except json.JSONDecodeError:
-             # If extracted portion is not valid JSON, return original
-             logger.debug(
-                 "Could not extract valid JSON from Claude response, returning original"
-             )
-             return response
-
-
- def main():
-     # Example usage with local LLM
-     system_prompt = "You are a creative assistant who specializes in short stories."
-
-     local_llm = LLMClient(system_prompt=system_prompt)
-
-     # Non-streaming example
-     result = local_llm.generate("Write a one-sentence bedtime story about a unicorn.")
-     print(f"Local LLM response:\n{result}")
-     print(f"Local LLM stats:\n{local_llm.get_performance_stats()}")
-
-     # Halt functionality demo (only for local LLM)
-     print(f"\nHalt functionality available: {local_llm.is_generating()}")
-
-     # Streaming example
-     print("\nLocal LLM streaming response:")
-     for chunk in local_llm.generate(
-         "Write a one-sentence bedtime story about a dragon.", stream=True
-     ):
-         print(chunk, end="", flush=True)
-     print("\n")
-
-     # Example usage with Claude API
-     if CLAUDE_AVAILABLE:
-         claude_llm = LLMClient(use_claude=True, system_prompt=system_prompt)
-
-         # Non-streaming example
-         result = claude_llm.generate(
-             "Write a one-sentence bedtime story about a unicorn."
-         )
-         print(f"\nClaude API response:\n{result}")
-
-     # Example usage with OpenAI API
-     openai_llm = LLMClient(use_openai=True, system_prompt=system_prompt)
-
-     # Non-streaming example
-     result = openai_llm.generate("Write a one-sentence bedtime story about a unicorn.")
-     print(f"\nOpenAI API response:\n{result}")
-
-     # Streaming example
-     print("\nOpenAI API streaming response:")
-     for chunk in openai_llm.generate(
-         "Write a one-sentence bedtime story about a dragon.", stream=True
-     ):
-         print(chunk, end="", flush=True)
-     print("\n")
-
-
- if __name__ == "__main__":
-     main()
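Because gaia/llm/llm_client.py is no longer present in 0.15.2, any downstream code that does `from gaia.llm.llm_client import LLMClient` will fail with ImportError after upgrading. A minimal defensive pattern, which assumes nothing about the replacement modules' APIs (they are not shown in this diff), is to gate the import and fail with an actionable message:

# Defensive import for code that still targets the removed module.
# gaia.llm.llm_client exists in amd-gaia 0.15.0 but is absent from the 0.15.2 wheel;
# the replacement modules (gaia/llm/factory.py, gaia/llm/providers/) are not shown
# in this diff, so no assumption is made about their API here.
try:
    from gaia.llm.llm_client import LLMClient  # works on amd-gaia <= 0.15.0
except ImportError as exc:
    raise ImportError(
        "gaia.llm.llm_client is not available in amd-gaia 0.15.2; "
        "pin amd-gaia==0.15.0 or migrate to the new gaia.llm provider modules."
    ) from exc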