amd-gaia 0.15.0-py3-none-any.whl → 0.15.1-py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (181)
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5632
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -723
  180. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
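Entry 179 removes the monolithic gaia/llm/llm_client.py, while entries 140, 142, and 145-148 add gaia/llm/base_client.py, gaia/llm/factory.py, and per-provider modules under gaia/llm/providers/. That pattern suggests the single LLMClient class was split into a shared base class, a factory, and one class per backend. The sketch below only illustrates that general kind of split; every name in it (BaseClient, LemonadeClient, OpenAIClient, make_client) is hypothetical and is not taken from gaia's actual 0.15.1 code.

# Hypothetical provider-factory sketch; names are illustrative only and do not
# reflect the real contents of gaia/llm/base_client.py, factory.py, or providers/.
from abc import ABC, abstractmethod


class BaseClient(ABC):
    """Interface each backend (Lemonade, Claude, OpenAI) would implement."""

    @abstractmethod
    def generate(self, prompt: str, **kwargs) -> str: ...


class LemonadeClient(BaseClient):
    def generate(self, prompt: str, **kwargs) -> str:
        raise NotImplementedError("call the local Lemonade server here")


class OpenAIClient(BaseClient):
    def generate(self, prompt: str, **kwargs) -> str:
        raise NotImplementedError("call the OpenAI API here")


def make_client(provider: str) -> BaseClient:
    # A factory keeps provider selection in one place instead of one large class.
    registry = {"lemonade": LemonadeClient, "openai": OpenAIClient}
    return registry[provider]()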
gaia/llm/llm_client.py DELETED
@@ -1,723 +0,0 @@
-# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
-# SPDX-License-Identifier: MIT
-
-# Standard library imports
-import logging
-import os
-import time
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Iterator,
-    List,
-    Literal,
-    Optional,
-    TypeVar,
-    Union,
-)
-
-import httpx
-
-# Third-party imports
-import requests
-from dotenv import load_dotenv
-from openai import OpenAI
-
-from ..version import LEMONADE_VERSION
-
-# Local imports
-from .lemonade_client import DEFAULT_MODEL_NAME
-
-# Default Lemonade server URL (can be overridden via LEMONADE_BASE_URL env var)
-DEFAULT_LEMONADE_URL = "http://localhost:8000/api/v1"
-
-# Type variable for retry decorator
-T = TypeVar("T")
-
-# Conditional import for Claude
-try:
-    from ..eval.claude import ClaudeClient as AnthropicClaudeClient
-
-    CLAUDE_AVAILABLE = True
-except ImportError:
-    CLAUDE_AVAILABLE = False
-
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)  # Explicitly set module logger level
-
-# Load environment variables from .env file
-load_dotenv()
-
-
-class LLMClient:
-    def __init__(
-        self,
-        use_claude: bool = False,
-        use_openai: bool = False,
-        system_prompt: Optional[str] = None,
-        base_url: Optional[str] = None,
-        claude_model: str = "claude-sonnet-4-20250514",
-        max_retries: int = 3,
-        retry_base_delay: float = 1.0,
-    ):
-        """
-        Initialize the LLM client.
-
-        Args:
-            use_claude: If True, uses Anthropic Claude API.
-            use_openai: If True, uses OpenAI ChatGPT API.
-            system_prompt: Default system prompt to use for all generation requests.
-            base_url: Base URL for local LLM server (defaults to LEMONADE_BASE_URL env var).
-            claude_model: Claude model to use (e.g., "claude-sonnet-4-20250514").
-            max_retries: Maximum number of retry attempts on connection errors.
-            retry_base_delay: Base delay in seconds for exponential backoff.
-
-        Note: Uses local LLM server by default unless use_claude or use_openai is True.
-        Context size is configured when starting the Lemonade server with --ctx-size parameter.
-        """
-        # Use provided base_url, fall back to env var, then default
-        if base_url is None:
-            base_url = os.getenv("LEMONADE_BASE_URL", DEFAULT_LEMONADE_URL)
-
-        # Normalize base_url to ensure it has the /api/v1 suffix for Lemonade server
-        # This allows users to specify just "http://localhost:8000" for convenience
-        if base_url and not base_url.endswith("/api/v1"):
-            # Remove trailing slash if present
-            base_url = base_url.rstrip("/")
-            # Add /api/v1 if the URL looks like a Lemonade server (localhost or IP with port)
-            # but doesn't already have a path beyond the port
-            from urllib.parse import urlparse
-
-            parsed = urlparse(base_url)
-            # Only add /api/v1 if path is empty or just "/"
-            if not parsed.path or parsed.path == "/":
-                base_url = f"{base_url}/api/v1"
-                logger.debug(f"Normalized base_url to: {base_url}")
-
-        # Compute use_local: True if neither claude nor openai is selected
-        use_local = not (use_claude or use_openai)
-
-        logger.debug(
-            f"Initializing LLMClient with use_local={use_local}, use_claude={use_claude}, use_openai={use_openai}, base_url={base_url}"
-        )
-
-        self.use_claude = use_claude
-        self.use_openai = use_openai
-        self.base_url = base_url
-        self.system_prompt = system_prompt
-        self.max_retries = max_retries
-        self.retry_base_delay = retry_base_delay
-
-        if use_local:
-            # Configure timeout for local LLM server
-            # For streaming: timeout between chunks (read timeout)
-            # For non-streaming: total timeout for the entire response
-            self.client = OpenAI(
-                base_url=base_url,
-                api_key="None",
-                timeout=httpx.Timeout(
-                    connect=15.0,  # 15 seconds to establish connection
-                    read=120.0,  # 120 seconds between data chunks (matches Lemonade DEFAULT_REQUEST_TIMEOUT)
-                    write=15.0,  # 15 seconds to send request
-                    pool=15.0,  # 15 seconds to acquire connection from pool
-                ),
-                max_retries=0,  # Disable retries to fail fast on connection issues
-            )
-            # Use completions endpoint for pre-formatted prompts (ChatSDK compatibility)
-            # Use chat endpoint when messages array is explicitly provided
-            self.endpoint = "completions"
-            logger.debug("Using Lemonade completions endpoint")
-            self.default_model = DEFAULT_MODEL_NAME
-            self.claude_client = None
-            logger.debug(f"Using local LLM with model={self.default_model}")
-        elif use_claude and CLAUDE_AVAILABLE:
-            # Use Claude API
-            self.claude_client = AnthropicClaudeClient(model=claude_model)
-            self.client = None
-            self.endpoint = "claude"
-            self.default_model = claude_model
-            logger.debug(f"Using Claude API with model={self.default_model}")
-        elif use_claude and not CLAUDE_AVAILABLE:
-            raise ValueError(
-                "Claude support requested but anthropic library not available. Install with: uv pip install anthropic"
-            )
-        elif use_openai:
-            # Use OpenAI API
-            api_key = os.getenv("OPENAI_API_KEY")
-            if not api_key:
-                raise ValueError(
-                    "OPENAI_API_KEY not found in environment variables. Please add it to your .env file."
-                )
-            self.client = OpenAI(api_key=api_key)
-            self.claude_client = None
-            self.endpoint = "openai"
-            self.default_model = "gpt-4o"  # Updated to latest model
-            logger.debug(f"Using OpenAI API with model={self.default_model}")
-        else:
-            # This should not happen with the new logic, but keep as fallback
-            raise ValueError("Invalid LLM provider configuration")
-        if system_prompt:
-            logger.debug(f"System prompt set: {system_prompt[:100]}...")
-
-    def _retry_with_exponential_backoff(
-        self,
-        func: Callable[..., T],
-        *args,
-        **kwargs,
-    ) -> T:
-        """
-        Execute a function with exponential backoff retry on connection errors.
-
-        Args:
-            func: The function to execute
-            *args: Positional arguments for the function
-            **kwargs: Keyword arguments for the function
-
-        Returns:
-            The result of the function call
-
-        Raises:
-            The last exception if all retries are exhausted
-        """
-        delay = self.retry_base_delay
-        max_delay = 60.0
-        exponential_base = 2.0
-
-        for attempt in range(self.max_retries + 1):
-            try:
-                return func(*args, **kwargs)
-            except (
-                ConnectionError,
-                httpx.ConnectError,
-                httpx.TimeoutException,
-                httpx.NetworkError,
-                requests.exceptions.ConnectionError,
-                requests.exceptions.Timeout,
-            ) as e:
-                if attempt == self.max_retries:
-                    logger.error(
-                        f"Max retries ({self.max_retries}) reached for {func.__name__}. "
-                        f"Last error: {str(e)}"
-                    )
-                    raise
-
-                # Calculate next delay with exponential backoff
-                wait_time = min(delay, max_delay)
-                logger.warning(
-                    f"Connection error in {func.__name__} (attempt {attempt + 1}/{self.max_retries + 1}): {str(e)}. "
-                    f"Retrying in {wait_time:.1f}s..."
-                )
-
-                time.sleep(wait_time)
-                delay *= exponential_base
-
-    def generate(
-        self,
-        prompt: str,
-        model: Optional[str] = None,
-        endpoint: Optional[Literal["completions", "chat", "claude", "openai"]] = None,
-        system_prompt: Optional[str] = None,
-        stream: bool = False,
-        messages: Optional[List[Dict[str, str]]] = None,
-        **kwargs: Any,
-    ) -> Union[str, Iterator[str]]:
-        """
-        Generate a response from the LLM.
-
-        Args:
-            prompt: The user prompt/query to send to the LLM. For chat endpoint,
-                if messages is not provided, this is treated as a pre-formatted
-                prompt string that already contains the full conversation.
-            model: The model to use (defaults to endpoint-appropriate model)
-            endpoint: Override the endpoint to use (completions, chat, claude, or openai)
-            system_prompt: System prompt to use for this specific request (overrides default)
-            stream: If True, returns a generator that yields chunks of the response as they become available
-            messages: Optional list of message dicts with 'role' and 'content' keys.
-                If provided, these are used directly for chat completions instead of prompt.
-            **kwargs: Additional parameters to pass to the API
-
-        Returns:
-            If stream=False: The complete generated text as a string
-            If stream=True: A generator yielding chunks of the response as they become available
-        """
-        model = model or self.default_model
-        endpoint_to_use = endpoint or self.endpoint
-        logger.debug(
-            f"LLMClient.generate() called with model={model}, endpoint={endpoint_to_use}, stream={stream}"
-        )
-
-        # Use provided system_prompt, fall back to instance default if not provided
-        effective_system_prompt = (
-            system_prompt if system_prompt is not None else self.system_prompt
-        )
-        logger.debug(
-            f"Using system prompt: {effective_system_prompt[:100] if effective_system_prompt else 'None'}..."
-        )
-
-        if endpoint_to_use == "claude":
-            # For Claude API, construct the prompt appropriately
-            if effective_system_prompt:
-                # Claude handles system prompts differently in messages format
-                full_prompt = f"System: {effective_system_prompt}\n\nHuman: {prompt}"
-            else:
-                full_prompt = prompt
-
-            logger.debug(f"Using Claude API with prompt: {full_prompt[:200]}...")
-
-            try:
-                if stream:
-                    logger.warning(
-                        "Streaming not yet implemented for Claude API, falling back to non-streaming"
-                    )
-
-                # Use Claude client with retry logic
-                logger.debug("Making request to Claude API")
-
-                # Use retry logic for the API call
-                result = self._retry_with_exponential_backoff(
-                    self.claude_client.get_completion, full_prompt
-                )
-
-                # Claude returns a list of content blocks, extract text
-                if isinstance(result, list) and len(result) > 0:
-                    # Each content block has a 'text' attribute
-                    text_parts = []
-                    for content_block in result:
-                        if hasattr(content_block, "text"):
-                            text_parts.append(content_block.text)
-                        else:
-                            text_parts.append(str(content_block))
-                    result = "".join(text_parts)
-                elif isinstance(result, str):
-                    pass  # result is already a string
-                else:
-                    result = str(result)
-
-                # Check for empty responses
-                if not result or not result.strip():
-                    logger.warning("Empty response from Claude API")
-
-                # Debug: log the response structure for troubleshooting
-                logger.debug(f"Claude response length: {len(result)}")
-                logger.debug(f"Claude response preview: {result[:300]}...")
-
-                # Claude sometimes returns valid JSON followed by additional text
-                # Try to extract just the JSON part if it exists
-                result = self._clean_claude_response(result)
-
-                return result
-            except Exception as e:
-                logger.error(f"Error generating response from Claude API: {str(e)}")
-                raise
-        elif endpoint_to_use == "completions":
-            # For local LLM with pre-formatted prompts (ChatSDK uses this)
-            # The prompt already contains the full formatted conversation
-            logger.debug(
-                f"Using completions endpoint: prompt_length={len(prompt)} chars"
-            )
-
-            try:
-                # Use retry logic for the API call
-                response = self._retry_with_exponential_backoff(
-                    self.client.completions.create,
-                    model=model,
-                    prompt=prompt,
-                    temperature=0.1,
-                    stream=stream,
-                    **kwargs,
-                )
-
-                if stream:
-                    # Return a generator that yields chunks
-                    def stream_generator():
-                        for chunk in response:
-                            if (
-                                hasattr(chunk.choices[0], "text")
-                                and chunk.choices[0].text
-                            ):
-                                yield chunk.choices[0].text
-
-                    return stream_generator()
-                else:
-                    # Return the complete response
-                    result = response.choices[0].text
-                    if not result or not result.strip():
-                        logger.warning("Empty response from local LLM")
-                    return result
-            except (
-                httpx.ConnectError,
-                httpx.TimeoutException,
-                httpx.NetworkError,
-            ) as e:
-                logger.error(f"Network error connecting to local LLM server: {str(e)}")
-                error_msg = f"LLM Server Connection Error: {str(e)}"
-                raise ConnectionError(error_msg) from e
-            except Exception as e:
-                error_str = str(e)
-                logger.error(f"Error generating response from local LLM: {error_str}")
-
-                if "404" in error_str:
-                    if (
-                        "endpoint" in error_str.lower()
-                        or "not found" in error_str.lower()
-                    ):
-                        raise ConnectionError(
-                            f"API endpoint error: {error_str}\n\n"
-                            f"This may indicate:\n"
-                            f" 1. Lemonade Server version mismatch (try updating to {LEMONADE_VERSION})\n"
-                            f" 2. Model not properly loaded or corrupted\n"
-                        ) from e
-
-                if "network" in error_str.lower() or "connection" in error_str.lower():
-                    raise ConnectionError(f"LLM Server Error: {error_str}") from e
-                raise
-        elif endpoint_to_use == "chat":
-            # For local LLM using chat completions format (Lemonade v9+)
-            if messages:
-                # Use provided messages directly (proper chat history support)
-                chat_messages = list(messages)
-                # Prepend system prompt if provided and not already in messages
-                if effective_system_prompt and (
-                    not chat_messages or chat_messages[0].get("role") != "system"
-                ):
-                    chat_messages.insert(
-                        0, {"role": "system", "content": effective_system_prompt}
-                    )
-            else:
-                # Treat prompt as pre-formatted string (legacy ChatSDK support)
-                # Pass as single user message - the prompt already contains formatted history
-                chat_messages = []
-                if effective_system_prompt:
-                    chat_messages.append(
-                        {"role": "system", "content": effective_system_prompt}
-                    )
-                chat_messages.append({"role": "user", "content": prompt})
-            logger.debug(
-                f"Using chat completions for local LLM: {len(chat_messages)} messages"
-            )
-
-            try:
-                # Use retry logic for the API call
-                response = self._retry_with_exponential_backoff(
-                    self.client.chat.completions.create,
-                    model=model,
-                    messages=chat_messages,
-                    temperature=0.1,
-                    stream=stream,
-                    **kwargs,
-                )
-
-                if stream:
-                    # Return a generator that yields chunks
-                    def stream_generator():
-                        for chunk in response:
-                            if (
-                                hasattr(chunk.choices[0].delta, "content")
-                                and chunk.choices[0].delta.content
-                            ):
-                                yield chunk.choices[0].delta.content
-
-                    return stream_generator()
-                else:
-                    # Return the complete response
-                    result = response.choices[0].message.content
-                    if not result or not result.strip():
-                        logger.warning("Empty response from local LLM")
-                    return result
-            except (
-                httpx.ConnectError,
-                httpx.TimeoutException,
-                httpx.NetworkError,
-            ) as e:
-                logger.error(f"Network error connecting to local LLM server: {str(e)}")
-                error_msg = f"LLM Server Connection Error: {str(e)}"
-                raise ConnectionError(error_msg) from e
-            except Exception as e:
-                error_str = str(e)
-                logger.error(f"Error generating response from local LLM: {error_str}")
-
-                # Check for 404 errors which might indicate endpoint or model issues
-                if "404" in error_str:
-                    if (
-                        "endpoint" in error_str.lower()
-                        or "not found" in error_str.lower()
-                    ):
-                        raise ConnectionError(
-                            f"API endpoint error: {error_str}\n\n"
-                            f"This may indicate:\n"
-                            f" 1. Lemonade Server version mismatch (try updating to {LEMONADE_VERSION})\n"
-                            f" 2. Model not properly loaded or corrupted\n"
-                        ) from e
-
-                if "network" in error_str.lower() or "connection" in error_str.lower():
-                    raise ConnectionError(f"LLM Server Error: {error_str}") from e
-                raise
-        elif endpoint_to_use == "openai":
-            # For OpenAI API, use the messages format
-            messages = []
-            if effective_system_prompt:
-                messages.append({"role": "system", "content": effective_system_prompt})
-            messages.append({"role": "user", "content": prompt})
-            logger.debug(f"OpenAI API messages: {messages}")
-
-            try:
-                # Use retry logic for the API call
-                response = self._retry_with_exponential_backoff(
-                    self.client.chat.completions.create,
-                    model=model,
-                    messages=messages,
-                    stream=stream,
-                    **kwargs,
-                )
-
-                if stream:
-                    # Return a generator that yields chunks
-                    def stream_generator():
-                        for chunk in response:
-                            if (
-                                hasattr(chunk.choices[0].delta, "content")
-                                and chunk.choices[0].delta.content
-                            ):
-                                yield chunk.choices[0].delta.content
-
-                    return stream_generator()
-                else:
-                    # Return the complete response as before
-                    result = response.choices[0].message.content
-                    logger.debug(f"OpenAI API response: {result[:200]}...")
-                    return result
-            except Exception as e:
-                logger.error(f"Error generating response from OpenAI API: {str(e)}")
-                raise
-        else:
-            raise ValueError(
-                f"Unsupported endpoint: {endpoint_to_use}. Supported endpoints: 'completions', 'chat', 'claude', 'openai'."
-            )
-
-    def get_performance_stats(self) -> Dict[str, Any]:
-        """
-        Get performance statistics from the last LLM request.
-
-        Returns:
-            Dictionary containing performance statistics like:
-            - time_to_first_token: Time in seconds until first token is generated
-            - tokens_per_second: Rate of token generation
-            - input_tokens: Number of tokens in the input
-            - output_tokens: Number of tokens in the output
-        """
-        if not self.base_url:
-            # Return empty stats if not using local LLM
-            return {
-                "time_to_first_token": None,
-                "tokens_per_second": None,
-                "input_tokens": None,
-                "output_tokens": None,
-            }
-
-        try:
-            # Use the Lemonade API v1 stats endpoint
-            # This returns both timing stats and token counts
-            stats_url = f"{self.base_url}/stats"
-            response = requests.get(stats_url)
-
-            if response.status_code == 200:
-                stats = response.json()
-                # Remove decode_token_times as it's too verbose
-                if "decode_token_times" in stats:
-                    del stats["decode_token_times"]
-                return stats
-            else:
-                logger.warning(
-                    f"Failed to get stats: {response.status_code} - {response.text}"
-                )
-                return {}
-        except Exception as e:
-            logger.warning(f"Error fetching performance stats: {str(e)}")
-            return {}
-
-    def is_generating(self) -> bool:
-        """
-        Check if the local LLM is currently generating.
-
-        Returns:
-            bool: True if generating, False otherwise
-
-        Note:
-            Only available when using local LLM (use_local=True).
-            Returns False for OpenAI API usage.
-        """
-        if not self.base_url:
-            logger.debug("is_generating(): Not using local LLM, returning False")
-            return False
-
-        try:
-            # Check the generating endpoint
-            # Remove /api/v1 suffix to access root-level endpoints
-            base = self.base_url.replace("/api/v1", "")
-            generating_url = f"{base}/generating"
-            response = requests.get(generating_url)
-            if response.status_code == 200:
-                response_data = response.json()
-                is_gen = response_data.get("is_generating", False)
-                logger.debug(f"Generation status check: {is_gen}")
-                return is_gen
-            else:
-                logger.warning(
-                    f"Failed to check generation status: {response.status_code} - {response.text}"
-                )
-                return False
-        except Exception as e:
-            logger.warning(f"Error checking generation status: {str(e)}")
-            return False
-
-    def halt_generation(self) -> bool:
-        """
-        Halt current generation on the local LLM server.
-
-        Returns:
-            bool: True if halt was successful, False otherwise
-
-        Note:
-            Only available when using local LLM (use_local=True).
-            Does nothing for OpenAI API usage.
-        """
-        if not self.base_url:
-            logger.debug("halt_generation(): Not using local LLM, nothing to halt")
-            return False
-
-        try:
-            # Send halt request
-            # Remove /api/v1 suffix to access root-level endpoints
-            base = self.base_url.replace("/api/v1", "")
-            halt_url = f"{base}/halt"
-            response = requests.get(halt_url)
-            if response.status_code == 200:
-                logger.debug("Successfully halted current generation")
-                return True
-            else:
-                logger.warning(
-                    f"Failed to halt generation: {response.status_code} - {response.text}"
-                )
-                return False
-        except Exception as e:
-            logger.warning(f"Error halting generation: {str(e)}")
-            return False
-
-    def _clean_claude_response(self, response: str) -> str:
-        """
-        Extract valid JSON from Claude responses that may contain extra content after the JSON.
-
-        Args:
-            response: The raw response from Claude API
-
-        Returns:
-            Cleaned response with only the JSON portion
-        """
-        import json
-
-        if not response or not response.strip():
-            return response
-
-        # Try to parse as-is first
-        try:
-            json.loads(response.strip())
-            return response.strip()
-        except json.JSONDecodeError:
-            pass
-
-        # Look for JSON object patterns
-        # Find the first { and try to extract a complete JSON object
-        start_idx = response.find("{")
-        if start_idx == -1:
-            # No JSON object found, return as-is
-            return response
-
-        # Find the matching closing brace by counting braces
-        brace_count = 0
-        end_idx = -1
-
-        for i in range(start_idx, len(response)):
-            char = response[i]
-            if char == "{":
-                brace_count += 1
-            elif char == "}":
-                brace_count -= 1
-                if brace_count == 0:
-                    end_idx = i
-                    break
-
-        if end_idx == -1:
-            # No complete JSON object found
-            return response
-
-        # Extract the JSON portion
-        json_portion = response[start_idx : end_idx + 1]
-
-        # Validate that it's valid JSON
-        try:
-            json.loads(json_portion)
-            logger.debug(
-                f"Extracted JSON from Claude response: {len(json_portion)} chars vs original {len(response)} chars"
-            )
-            return json_portion
-        except json.JSONDecodeError:
-            # If extracted portion is not valid JSON, return original
-            logger.debug(
-                "Could not extract valid JSON from Claude response, returning original"
-            )
-            return response
-
-
-def main():
-    # Example usage with local LLM
-    system_prompt = "You are a creative assistant who specializes in short stories."
-
-    local_llm = LLMClient(system_prompt=system_prompt)
-
-    # Non-streaming example
-    result = local_llm.generate("Write a one-sentence bedtime story about a unicorn.")
-    print(f"Local LLM response:\n{result}")
-    print(f"Local LLM stats:\n{local_llm.get_performance_stats()}")
-
-    # Halt functionality demo (only for local LLM)
-    print(f"\nHalt functionality available: {local_llm.is_generating()}")
-
-    # Streaming example
-    print("\nLocal LLM streaming response:")
-    for chunk in local_llm.generate(
-        "Write a one-sentence bedtime story about a dragon.", stream=True
-    ):
-        print(chunk, end="", flush=True)
-    print("\n")
-
-    # Example usage with Claude API
-    if CLAUDE_AVAILABLE:
-        claude_llm = LLMClient(use_claude=True, system_prompt=system_prompt)
-
-        # Non-streaming example
-        result = claude_llm.generate(
-            "Write a one-sentence bedtime story about a unicorn."
-        )
-        print(f"\nClaude API response:\n{result}")
-
-    # Example usage with OpenAI API
-    openai_llm = LLMClient(use_openai=True, system_prompt=system_prompt)
-
-    # Non-streaming example
-    result = openai_llm.generate("Write a one-sentence bedtime story about a unicorn.")
-    print(f"\nOpenAI API response:\n{result}")
-
-    # Streaming example
-    print("\nOpenAI API streaming response:")
-    for chunk in openai_llm.generate(
-        "Write a one-sentence bedtime story about a dragon.", stream=True
-    ):
-        print(chunk, end="", flush=True)
-    print("\n")
-
-
-if __name__ == "__main__":
-    main()
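
For reference, here is the caller-facing surface that disappears with this deletion, condensed from the module's own main() demo above. It assumes a local Lemonade server at the default http://localhost:8000/api/v1 and imports the 0.15.0 module path; nothing here describes the replacement modules introduced in 0.15.1.

# Usage sketch of the removed LLMClient, distilled from the deleted file's main().
from gaia.llm.llm_client import LLMClient  # this import path exists only up to 0.15.0

client = LLMClient(
    system_prompt="You are a creative assistant who specializes in short stories."
)

# Non-streaming generation against the default local Lemonade backend
result = client.generate("Write a one-sentence bedtime story about a unicorn.")
print(result)
print(client.get_performance_stats())  # Lemonade /stats endpoint, local server only

# Streaming generation yields text chunks as they arrive
for chunk in client.generate(
    "Write a one-sentence bedtime story about a dragon.", stream=True
):
    print(chunk, end="", flush=True)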