ai_coding_assistant-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
Files changed (89)
  1. ai_coding_assistant-0.5.0.dist-info/METADATA +226 -0
  2. ai_coding_assistant-0.5.0.dist-info/RECORD +89 -0
  3. ai_coding_assistant-0.5.0.dist-info/WHEEL +4 -0
  4. ai_coding_assistant-0.5.0.dist-info/entry_points.txt +3 -0
  5. ai_coding_assistant-0.5.0.dist-info/licenses/LICENSE +21 -0
  6. coding_assistant/__init__.py +3 -0
  7. coding_assistant/__main__.py +19 -0
  8. coding_assistant/cli/__init__.py +1 -0
  9. coding_assistant/cli/app.py +158 -0
  10. coding_assistant/cli/commands/__init__.py +19 -0
  11. coding_assistant/cli/commands/ask.py +178 -0
  12. coding_assistant/cli/commands/config.py +438 -0
  13. coding_assistant/cli/commands/diagram.py +267 -0
  14. coding_assistant/cli/commands/document.py +410 -0
  15. coding_assistant/cli/commands/explain.py +192 -0
  16. coding_assistant/cli/commands/fix.py +249 -0
  17. coding_assistant/cli/commands/index.py +162 -0
  18. coding_assistant/cli/commands/refactor.py +245 -0
  19. coding_assistant/cli/commands/search.py +182 -0
  20. coding_assistant/cli/commands/serve_docs.py +128 -0
  21. coding_assistant/cli/repl.py +381 -0
  22. coding_assistant/cli/theme.py +90 -0
  23. coding_assistant/codebase/__init__.py +1 -0
  24. coding_assistant/codebase/crawler.py +93 -0
  25. coding_assistant/codebase/parser.py +266 -0
  26. coding_assistant/config/__init__.py +25 -0
  27. coding_assistant/config/config_manager.py +615 -0
  28. coding_assistant/config/settings.py +82 -0
  29. coding_assistant/context/__init__.py +19 -0
  30. coding_assistant/context/chunker.py +443 -0
  31. coding_assistant/context/enhanced_retriever.py +322 -0
  32. coding_assistant/context/hybrid_search.py +311 -0
  33. coding_assistant/context/ranker.py +355 -0
  34. coding_assistant/context/retriever.py +119 -0
  35. coding_assistant/context/window.py +362 -0
  36. coding_assistant/documentation/__init__.py +23 -0
  37. coding_assistant/documentation/agents/__init__.py +27 -0
  38. coding_assistant/documentation/agents/coordinator.py +510 -0
  39. coding_assistant/documentation/agents/module_documenter.py +111 -0
  40. coding_assistant/documentation/agents/synthesizer.py +139 -0
  41. coding_assistant/documentation/agents/task_delegator.py +100 -0
  42. coding_assistant/documentation/decomposition/__init__.py +21 -0
  43. coding_assistant/documentation/decomposition/context_preserver.py +477 -0
  44. coding_assistant/documentation/decomposition/module_detector.py +302 -0
  45. coding_assistant/documentation/decomposition/partitioner.py +621 -0
  46. coding_assistant/documentation/generators/__init__.py +14 -0
  47. coding_assistant/documentation/generators/dataflow_generator.py +440 -0
  48. coding_assistant/documentation/generators/diagram_generator.py +511 -0
  49. coding_assistant/documentation/graph/__init__.py +13 -0
  50. coding_assistant/documentation/graph/dependency_builder.py +468 -0
  51. coding_assistant/documentation/graph/module_analyzer.py +475 -0
  52. coding_assistant/documentation/writers/__init__.py +11 -0
  53. coding_assistant/documentation/writers/markdown_writer.py +322 -0
  54. coding_assistant/embeddings/__init__.py +0 -0
  55. coding_assistant/embeddings/generator.py +89 -0
  56. coding_assistant/embeddings/store.py +187 -0
  57. coding_assistant/exceptions/__init__.py +50 -0
  58. coding_assistant/exceptions/base.py +110 -0
  59. coding_assistant/exceptions/llm.py +249 -0
  60. coding_assistant/exceptions/recovery.py +263 -0
  61. coding_assistant/exceptions/storage.py +213 -0
  62. coding_assistant/exceptions/validation.py +230 -0
  63. coding_assistant/llm/__init__.py +1 -0
  64. coding_assistant/llm/client.py +277 -0
  65. coding_assistant/llm/gemini_client.py +181 -0
  66. coding_assistant/llm/groq_client.py +160 -0
  67. coding_assistant/llm/prompts.py +98 -0
  68. coding_assistant/llm/together_client.py +160 -0
  69. coding_assistant/operations/__init__.py +13 -0
  70. coding_assistant/operations/differ.py +369 -0
  71. coding_assistant/operations/generator.py +347 -0
  72. coding_assistant/operations/linter.py +430 -0
  73. coding_assistant/operations/validator.py +406 -0
  74. coding_assistant/storage/__init__.py +9 -0
  75. coding_assistant/storage/database.py +363 -0
  76. coding_assistant/storage/session.py +231 -0
  77. coding_assistant/utils/__init__.py +31 -0
  78. coding_assistant/utils/cache.py +477 -0
  79. coding_assistant/utils/hardware.py +132 -0
  80. coding_assistant/utils/keystore.py +206 -0
  81. coding_assistant/utils/logger.py +32 -0
  82. coding_assistant/utils/progress.py +311 -0
  83. coding_assistant/validation/__init__.py +13 -0
  84. coding_assistant/validation/files.py +305 -0
  85. coding_assistant/validation/inputs.py +335 -0
  86. coding_assistant/validation/params.py +280 -0
  87. coding_assistant/validation/sanitizers.py +243 -0
  88. coding_assistant/vcs/__init__.py +5 -0
  89. coding_assistant/vcs/git.py +269 -0
coding_assistant/llm/client.py
@@ -0,0 +1,277 @@
+"""LLM client implementations with pluggable providers."""
+
+from abc import ABC, abstractmethod
+from typing import List, Dict, Iterator, Optional
+import requests
+import json
+import os
+
+
+class BaseLLMClient(ABC):
+    """Base class for all LLM clients."""
+
+    @abstractmethod
+    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
+        """Generate a response from the LLM."""
+        pass
+
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Check if this provider is configured and available."""
+        pass
+
+
+class MockLLMClient(BaseLLMClient):
+    """Mock LLM for testing and development without API keys."""
+
+    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
+        """Generate a mock response."""
+        # Extract user query from messages
+        user_msg = messages[-1]['content'] if messages else "No query"
+
+        response = f"[MOCK RESPONSE]\n\n"
+        response += f"I received your query: {user_msg[:100]}...\n\n"
+        response += "This is a mock LLM client for testing the infrastructure.\n\n"
+        response += "To get real responses, choose one of these options:\n\n"
+        response += "Option 1 - Local (FREE, Private, 8GB+ RAM needed):\n"
+        response += " 1. Install Ollama: curl -fsSL https://ollama.com/install.sh | sh\n"
+        response += " 2. Pull model: ollama pull qwen2.5-coder:7b\n"
+        response += " 3. Start: ollama serve\n"
+        response += " 4. The system will auto-detect and use it!\n\n"
+        response += "Option 2 - Cloud (FREE tier, No local resources):\n"
+        response += " 1. Get Groq API key: https://console.groq.com (FREE!)\n"
+        response += " 2. Set key: assistant config set-api-key groq <your-key>\n"
+        response += " 3. Start using immediately!\n\n"
+        response += "Option 3 - Cloud (FREE tier, Google):\n"
+        response += " 1. Get Gemini key: https://makersuite.google.com/app/apikey\n"
+        response += " 2. Set key: assistant config set-api-key gemini <your-key>\n\n"
+        response += "Option 4 - Cloud (FREE trial, $25 credits):\n"
+        response += " 1. Get Together AI key: https://api.together.xyz\n"
+        response += " 2. Set key: assistant config set-api-key together <your-key>\n\n"
+        response += "Check your hardware: assistant config check-hardware\n"
+
+        if stream:
+            for char in response:
+                yield char
+        else:
+            yield response
+
+    def is_available(self) -> bool:
+        """Always available."""
+        return True
+
+
+class OllamaClient(BaseLLMClient):
+    """Local LLM via Ollama (no API keys needed)."""
+
+    def __init__(self, model: str = "deepseek-coder:6.7b", base_url: str = "http://localhost:11434"):
+        self.model = model
+        self.base_url = base_url
+
+    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
+        """Generate response from Ollama."""
+        try:
+            # Use the chat API which is better for multi-turn conversations
+            # Convert messages to Ollama format (role: user/assistant/system)
+            ollama_messages = []
+            for msg in messages:
+                role = msg.get('role', 'user')
+                # Map OpenAI roles to Ollama roles
+                if role == 'system':
+                    ollama_role = 'system'
+                elif role == 'assistant':
+                    ollama_role = 'assistant'
+                else:
+                    ollama_role = 'user'
+
+                ollama_messages.append({
+                    'role': ollama_role,
+                    'content': msg.get('content', '')
+                })
+
+            response = requests.post(
+                f"{self.base_url}/api/chat",
+                json={
+                    "model": self.model,
+                    "messages": ollama_messages,
+                    "stream": stream
+                },
+                stream=stream,
+                timeout=120  # Increased timeout for slower models
+            )
+            response.raise_for_status()
+
+            if stream:
+                for line in response.iter_lines():
+                    if line:
+                        try:
+                            chunk = json.loads(line)
+                            if 'message' in chunk and 'content' in chunk['message']:
+                                content = chunk['message']['content']
+                                if content:
+                                    yield content
+                            elif 'response' in chunk:
+                                # Fallback for generate API format
+                                yield chunk['response']
+                        except json.JSONDecodeError:
+                            continue
+            else:
+                result = response.json()
+                if 'message' in result and 'content' in result['message']:
+                    yield result['message']['content']
+                else:
+                    yield result.get('response', '')
+
+        except requests.exceptions.Timeout:
+            yield f"Error: Request to Ollama timed out after 120 seconds.\n"
+            yield "The model might be slow to respond. Try:\n"
+            yield " 1. Using a smaller/faster model\n"
+            yield " 2. Checking system resources (CPU/RAM)\n"
+            yield " 3. Using a cloud provider (Groq/Together AI)\n"
+        except Exception as e:
+            yield f"Error connecting to Ollama: {e}\n"
+            yield "Make sure Ollama is running: ollama serve\n"
+
+    def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
+        """Convert chat messages to a single prompt."""
+        prompt = ""
+        for msg in messages:
+            role = msg.get('role', 'user').upper()
+            content = msg.get('content', '')
+            prompt += f"{role}: {content}\n\n"
+        prompt += "ASSISTANT: "
+        return prompt
+
+    def is_available(self) -> bool:
+        """Check if Ollama is running."""
+        try:
+            response = requests.get(f"{self.base_url}/api/tags", timeout=2)
+            return response.status_code == 200
+        except requests.exceptions.RequestException:
+            return False
+
+
+class LLMClientFactory:
+    """Factory to create the best available LLM client."""
+
+    @staticmethod
+    def create_client(preferred_provider: Optional[str] = None) -> BaseLLMClient:
+        """
+        Create an LLM client, trying providers in order of preference.
+
+        Priority is hardware-aware:
+        - High RAM (8GB+): Ollama → Groq → Gemini → Together → Mock
+        - Low RAM (<8GB): Groq → Gemini → Together → Ollama → Mock
+        """
+        from coding_assistant.config.settings import settings
+        # Import here to avoid import issues if psutil not available
+        from coding_assistant.utils.hardware import HardwareDetector
+
+        # Check if user specified a preferred provider
+        if preferred_provider:
+            client = LLMClientFactory._create_specific(preferred_provider)
+            if client and client.is_available():
+                return client
+            else:
+                print(f"⚠️ {preferred_provider} not available, trying alternatives...")
+
+        # Get hardware info for smart provider selection
+        hw_info = HardwareDetector.get_hardware_info()
+
+        # Import here to avoid circular import
+        from coding_assistant.llm.groq_client import GroqClient
+        from coding_assistant.llm.together_client import TogetherClient
+        from coding_assistant.llm.gemini_client import GeminiClient
+
+        # Build provider list based on hardware capabilities
+        if hw_info.can_run_local:
+            # Sufficient RAM for local models - prefer Ollama
+            providers = [
+                ('Ollama', lambda: OllamaClient(
+                    model=settings.ollama_model,
+                    base_url=settings.ollama_base_url
+                )),
+                ('Groq', lambda: GroqClient(
+                    api_key=settings.groq_api_key,
+                    model=settings.groq_model
+                )),
+                ('Gemini', lambda: GeminiClient(
+                    api_key=settings.gemini_api_key,
+                    model=settings.gemini_model
+                )),
+                ('Together AI', lambda: TogetherClient(
+                    api_key=settings.together_api_key,
+                    model=settings.together_model
+                )),
+                ('Mock', MockLLMClient),
+            ]
+        else:
+            # Limited RAM - prefer cloud providers
+            providers = [
+                ('Groq', lambda: GroqClient(
+                    api_key=settings.groq_api_key,
+                    model=settings.groq_model
+                )),
+                ('Gemini', lambda: GeminiClient(
+                    api_key=settings.gemini_api_key,
+                    model=settings.gemini_model
+                )),
+                ('Together AI', lambda: TogetherClient(
+                    api_key=settings.together_api_key,
+                    model=settings.together_model
+                )),
+                ('Ollama', lambda: OllamaClient(
+                    model=settings.ollama_model,
+                    base_url=settings.ollama_base_url
+                )),
+                ('Mock', MockLLMClient),
+            ]
+
+        # Try providers in order
+        for name, ClientClass in providers:
+            try:
+                client = ClientClass()
+                if client.is_available():
+                    from rich.console import Console
+                    console = Console()
+                    console.print(f"[bold #10B981]✓[/bold #10B981] [bold #8B5CF6]Using {name}[/bold #8B5CF6] [dim]LLM provider[/dim]")
+                    return client
+            except Exception:
+                continue
+
+        # Fallback to mock (should never reach here)
+        print("⚠️ No LLM providers available, using Mock client")
+        print("💡 Tip: Run 'assistant config check-hardware' for setup recommendations")
+        return MockLLMClient()
+
+    @staticmethod
+    def _create_specific(provider_name: str) -> Optional[BaseLLMClient]:
+        """Create a specific provider by name."""
+        from coding_assistant.config.settings import settings
+        # Import here to avoid circular import
+        from coding_assistant.llm.groq_client import GroqClient
+        from coding_assistant.llm.together_client import TogetherClient
+        from coding_assistant.llm.gemini_client import GeminiClient
+
+        if provider_name.lower() == 'ollama':
+            return OllamaClient(
+                model=settings.ollama_model,
+                base_url=settings.ollama_base_url
+            )
+        elif provider_name.lower() == 'groq':
+            return GroqClient(
+                api_key=settings.groq_api_key,
+                model=settings.groq_model
+            )
+        elif provider_name.lower() == 'gemini':
+            return GeminiClient(
+                api_key=settings.gemini_api_key,
+                model=settings.gemini_model
+            )
+        elif provider_name.lower() == 'together':
+            return TogetherClient(
+                api_key=settings.together_api_key,
+                model=settings.together_model
+            )
+        elif provider_name.lower() == 'mock':
+            return MockLLMClient()
+        return None
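
In typical use, callers ask the factory for the best available client and then iterate over the generator returned by generate(). A minimal usage sketch (assuming the package is installed and its settings are configured; the message format is the OpenAI-style role/content list used throughout these clients):

    from coding_assistant.llm.client import LLMClientFactory

    # Let the factory pick a provider based on hardware and configured API keys,
    # or force one by name, e.g. LLMClientFactory.create_client("groq").
    client = LLMClientFactory.create_client()

    messages = [
        {"role": "system", "content": "You are a concise coding assistant."},
        {"role": "user", "content": "What does functools.lru_cache do?"},
    ]

    # Every client yields response chunks, so streaming is just iteration.
    for chunk in client.generate(messages, stream=True):
        print(chunk, end="", flush=True)
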
coding_assistant/llm/gemini_client.py
@@ -0,0 +1,181 @@
+"""Google Gemini LLM client implementation."""
+
+from typing import List, Dict, Iterator, Optional
+import requests
+import json
+from coding_assistant.llm.client import BaseLLMClient
+from coding_assistant.exceptions.llm import (
+    LLMConnectionError,
+    LLMResponseError,
+    LLMTimeoutError
+)
+
+
+class GeminiClient(BaseLLMClient):
+    """Google Gemini cloud LLM client."""
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: str = "gemini-2.0-flash-exp",
+        base_url: str = "https://generativelanguage.googleapis.com/v1beta"
+    ):
+        """
+        Initialize Gemini client.
+
+        Args:
+            api_key: Gemini API key
+            model: Model name (gemini-2.0-flash-exp, gemini-1.5-pro, etc.)
+            base_url: API base URL
+        """
+        self.api_key = api_key
+        self.model = model
+        self.base_url = base_url
+
+    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
+        """
+        Generate response from Gemini.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'
+            stream: Whether to stream the response
+
+        Yields:
+            Response chunks
+        """
+        if not self.api_key:
+            raise LLMConnectionError(
+                provider="gemini",
+                endpoint=self.base_url,
+                reason="API key not set"
+            )
+
+        # Convert messages to Gemini format
+        gemini_contents = []
+        for msg in messages:
+            role = msg.get('role', 'user')
+            # Map roles: user -> user, assistant -> model, system -> user (with prefix)
+            if role == 'system':
+                gemini_contents.append({
+                    "role": "user",
+                    "parts": [{"text": f"System: {msg.get('content', '')}"}]
+                })
+            elif role == 'assistant':
+                gemini_contents.append({
+                    "role": "model",
+                    "parts": [{"text": msg.get('content', '')}]
+                })
+            else:  # user
+                gemini_contents.append({
+                    "role": "user",
+                    "parts": [{"text": msg.get('content', '')}]
+                })
+
+        payload = {
+            "contents": gemini_contents
+        }
+
+        try:
+            # Gemini uses API key as query parameter
+            url = f"{self.base_url}/models/{self.model}:{'streamGenerateContent' if stream else 'generateContent'}"
+            params = {"key": self.api_key}
+            if stream:
+                # Request SSE framing; the default streaming format is a JSON
+                # array, which cannot be decoded line by line.
+                params["alt"] = "sse"
+
+            response = requests.post(
+                url,
+                params=params,
+                headers={"Content-Type": "application/json"},
+                json=payload,
+                stream=stream,
+                timeout=120
+            )
+
+            # Check for errors
+            if response.status_code == 400:
+                raise LLMResponseError(
+                    message="Invalid request. Check your message format.",
+                    provider="gemini",
+                    status_code=response.status_code,
+                    response_text=response.text
+                )
+            elif response.status_code == 401 or response.status_code == 403:
+                raise LLMResponseError(
+                    message="Authentication failed. Check your API key.",
+                    provider="gemini",
+                    status_code=response.status_code,
+                    response_text=response.text
+                )
+            elif response.status_code == 429:
+                raise LLMResponseError(
+                    message="Rate limit exceeded. Please wait and try again.",
+                    provider="gemini",
+                    status_code=429,
+                    response_text=response.text
+                )
+            elif response.status_code >= 400:
+                raise LLMResponseError(
+                    message=f"API error: {response.status_code}",
+                    provider="gemini",
+                    status_code=response.status_code,
+                    response_text=response.text
+                )
+
+            if stream:
+                # Parse streaming response (SSE lines of the form "data: {json}")
+                for line in response.iter_lines():
+                    if not line:
+                        continue
+                    line_str = line.decode('utf-8')
+                    if line_str.startswith('data: '):
+                        line_str = line_str[6:]  # Remove "data: " prefix
+                    try:
+                        chunk = json.loads(line_str)
+                        # Extract content from candidates
+                        if 'candidates' in chunk and len(chunk['candidates']) > 0:
+                            candidate = chunk['candidates'][0]
+                            if 'content' in candidate and 'parts' in candidate['content']:
+                                for part in candidate['content']['parts']:
+                                    if 'text' in part:
+                                        yield part['text']
+                    except json.JSONDecodeError:
+                        continue
+            else:
+                # Non-streaming response
+                result = response.json()
+                if 'candidates' in result and len(result['candidates']) > 0:
+                    candidate = result['candidates'][0]
+                    if 'content' in candidate and 'parts' in candidate['content']:
+                        for part in candidate['content']['parts']:
+                            if 'text' in part:
+                                yield part['text']
+
+        except requests.exceptions.Timeout:
+            raise LLMTimeoutError(
+                provider="gemini",
+                timeout_seconds=120
+            )
+        except requests.exceptions.ConnectionError as e:
+            raise LLMConnectionError(
+                provider="gemini",
+                endpoint=self.base_url,
+                reason=str(e)
+            )
+        except (LLMConnectionError, LLMResponseError, LLMTimeoutError):
+            # Re-raise our custom exceptions
+            raise
+        except Exception as e:
+            raise LLMConnectionError(
+                provider="gemini",
+                endpoint=self.base_url,
+                reason=f"Unexpected error: {str(e)}"
+            )
+
+    def is_available(self) -> bool:
+        """
+        Check if Gemini is available.
+
+        Returns:
+            True if API key is set
+        """
+        # Basic check: API key is set
+        if not self.api_key:
+            return False
+
+        # Could optionally validate with API call here
+        # but keeping it simple for now to avoid unnecessary API calls
+        return True
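
For reference, the role-mapping loop in generate() turns an OpenAI-style conversation into the Gemini contents structure; a small illustrative example (the message text is hypothetical, the field names are those built by the code above):

    # OpenAI-style input:
    #   [{"role": "system", "content": "Be concise."},
    #    {"role": "user", "content": "Summarise this function."}]
    # Payload built by GeminiClient.generate():
    payload = {
        "contents": [
            {"role": "user", "parts": [{"text": "System: Be concise."}]},
            {"role": "user", "parts": [{"text": "Summarise this function."}]},
        ]
    }
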
coding_assistant/llm/groq_client.py
@@ -0,0 +1,160 @@
+"""Groq LLM client implementation."""
+
+from typing import List, Dict, Iterator, Optional
+import requests
+import json
+from coding_assistant.llm.client import BaseLLMClient
+from coding_assistant.exceptions.llm import (
+    LLMConnectionError,
+    LLMResponseError,
+    LLMTimeoutError
+)
+
+
+class GroqClient(BaseLLMClient):
+    """Groq cloud LLM client (OpenAI-compatible API)."""
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: str = "llama-3.3-70b-versatile",
+        base_url: str = "https://api.groq.com/openai/v1"
+    ):
+        """
+        Initialize Groq client.
+
+        Args:
+            api_key: Groq API key
+            model: Model name (default: llama-3.3-70b-versatile)
+            base_url: API base URL
+        """
+        self.api_key = api_key
+        self.model = model
+        self.base_url = base_url
+
+    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
+        """
+        Generate response from Groq.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'
+            stream: Whether to stream the response
+
+        Yields:
+            Response chunks
+        """
+        if not self.api_key:
+            raise LLMConnectionError(
+                provider="groq",
+                endpoint=self.base_url,
+                reason="API key not set"
+            )
+
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json"
+        }
+
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": stream
+        }
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=payload,
+                stream=stream,
+                timeout=120
+            )
+
+            # Check for errors
+            if response.status_code == 401 or response.status_code == 403:
+                raise LLMResponseError(
+                    message="Authentication failed. Check your API key.",
+                    provider="groq",
+                    status_code=response.status_code,
+                    response_text=response.text
+                )
+            elif response.status_code == 429:
+                raise LLMResponseError(
+                    message="Rate limit exceeded. Please wait and try again.",
+                    provider="groq",
+                    status_code=429,
+                    response_text=response.text
+                )
+            elif response.status_code >= 400:
+                raise LLMResponseError(
+                    message=f"API error: {response.status_code}",
+                    provider="groq",
+                    status_code=response.status_code,
+                    response_text=response.text
+                )
+
+            if stream:
+                # Parse Server-Sent Events (SSE)
+                for line in response.iter_lines():
+                    if line:
+                        line_str = line.decode('utf-8')
+                        # SSE format: "data: {json}"
+                        if line_str.startswith('data: '):
+                            data_str = line_str[6:]  # Remove "data: " prefix
+
+                            # Check for end of stream
+                            if data_str.strip() == '[DONE]':
+                                break
+
+                            try:
+                                chunk = json.loads(data_str)
+                                # Extract content from delta
+                                if 'choices' in chunk and len(chunk['choices']) > 0:
+                                    delta = chunk['choices'][0].get('delta', {})
+                                    content = delta.get('content', '')
+                                    if content:
+                                        yield content
+                            except json.JSONDecodeError:
+                                continue
+            else:
+                # Non-streaming response
+                result = response.json()
+                if 'choices' in result and len(result['choices']) > 0:
+                    content = result['choices'][0]['message']['content']
+                    yield content
+
+        except requests.exceptions.Timeout:
+            raise LLMTimeoutError(
+                provider="groq",
+                timeout_seconds=120
+            )
+        except requests.exceptions.ConnectionError as e:
+            raise LLMConnectionError(
+                provider="groq",
+                endpoint=self.base_url,
+                reason=str(e)
+            )
+        except (LLMConnectionError, LLMResponseError, LLMTimeoutError):
+            # Re-raise our custom exceptions
+            raise
+        except Exception as e:
+            raise LLMConnectionError(
+                provider="groq",
+                endpoint=self.base_url,
+                reason=f"Unexpected error: {str(e)}"
+            )
+
+    def is_available(self) -> bool:
+        """
+        Check if Groq is available.
+
+        Returns:
+            True if API key is set
+        """
+        # Basic check: API key is set
+        if not self.api_key:
+            return False
+
+        # Could optionally validate with API call here
+        # but keeping it simple for now to avoid unnecessary API calls
+        return True
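
The streaming branch in GroqClient.generate() consumes OpenAI-compatible SSE frames, one JSON delta per "data:" line, until a "[DONE]" sentinel. A small illustrative walkthrough of the extraction it performs (the sample line content is hypothetical):

    import json

    line_str = 'data: {"choices": [{"delta": {"content": "Hello"}}]}'

    data_str = line_str[len("data: "):]
    if data_str.strip() != "[DONE]":
        chunk = json.loads(data_str)
        delta = chunk["choices"][0].get("delta", {})
        print(delta.get("content", ""))  # -> Hello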