ai_coding_assistant-0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_coding_assistant-0.5.0.dist-info/METADATA +226 -0
- ai_coding_assistant-0.5.0.dist-info/RECORD +89 -0
- ai_coding_assistant-0.5.0.dist-info/WHEEL +4 -0
- ai_coding_assistant-0.5.0.dist-info/entry_points.txt +3 -0
- ai_coding_assistant-0.5.0.dist-info/licenses/LICENSE +21 -0
- coding_assistant/__init__.py +3 -0
- coding_assistant/__main__.py +19 -0
- coding_assistant/cli/__init__.py +1 -0
- coding_assistant/cli/app.py +158 -0
- coding_assistant/cli/commands/__init__.py +19 -0
- coding_assistant/cli/commands/ask.py +178 -0
- coding_assistant/cli/commands/config.py +438 -0
- coding_assistant/cli/commands/diagram.py +267 -0
- coding_assistant/cli/commands/document.py +410 -0
- coding_assistant/cli/commands/explain.py +192 -0
- coding_assistant/cli/commands/fix.py +249 -0
- coding_assistant/cli/commands/index.py +162 -0
- coding_assistant/cli/commands/refactor.py +245 -0
- coding_assistant/cli/commands/search.py +182 -0
- coding_assistant/cli/commands/serve_docs.py +128 -0
- coding_assistant/cli/repl.py +381 -0
- coding_assistant/cli/theme.py +90 -0
- coding_assistant/codebase/__init__.py +1 -0
- coding_assistant/codebase/crawler.py +93 -0
- coding_assistant/codebase/parser.py +266 -0
- coding_assistant/config/__init__.py +25 -0
- coding_assistant/config/config_manager.py +615 -0
- coding_assistant/config/settings.py +82 -0
- coding_assistant/context/__init__.py +19 -0
- coding_assistant/context/chunker.py +443 -0
- coding_assistant/context/enhanced_retriever.py +322 -0
- coding_assistant/context/hybrid_search.py +311 -0
- coding_assistant/context/ranker.py +355 -0
- coding_assistant/context/retriever.py +119 -0
- coding_assistant/context/window.py +362 -0
- coding_assistant/documentation/__init__.py +23 -0
- coding_assistant/documentation/agents/__init__.py +27 -0
- coding_assistant/documentation/agents/coordinator.py +510 -0
- coding_assistant/documentation/agents/module_documenter.py +111 -0
- coding_assistant/documentation/agents/synthesizer.py +139 -0
- coding_assistant/documentation/agents/task_delegator.py +100 -0
- coding_assistant/documentation/decomposition/__init__.py +21 -0
- coding_assistant/documentation/decomposition/context_preserver.py +477 -0
- coding_assistant/documentation/decomposition/module_detector.py +302 -0
- coding_assistant/documentation/decomposition/partitioner.py +621 -0
- coding_assistant/documentation/generators/__init__.py +14 -0
- coding_assistant/documentation/generators/dataflow_generator.py +440 -0
- coding_assistant/documentation/generators/diagram_generator.py +511 -0
- coding_assistant/documentation/graph/__init__.py +13 -0
- coding_assistant/documentation/graph/dependency_builder.py +468 -0
- coding_assistant/documentation/graph/module_analyzer.py +475 -0
- coding_assistant/documentation/writers/__init__.py +11 -0
- coding_assistant/documentation/writers/markdown_writer.py +322 -0
- coding_assistant/embeddings/__init__.py +0 -0
- coding_assistant/embeddings/generator.py +89 -0
- coding_assistant/embeddings/store.py +187 -0
- coding_assistant/exceptions/__init__.py +50 -0
- coding_assistant/exceptions/base.py +110 -0
- coding_assistant/exceptions/llm.py +249 -0
- coding_assistant/exceptions/recovery.py +263 -0
- coding_assistant/exceptions/storage.py +213 -0
- coding_assistant/exceptions/validation.py +230 -0
- coding_assistant/llm/__init__.py +1 -0
- coding_assistant/llm/client.py +277 -0
- coding_assistant/llm/gemini_client.py +181 -0
- coding_assistant/llm/groq_client.py +160 -0
- coding_assistant/llm/prompts.py +98 -0
- coding_assistant/llm/together_client.py +160 -0
- coding_assistant/operations/__init__.py +13 -0
- coding_assistant/operations/differ.py +369 -0
- coding_assistant/operations/generator.py +347 -0
- coding_assistant/operations/linter.py +430 -0
- coding_assistant/operations/validator.py +406 -0
- coding_assistant/storage/__init__.py +9 -0
- coding_assistant/storage/database.py +363 -0
- coding_assistant/storage/session.py +231 -0
- coding_assistant/utils/__init__.py +31 -0
- coding_assistant/utils/cache.py +477 -0
- coding_assistant/utils/hardware.py +132 -0
- coding_assistant/utils/keystore.py +206 -0
- coding_assistant/utils/logger.py +32 -0
- coding_assistant/utils/progress.py +311 -0
- coding_assistant/validation/__init__.py +13 -0
- coding_assistant/validation/files.py +305 -0
- coding_assistant/validation/inputs.py +335 -0
- coding_assistant/validation/params.py +280 -0
- coding_assistant/validation/sanitizers.py +243 -0
- coding_assistant/vcs/__init__.py +5 -0
- coding_assistant/vcs/git.py +269 -0
+++ coding_assistant/llm/client.py
@@ -0,0 +1,277 @@
"""LLM client implementations with pluggable providers."""
from abc import ABC, abstractmethod
from typing import List, Dict, Iterator
import requests
import json
import os


class BaseLLMClient(ABC):
    """Base class for all LLM clients."""

    @abstractmethod
    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
        """Generate a response from the LLM."""
        pass

    @abstractmethod
    def is_available(self) -> bool:
        """Check if this provider is configured and available."""
        pass


class MockLLMClient(BaseLLMClient):
    """Mock LLM for testing and development without API keys."""

    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
        """Generate a mock response."""
        # Extract user query from messages
        user_msg = messages[-1]['content'] if messages else "No query"

        response = f"[MOCK RESPONSE]\n\n"
        response += f"I received your query: {user_msg[:100]}...\n\n"
        response += "This is a mock LLM client for testing the infrastructure.\n\n"
        response += "To get real responses, choose one of these options:\n\n"
        response += "Option 1 - Local (FREE, Private, 8GB+ RAM needed):\n"
        response += "  1. Install Ollama: curl -fsSL https://ollama.com/install.sh | sh\n"
        response += "  2. Pull model: ollama pull qwen2.5-coder:7b\n"
        response += "  3. Start: ollama serve\n"
        response += "  4. The system will auto-detect and use it!\n\n"
        response += "Option 2 - Cloud (FREE tier, No local resources):\n"
        response += "  1. Get Groq API key: https://console.groq.com (FREE!)\n"
        response += "  2. Set key: assistant config set-api-key groq <your-key>\n"
        response += "  3. Start using immediately!\n\n"
        response += "Option 3 - Cloud (FREE tier, Google):\n"
        response += "  1. Get Gemini key: https://makersuite.google.com/app/apikey\n"
        response += "  2. Set key: assistant config set-api-key gemini <your-key>\n\n"
        response += "Option 4 - Cloud (FREE trial, $25 credits):\n"
        response += "  1. Get Together AI key: https://api.together.xyz\n"
        response += "  2. Set key: assistant config set-api-key together <your-key>\n\n"
        response += "Check your hardware: assistant config check-hardware\n"

        if stream:
            for char in response:
                yield char
        else:
            yield response

    def is_available(self) -> bool:
        """Always available."""
        return True


class OllamaClient(BaseLLMClient):
    """Local LLM via Ollama (no API keys needed)."""

    def __init__(self, model: str = "deepseek-coder:6.7b", base_url: str = "http://localhost:11434"):
        self.model = model
        self.base_url = base_url

    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
        """Generate response from Ollama."""
        try:
            # Use the chat API which is better for multi-turn conversations
            # Convert messages to Ollama format (role: user/assistant/system)
            ollama_messages = []
            for msg in messages:
                role = msg.get('role', 'user')
                # Map OpenAI roles to Ollama roles
                if role == 'system':
                    ollama_role = 'system'
                elif role == 'assistant':
                    ollama_role = 'assistant'
                else:
                    ollama_role = 'user'

                ollama_messages.append({
                    'role': ollama_role,
                    'content': msg.get('content', '')
                })

            response = requests.post(
                f"{self.base_url}/api/chat",
                json={
                    "model": self.model,
                    "messages": ollama_messages,
                    "stream": stream
                },
                stream=stream,
                timeout=120  # Increased timeout for slower models
            )
            response.raise_for_status()

            if stream:
                for line in response.iter_lines():
                    if line:
                        try:
                            chunk = json.loads(line)
                            if 'message' in chunk and 'content' in chunk['message']:
                                content = chunk['message']['content']
                                if content:
                                    yield content
                            elif 'response' in chunk:
                                # Fallback for generate API format
                                yield chunk['response']
                        except json.JSONDecodeError:
                            continue
            else:
                result = response.json()
                if 'message' in result and 'content' in result['message']:
                    yield result['message']['content']
                else:
                    yield result.get('response', '')

        except requests.exceptions.Timeout:
            yield f"Error: Request to Ollama timed out after 120 seconds.\n"
            yield "The model might be slow to respond. Try:\n"
            yield "  1. Using a smaller/faster model\n"
            yield "  2. Checking system resources (CPU/RAM)\n"
            yield "  3. Using a cloud provider (Groq/Together AI)\n"
        except Exception as e:
            yield f"Error connecting to Ollama: {e}\n"
            yield "Make sure Ollama is running: ollama serve\n"

    def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
        """Convert chat messages to a single prompt."""
        prompt = ""
        for msg in messages:
            role = msg.get('role', 'user').upper()
            content = msg.get('content', '')
            prompt += f"{role}: {content}\n\n"
        prompt += "ASSISTANT: "
        return prompt

    def is_available(self) -> bool:
        """Check if Ollama is running."""
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=2)
            return response.status_code == 200
        except:
            return False


class LLMClientFactory:
    """Factory to create the best available LLM client."""

    @staticmethod
    def create_client(preferred_provider: str = None) -> BaseLLMClient:
        """
        Create an LLM client, trying providers in order of preference.

        Priority is hardware-aware:
        - High RAM (8GB+): Ollama → Groq → Together → Mock
        - Low RAM (<8GB): Groq → Together → Ollama → Mock
        """
        from coding_assistant.config.settings import settings
        # Import here to avoid import issues if psutil not available
        from coding_assistant.utils.hardware import HardwareDetector

        # Check if user specified a preferred provider
        if preferred_provider:
            client = LLMClientFactory._create_specific(preferred_provider)
            if client and client.is_available():
                return client
            else:
                print(f"⚠️ {preferred_provider} not available, trying alternatives...")

        # Get hardware info for smart provider selection
        hw_info = HardwareDetector.get_hardware_info()

        # Import here to avoid circular import
        from coding_assistant.llm.groq_client import GroqClient
        from coding_assistant.llm.together_client import TogetherClient
        from coding_assistant.llm.gemini_client import GeminiClient

        # Build provider list based on hardware capabilities
        if hw_info.can_run_local:
            # Sufficient RAM for local models - prefer Ollama
            providers = [
                ('Ollama', lambda: OllamaClient(
                    model=settings.ollama_model,
                    base_url=settings.ollama_base_url
                )),
                ('Groq', lambda: GroqClient(
                    api_key=settings.groq_api_key,
                    model=settings.groq_model
                )),
                ('Gemini', lambda: GeminiClient(
                    api_key=settings.gemini_api_key,
                    model=settings.gemini_model
                )),
                ('Together AI', lambda: TogetherClient(
                    api_key=settings.together_api_key,
                    model=settings.together_model
                )),
                ('Mock', MockLLMClient),
            ]
        else:
            # Limited RAM - prefer cloud providers
            providers = [
                ('Groq', lambda: GroqClient(
                    api_key=settings.groq_api_key,
                    model=settings.groq_model
                )),
                ('Gemini', lambda: GeminiClient(
                    api_key=settings.gemini_api_key,
                    model=settings.gemini_model
                )),
                ('Together AI', lambda: TogetherClient(
                    api_key=settings.together_api_key,
                    model=settings.together_model
                )),
                ('Ollama', lambda: OllamaClient(
                    model=settings.ollama_model,
                    base_url=settings.ollama_base_url
                )),
                ('Mock', MockLLMClient),
            ]

        # Try providers in order
        for name, ClientClass in providers:
            try:
                client = ClientClass()
                if client.is_available():
                    from rich.console import Console
                    console = Console()
                    console.print(f"[bold #10B981]✓[/bold #10B981] [bold #8B5CF6]Using {name}[/bold #8B5CF6] [dim]LLM provider[/dim]")
                    return client
            except Exception as e:
                continue

        # Fallback to mock (should never reach here)
        print("⚠️ No LLM providers available, using Mock client")
        print("💡 Tip: Run 'assistant config check-hardware' for setup recommendations")
        return MockLLMClient()

    @staticmethod
    def _create_specific(provider_name: str) -> BaseLLMClient:
        """Create a specific provider by name."""
        from coding_assistant.config.settings import settings
        # Import here to avoid circular import
        from coding_assistant.llm.groq_client import GroqClient
        from coding_assistant.llm.together_client import TogetherClient
        from coding_assistant.llm.gemini_client import GeminiClient

        if provider_name.lower() == 'ollama':
            return OllamaClient(
                model=settings.ollama_model,
                base_url=settings.ollama_base_url
            )
        elif provider_name.lower() == 'groq':
            return GroqClient(
                api_key=settings.groq_api_key,
                model=settings.groq_model
            )
        elif provider_name.lower() == 'gemini':
            return GeminiClient(
                api_key=settings.gemini_api_key,
                model=settings.gemini_model
            )
        elif provider_name.lower() == 'together':
            return TogetherClient(
                api_key=settings.together_api_key,
                model=settings.together_model
            )
        elif provider_name.lower() == 'mock':
            return MockLLMClient()
        return None
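
The factory above is the intended entry point for the rest of the package: callers ask it for a client and consume generate() as a stream. A minimal usage sketch, assuming the wheel listed above is installed and that provider availability depends on your local keys and hardware (nothing here is mandated by the package beyond the names shown in client.py):

# Sketch: pick the best available provider and stream a reply.
from coding_assistant.llm.client import LLMClientFactory, MockLLMClient

# create_client() probes Ollama/Groq/Gemini/Together based on hardware and
# configured keys, falling back to the mock client; create_client("groq")
# would force a specific provider.
client = LLMClientFactory.create_client()

messages = [
    {"role": "system", "content": "You are a concise coding assistant."},
    {"role": "user", "content": "Explain what a context manager is."},
]

# generate() is a generator in every client, so streaming and non-streaming
# calls are consumed the same way.
for chunk in client.generate(messages, stream=True):
    print(chunk, end="", flush=True)

# MockLLMClient needs no API keys, which makes it handy for wiring tests.
assert MockLLMClient().is_available()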
+++ coding_assistant/llm/gemini_client.py
@@ -0,0 +1,181 @@
"""Google Gemini LLM client implementation."""

from typing import List, Dict, Iterator, Optional
import requests
import json
from coding_assistant.llm.client import BaseLLMClient
from coding_assistant.exceptions.llm import (
    LLMConnectionError,
    LLMResponseError,
    LLMTimeoutError
)


class GeminiClient(BaseLLMClient):
    """Google Gemini cloud LLM client."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "gemini-2.0-flash-exp",
        base_url: str = "https://generativelanguage.googleapis.com/v1beta"
    ):
        """
        Initialize Gemini client.

        Args:
            api_key: Gemini API key
            model: Model name (gemini-2.0-flash-exp, gemini-1.5-pro, etc.)
            base_url: API base URL
        """
        self.api_key = api_key
        self.model = model
        self.base_url = base_url

    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
        """
        Generate response from Gemini.

        Args:
            messages: List of message dicts with 'role' and 'content'
            stream: Whether to stream the response

        Yields:
            Response chunks
        """
        if not self.api_key:
            raise LLMConnectionError(
                provider="gemini",
                endpoint=self.base_url,
                reason="API key not set"
            )

        # Convert messages to Gemini format
        gemini_contents = []
        for msg in messages:
            role = msg.get('role', 'user')
            # Map roles: user -> user, assistant -> model, system -> user (with prefix)
            if role == 'system':
                gemini_contents.append({
                    "role": "user",
                    "parts": [{"text": f"System: {msg.get('content', '')}"}]
                })
            elif role == 'assistant':
                gemini_contents.append({
                    "role": "model",
                    "parts": [{"text": msg.get('content', '')}]
                })
            else:  # user
                gemini_contents.append({
                    "role": "user",
                    "parts": [{"text": msg.get('content', '')}]
                })

        payload = {
            "contents": gemini_contents
        }

        try:
            # Gemini uses API key as query parameter
            url = f"{self.base_url}/models/{self.model}:{'streamGenerateContent' if stream else 'generateContent'}"
            params = {"key": self.api_key}

            response = requests.post(
                url,
                params=params,
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=stream,
                timeout=120
            )

            # Check for errors
            if response.status_code == 400:
                raise LLMResponseError(
                    message="Invalid request. Check your message format.",
                    provider="gemini",
                    status_code=response.status_code,
                    response_text=response.text
                )
            elif response.status_code == 401 or response.status_code == 403:
                raise LLMResponseError(
                    message="Authentication failed. Check your API key.",
                    provider="gemini",
                    status_code=response.status_code,
                    response_text=response.text
                )
            elif response.status_code == 429:
                raise LLMResponseError(
                    message="Rate limit exceeded. Please wait and try again.",
                    provider="gemini",
                    status_code=429,
                    response_text=response.text
                )
            elif response.status_code >= 400:
                raise LLMResponseError(
                    message=f"API error: {response.status_code}",
                    provider="gemini",
                    status_code=response.status_code,
                    response_text=response.text
                )

            if stream:
                # Parse streaming response (JSON lines)
                for line in response.iter_lines():
                    if line:
                        try:
                            chunk = json.loads(line)
                            # Extract content from candidates
                            if 'candidates' in chunk and len(chunk['candidates']) > 0:
                                candidate = chunk['candidates'][0]
                                if 'content' in candidate and 'parts' in candidate['content']:
                                    for part in candidate['content']['parts']:
                                        if 'text' in part:
                                            yield part['text']
                        except json.JSONDecodeError:
                            continue
            else:
                # Non-streaming response
                result = response.json()
                if 'candidates' in result and len(result['candidates']) > 0:
                    candidate = result['candidates'][0]
                    if 'content' in candidate and 'parts' in candidate['content']:
                        for part in candidate['content']['parts']:
                            if 'text' in part:
                                yield part['text']

        except requests.exceptions.Timeout:
            raise LLMTimeoutError(
                provider="gemini",
                timeout_seconds=120
            )
        except requests.exceptions.ConnectionError as e:
            raise LLMConnectionError(
                provider="gemini",
                endpoint=self.base_url,
                reason=str(e)
            )
        except (LLMConnectionError, LLMResponseError, LLMTimeoutError):
            # Re-raise our custom exceptions
            raise
        except Exception as e:
            raise LLMConnectionError(
                provider="gemini",
                endpoint=self.base_url,
                reason=f"Unexpected error: {str(e)}"
            )

    def is_available(self) -> bool:
        """
        Check if Gemini is available.

        Returns:
            True if API key is set
        """
        # Basic check: API key is set
        if not self.api_key:
            return False

        # Could optionally validate with API call here
        # but keeping it simple for now to avoid unnecessary API calls
        return True
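
GeminiClient talks to the REST endpoint directly with requests (API key as a query parameter, messages mapped to "contents"/"parts") rather than using the google-generativeai SDK. A short sketch of calling it on its own, assuming the wheel is installed; the GEMINI_API_KEY environment variable is only an illustrative name, since the package normally supplies the key from its own settings/keystore:

# Sketch: direct, non-streaming call against the Gemini client shown above.
import os
from coding_assistant.llm.gemini_client import GeminiClient
from coding_assistant.exceptions.llm import LLMConnectionError, LLMResponseError

# Illustrative key source; not something the package mandates.
client = GeminiClient(api_key=os.environ.get("GEMINI_API_KEY"),
                      model="gemini-2.0-flash-exp")

if client.is_available():
    try:
        # stream=False still yields chunks, so join them into one string.
        reply = "".join(client.generate(
            [{"role": "user", "content": "Summarize this repo layout in one line."}],
            stream=False,
        ))
        print(reply)
    except (LLMConnectionError, LLMResponseError) as exc:
        print(f"Gemini call failed: {exc}")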
+++ coding_assistant/llm/groq_client.py
@@ -0,0 +1,160 @@
"""Groq LLM client implementation."""

from typing import List, Dict, Iterator, Optional
import requests
import json
from coding_assistant.llm.client import BaseLLMClient
from coding_assistant.exceptions.llm import (
    LLMConnectionError,
    LLMResponseError,
    LLMTimeoutError
)


class GroqClient(BaseLLMClient):
    """Groq cloud LLM client (OpenAI-compatible API)."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "llama-3.3-70b-versatile",
        base_url: str = "https://api.groq.com/openai/v1"
    ):
        """
        Initialize Groq client.

        Args:
            api_key: Groq API key
            model: Model name (default: llama-3.3-70b-versatile)
            base_url: API base URL
        """
        self.api_key = api_key
        self.model = model
        self.base_url = base_url

    def generate(self, messages: List[Dict[str, str]], stream: bool = True) -> Iterator[str]:
        """
        Generate response from Groq.

        Args:
            messages: List of message dicts with 'role' and 'content'
            stream: Whether to stream the response

        Yields:
            Response chunks
        """
        if not self.api_key:
            raise LLMConnectionError(
                provider="groq",
                endpoint=self.base_url,
                reason="API key not set"
            )

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": self.model,
            "messages": messages,
            "stream": stream
        }

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                stream=stream,
                timeout=120
            )

            # Check for errors
            if response.status_code == 401 or response.status_code == 403:
                raise LLMResponseError(
                    message="Authentication failed. Check your API key.",
                    provider="groq",
                    status_code=response.status_code,
                    response_text=response.text
                )
            elif response.status_code == 429:
                raise LLMResponseError(
                    message="Rate limit exceeded. Please wait and try again.",
                    provider="groq",
                    status_code=429,
                    response_text=response.text
                )
            elif response.status_code >= 400:
                raise LLMResponseError(
                    message=f"API error: {response.status_code}",
                    provider="groq",
                    status_code=response.status_code,
                    response_text=response.text
                )

            if stream:
                # Parse Server-Sent Events (SSE)
                for line in response.iter_lines():
                    if line:
                        line_str = line.decode('utf-8')
                        # SSE format: "data: {json}"
                        if line_str.startswith('data: '):
                            data_str = line_str[6:]  # Remove "data: " prefix

                            # Check for end of stream
                            if data_str.strip() == '[DONE]':
                                break

                            try:
                                chunk = json.loads(data_str)
                                # Extract content from delta
                                if 'choices' in chunk and len(chunk['choices']) > 0:
                                    delta = chunk['choices'][0].get('delta', {})
                                    content = delta.get('content', '')
                                    if content:
                                        yield content
                            except json.JSONDecodeError:
                                continue
            else:
                # Non-streaming response
                result = response.json()
                if 'choices' in result and len(result['choices']) > 0:
                    content = result['choices'][0]['message']['content']
                    yield content

        except requests.exceptions.Timeout:
            raise LLMTimeoutError(
                provider="groq",
                timeout_seconds=120
            )
        except requests.exceptions.ConnectionError as e:
            raise LLMConnectionError(
                provider="groq",
                endpoint=self.base_url,
                reason=str(e)
            )
        except (LLMConnectionError, LLMResponseError, LLMTimeoutError):
            # Re-raise our custom exceptions
            raise
        except Exception as e:
            raise LLMConnectionError(
                provider="groq",
                endpoint=self.base_url,
                reason=f"Unexpected error: {str(e)}"
            )

    def is_available(self) -> bool:
        """
        Check if Groq is available.

        Returns:
            True if API key is set
        """
        # Basic check: API key is set
        if not self.api_key:
            return False

        # Could optionally validate with API call here
        # but keeping it simple for now to avoid unnecessary API calls
        return True
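
GroqClient.generate parses the OpenAI-style SSE stream by hand: it strips the "data: " prefix, stops at the [DONE] sentinel, and yields each delta's content. A minimal sketch of using it directly, assuming the wheel is installed; GROQ_API_KEY is again only an illustrative variable name, since the package normally reads the key from its settings/keystore:

# Sketch: stream tokens from the Groq client shown above.
# Each SSE frame it consumes looks roughly like:
#   data: {"choices": [{"delta": {"content": "Hel"}}]}
#   data: {"choices": [{"delta": {"content": "lo"}}]}
#   data: [DONE]
import os
from coding_assistant.llm.groq_client import GroqClient

client = GroqClient(api_key=os.environ.get("GROQ_API_KEY"))  # illustrative key source

if client.is_available():
    for token in client.generate(
        [{"role": "user", "content": "Write a haiku about diffs."}],
        stream=True,
    ):
        print(token, end="", flush=True)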