codeshield-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
+ """
+ LLM Utility - Multi-provider AI integration
+
+ Supports (in priority order):
+ - CometAPI (primary) - OpenAI-compatible unified gateway to 100+ models
+   Docs: https://apidoc.cometapi.com/
+ - Novita.ai (secondary) - Cost-effective open-source model inference
+   Docs: https://novita.ai/docs/guides/llm-api
+ - AI/ML API (fallback)
+
+ Provider chain ensures high availability with automatic fallback.
+ """
+
+ import os
+ import time
+ import httpx
+ from typing import Optional
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class LLMResponse:
+     """Response from LLM"""
+     content: str
+     provider: str
+     model: str
+     tokens_used: int = 0
+     input_tokens: int = 0
+     output_tokens: int = 0
+     latency_ms: int = 0
+
+
+ # Track provider usage for observability
+ _provider_stats = {
+     "cometapi": {"calls": 0, "errors": 0, "tokens": 0, "input_tokens": 0, "output_tokens": 0, "latency_ms": 0},
+     "novita": {"calls": 0, "errors": 0, "tokens": 0, "input_tokens": 0, "output_tokens": 0, "latency_ms": 0},
+     "aiml": {"calls": 0, "errors": 0, "tokens": 0, "input_tokens": 0, "output_tokens": 0, "latency_ms": 0},
+ }
+
+
+ def get_provider_stats() -> dict:
+     """Get usage statistics for all LLM providers with efficiency metrics"""
+     stats = {}
+     for provider, data in _provider_stats.items():
+         stats[provider] = data.copy()
+         # Calculate efficiency metrics
+         if data["input_tokens"] > 0:
+             stats[provider]["token_efficiency"] = round(data["output_tokens"] / data["input_tokens"], 3)
+         else:
+             stats[provider]["token_efficiency"] = 0.0
+         if data["calls"] > 0:
+             stats[provider]["avg_tokens_per_call"] = round(data["tokens"] / data["calls"], 1)
+             stats[provider]["avg_latency_ms"] = round(data["latency_ms"] / data["calls"], 1)
+             stats[provider]["error_rate"] = round((data["errors"] / data["calls"]) * 100, 2)
+         else:
+             stats[provider]["avg_tokens_per_call"] = 0.0
+             stats[provider]["avg_latency_ms"] = 0.0
+             stats[provider]["error_rate"] = 0.0
+     return stats
+
+
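The derived fields that get_provider_stats() adds are plain ratios over the raw counters above. A minimal sketch of the same arithmetic, using hypothetical counter values that are not part of the package:

# Illustrative only - hypothetical counters, mirroring the math in get_provider_stats().
data = {"calls": 4, "errors": 1, "tokens": 2600,
        "input_tokens": 2000, "output_tokens": 600, "latency_ms": 3400}

token_efficiency = round(data["output_tokens"] / data["input_tokens"], 3)  # 0.3
avg_tokens_per_call = round(data["tokens"] / data["calls"], 1)             # 650.0
avg_latency_ms = round(data["latency_ms"] / data["calls"], 1)              # 850.0
error_rate = round((data["errors"] / data["calls"]) * 100, 2)              # 25.0 (percent)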
+ class LLMClient:
+     """
+     Multi-provider LLM client with automatic fallback.
+
+     Provider Priority:
+     1. CometAPI - Unified gateway with free tier models (deepseek-chat)
+     2. Novita.ai - OpenAI-compatible API for open-source models
+     3. AIML API - Backup provider
+
+     All providers use OpenAI-compatible /v1/chat/completions endpoint.
+     """
+
+     PROVIDERS = {
+         "cometapi": {
+             "base_url": "https://api.cometapi.com/v1",
+             "env_key": "COMETAPI_KEY",
+             "default_model": "deepseek-chat",  # Free model on CometAPI
+             "free_models": ["deepseek-chat", "deepseek-reasoner", "llama-4-maverick"],
+             "description": "CometAPI - Unified AI gateway (100+ models)",
+         },
+         "novita": {
+             "base_url": "https://api.novita.ai/openai/v1",
+             "env_key": "NOVITA_API_KEY",
+             "default_model": "deepseek/deepseek-r1",  # Strong open-source model
+             "free_models": ["meta-llama/llama-3-8b-instruct"],
+             "description": "Novita.ai - Cost-effective inference platform",
+         },
+         "aiml": {
+             "base_url": "https://api.aimlapi.com/v1",
+             "env_key": "AIML_API_KEY",
+             "default_model": "gpt-4o-mini",
+             "description": "AIML API - Fallback provider",
+         },
+     }
+
+     def __init__(self, preferred_provider: Optional[str] = None):
+         self.preferred_provider = preferred_provider
+         self._client = httpx.Client(timeout=60.0)
+
+     def get_status(self) -> dict:
+         """
+         Get status of all configured LLM providers.
+         Useful for observability and debugging connectivity.
+         """
+         status = {}
+         for name, config in self.PROVIDERS.items():
+             api_key = os.getenv(config["env_key"])
+             status[name] = {
+                 "configured": bool(api_key),
+                 "env_var": config["env_key"],
+                 "base_url": config["base_url"],
+                 "default_model": config["default_model"],
+                 "description": config.get("description", ""),
+                 "stats": _provider_stats.get(name, {}),
+             }
+         return status
+
+     def _get_available_provider(self) -> Optional[tuple[str, dict]]:
+         """Get first available provider with valid API key"""
+         order = [self.preferred_provider] if self.preferred_provider else []
+         order.extend(["cometapi", "novita", "aiml"])
+
+         for name in order:
+             if name and name in self.PROVIDERS:
+                 config = self.PROVIDERS[name]
+                 api_key = os.getenv(config["env_key"])
+                 if api_key:
+                     return name, config
+         return None
+
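_get_available_provider() picks the first provider whose API key environment variable is set, checking preferred_provider first and then cometapi, novita, and aiml in that order. A short sketch of that resolution, illustrative only and not part of the wheel:

# Illustrative only: resolution depends solely on which env vars are present.
import os

os.environ.pop("COMETAPI_KEY", None)          # CometAPI not configured
os.environ["NOVITA_API_KEY"] = "placeholder"  # hypothetical key for the example

client = LLMClient()                          # no preferred_provider given
name, config = client._get_available_provider()
assert name == "novita"
assert config["default_model"] == "deepseek/deepseek-r1"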
+     def chat(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         model: Optional[str] = None,
+         max_tokens: int = 1000,
+     ) -> Optional[LLMResponse]:
+         """
+         Send chat completion request.
+
+         Args:
+             prompt: User prompt
+             system_prompt: Optional system prompt
+             model: Optional model override
+             max_tokens: Maximum tokens in response
+
+         Returns:
+             LLMResponse or None if all providers fail
+         """
+         provider_info = self._get_available_provider()
+         if not provider_info:
+             return None
+
+         provider_name, config = provider_info
+         api_key = os.getenv(config["env_key"])
+
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         messages.append({"role": "user", "content": prompt})
+
+         # Track call attempt and timing
+         _provider_stats[provider_name]["calls"] += 1
+         start_time = time.time()
+
+         try:
+             # Use httpx directly (most reliable) - OpenAI-compatible endpoint
+             response = self._client.post(
+                 f"{config['base_url']}/chat/completions",
+                 headers={
+                     "Authorization": f"Bearer {api_key}",
+                     "Content-Type": "application/json",
+                 },
+                 json={
+                     "model": model or config["default_model"],
+                     "messages": messages,
+                     "max_tokens": max_tokens,
+                 },
+             )
+             response.raise_for_status()
+             data = response.json()
+
+             # Extract token usage with efficiency tracking
+             usage = data.get("usage", {})
+             input_tokens = usage.get("prompt_tokens", 0)
+             output_tokens = usage.get("completion_tokens", 0)
+             total_tokens = usage.get("total_tokens", input_tokens + output_tokens)
+             latency_ms = int((time.time() - start_time) * 1000)
+
+             # Update provider stats
+             _provider_stats[provider_name]["tokens"] += total_tokens
+             _provider_stats[provider_name]["input_tokens"] += input_tokens
+             _provider_stats[provider_name]["output_tokens"] += output_tokens
+             _provider_stats[provider_name]["latency_ms"] += latency_ms
+
+             # Track in metrics system
+             try:
+                 from codeshield.utils.metrics import get_metrics
+                 get_metrics().track_tokens(provider_name, input_tokens, output_tokens, success=True)
+             except ImportError:
+                 pass
+
+             return LLMResponse(
+                 content=data["choices"][0]["message"]["content"],
+                 provider=provider_name,
+                 model=model or config["default_model"],
+                 tokens_used=total_tokens,
+                 input_tokens=input_tokens,
+                 output_tokens=output_tokens,
+                 latency_ms=latency_ms,
+             )
+         except Exception as e:
+             print(f"LLM error ({provider_name}): {e}")
+             _provider_stats[provider_name]["errors"] += 1
+             # Try fallback chain: cometapi -> novita -> aiml
+             if provider_name == "cometapi":
+                 return self._try_novita(prompt, system_prompt, model, max_tokens)
+             elif provider_name == "novita":
+                 return self._try_aiml(prompt, system_prompt, model, max_tokens)
+             return None
+
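A minimal call through the public chat() API, assuming at least one provider key (for example COMETAPI_KEY) is exported; illustrative only, not part of the packaged module:

# Illustrative only - requires a real API key in the environment.
client = LLMClient()
resp = client.chat(
    prompt="Explain in one sentence why parameterized SQL queries prevent injection.",
    system_prompt="You are a concise security assistant.",
    max_tokens=200,
)
if resp is not None:
    print(resp.provider, resp.model, f"{resp.latency_ms} ms")
    print(resp.content)
else:
    print("No provider configured, or every provider in the chain failed.")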
+     def _try_novita(self, prompt: str, system_prompt: Optional[str], model: Optional[str], max_tokens: int) -> Optional[LLMResponse]:
+         """Fallback to Novita.ai API"""
+         config = self.PROVIDERS.get("novita")
+         if not config:
+             return self._try_aiml(prompt, system_prompt, model, max_tokens)
+
+         api_key = os.getenv(config["env_key"])
+         if not api_key:
+             return self._try_aiml(prompt, system_prompt, model, max_tokens)
+
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         messages.append({"role": "user", "content": prompt})
+
+         _provider_stats["novita"]["calls"] += 1
+
+         try:
+             response = self._client.post(
+                 f"{config['base_url']}/chat/completions",
+                 headers={
+                     "Authorization": f"Bearer {api_key}",
+                     "Content-Type": "application/json",
+                 },
+                 json={
+                     "model": model or config["default_model"],
+                     "messages": messages,
+                     "max_tokens": max_tokens,
+                 },
+             )
+             response.raise_for_status()
+             data = response.json()
+
+             tokens = data.get("usage", {}).get("total_tokens", 0)
+             _provider_stats["novita"]["tokens"] += tokens
+
+             return LLMResponse(
+                 content=data["choices"][0]["message"]["content"],
+                 provider="novita",
+                 model=model or config["default_model"],
+                 tokens_used=tokens,
+             )
+         except Exception as e:
+             print(f"Novita error: {e}")
+             _provider_stats["novita"]["errors"] += 1
+             return self._try_aiml(prompt, system_prompt, model, max_tokens)
+
+     def _try_aiml(self, prompt: str, system_prompt: Optional[str], model: Optional[str], max_tokens: int) -> Optional[LLMResponse]:
+         """Fallback to AIML API"""
+         config = self.PROVIDERS.get("aiml")
+         if not config:
+             return None
+
+         api_key = os.getenv(config["env_key"])
+         if not api_key:
+             return None
+
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         messages.append({"role": "user", "content": prompt})
+
+         _provider_stats["aiml"]["calls"] += 1
+
+         try:
+             response = self._client.post(
+                 f"{config['base_url']}/chat/completions",
+                 headers={
+                     "Authorization": f"Bearer {api_key}",
+                     "Content-Type": "application/json",
+                 },
+                 json={
+                     "model": model or config["default_model"],
+                     "messages": messages,
+                     "max_tokens": max_tokens,
+                 },
+             )
+             response.raise_for_status()
+             data = response.json()
+
+             tokens = data.get("usage", {}).get("total_tokens", 0)
+             _provider_stats["aiml"]["tokens"] += tokens
+
+             return LLMResponse(
+                 content=data["choices"][0]["message"]["content"],
+                 provider="aiml",
+                 model=model or config["default_model"],
+                 tokens_used=tokens,
+             )
+         except Exception as e:
+             print(f"AIML error: {e}")
+             _provider_stats["aiml"]["errors"] += 1
+             return None
+
+     def generate_fix(self, code: str, issues: list[str]) -> Optional[str]:
+         """Generate code fix using LLM - MAXIMUM TOKEN EFFICIENCY"""
+         from codeshield.utils.token_optimizer import (
+             get_token_optimizer, optimize_fix_prompt,
+             LocalProcessor, ModelTier, get_optimal_max_tokens
+         )
+
+         optimizer = get_token_optimizer()
+
+         # 1. TRY LOCAL FIX FIRST (0 tokens!)
+         local_fix = LocalProcessor.fix_locally(code, issues)
+         if local_fix is not None:
+             if not hasattr(optimizer, '_local_saves'):
+                 optimizer._local_saves = 0
+             optimizer._local_saves += 1
+             return local_fix
+
+         # 2. Generate optimized prompt
+         prompt = optimize_fix_prompt(code, issues)
+
+         # Check if local fix was signaled
+         if prompt == "__LOCAL_FIX__":
+             return LocalProcessor.fix_locally(code, issues)
+
+         system_prompt = "Fix code. Return code only."  # Ultra short
+
+         # 3. Check cache
+         cached = optimizer.get_cached(prompt, system_prompt)
+         if cached:
+             return cached.content
+
+         # 4. Calculate optimal max_tokens (dynamic based on code size)
+         max_tokens = get_optimal_max_tokens("fix", len(code))
+
+         # 5. Check budget
+         estimated = optimizer.estimate_tokens(prompt) + max_tokens
+         if not optimizer.check_budget(estimated):
+             print("Token budget exceeded")
+             return None
+
+         # 6. Select optimal model for task complexity
+         provider_info = self._get_available_provider()
+         if provider_info:
+             provider_name = provider_info[0]
+             optimal_model = ModelTier.select_model(code, issues, provider_name)
+         else:
+             optimal_model = None
+
+         response = self.chat(
+             prompt=prompt,
+             system_prompt=system_prompt,
+             model=optimal_model,
+             max_tokens=max_tokens,
+         )
+
+         if response:
+             # Cache the response
+             optimizer.cache_response(prompt, response, system_prompt)
+             optimizer.record_usage(response.tokens_used)
+
+             # Extract code from response
+             content = response.content
+             if "```python" in content:
+                 content = content.split("```python")[1].split("```")[0]
+             elif "```" in content:
+                 content = content.split("```")[1].split("```")[0]
+             return content.strip()
+         return None
+
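generate_fix() layers local fixes, prompt caching, budget checks, and model selection (all from codeshield.utils.token_optimizer, which is not shown in this diff) in front of chat(). A hedged usage sketch with a hypothetical snippet and issue list:

# Illustrative only - also requires codeshield.utils.token_optimizer from the same package.
snippet = 'query = "SELECT * FROM users WHERE id = " + user_id'
issues = ["Possible SQL injection via string concatenation"]

fixed = LLMClient().generate_fix(snippet, issues)
if fixed is None:
    # No local fix, cache miss, budget exceeded, or every provider failed.
    print("No fix produced")
else:
    print(fixed)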
+     def generate_context_briefing(self, context: dict) -> Optional[str]:
+         """Generate context briefing - MAXIMUM TOKEN EFFICIENCY"""
+         from codeshield.utils.token_optimizer import (
+             get_token_optimizer, optimize_context_prompt, get_optimal_max_tokens
+         )
+
+         optimizer = get_token_optimizer()
+
+         # Use optimized prompt
+         prompt = optimize_context_prompt(context)
+
+         # Check cache
+         cached = optimizer.get_cached(prompt)
+         if cached:
+             return cached.content
+
+         # Dynamic max_tokens (very short for briefings)
+         max_tokens = get_optimal_max_tokens("briefing", 0)
+
+         response = self.chat(prompt=prompt, max_tokens=max_tokens)
+
+         if response:
+             optimizer.cache_response(prompt, response)
+             optimizer.record_usage(response.tokens_used)
+             return response.content
+         return None
+
+
+ # Singleton instance
+ _llm_client: Optional[LLMClient] = None
+
+
+ def get_llm_client() -> LLMClient:
+     """Get or create LLM client singleton"""
+     global _llm_client
+     if _llm_client is None:
+         _llm_client = LLMClient()
+     return _llm_client
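In normal use the module is driven through the singleton accessor. An end-to-end sketch, illustrative only; the import path codeshield.utils.llm is inferred from the package's internal imports and may differ:

# Illustrative only - module path assumed from the codeshield.utils.* imports above.
from codeshield.utils.llm import get_llm_client, get_provider_stats

client = get_llm_client()       # lazily constructs the shared LLMClient
print(client.get_status())      # which providers have keys configured, plus raw stats

response = client.chat("Reply with the single word: ok", max_tokens=5)
if response:
    print(response.provider, "->", response.content)

print(get_provider_stats())     # calls, errors, tokens, and latency per provider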