codeshield_ai-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeshield/__init__.py +62 -0
- codeshield/api_server.py +438 -0
- codeshield/cli.py +48 -0
- codeshield/contextvault/__init__.py +1 -0
- codeshield/contextvault/capture.py +174 -0
- codeshield/contextvault/restore.py +115 -0
- codeshield/mcp/__init__.py +1 -0
- codeshield/mcp/hooks.py +65 -0
- codeshield/mcp/server.py +319 -0
- codeshield/styleforge/__init__.py +1 -0
- codeshield/styleforge/corrector.py +298 -0
- codeshield/trustgate/__init__.py +1 -0
- codeshield/trustgate/checker.py +384 -0
- codeshield/trustgate/sandbox.py +101 -0
- codeshield/utils/__init__.py +9 -0
- codeshield/utils/daytona.py +233 -0
- codeshield/utils/leanmcp.py +258 -0
- codeshield/utils/llm.py +423 -0
- codeshield/utils/metrics.py +543 -0
- codeshield/utils/token_optimizer.py +605 -0
- codeshield_ai-0.1.0.dist-info/METADATA +565 -0
- codeshield_ai-0.1.0.dist-info/RECORD +24 -0
- codeshield_ai-0.1.0.dist-info/WHEEL +4 -0
- codeshield_ai-0.1.0.dist-info/entry_points.txt +3 -0
codeshield/utils/llm.py
ADDED
@@ -0,0 +1,423 @@
"""
LLM Utility - Multi-provider AI integration

Supports (in priority order):
- CometAPI (primary) - OpenAI-compatible unified gateway to 100+ models
  Docs: https://apidoc.cometapi.com/
- Novita.ai (secondary) - Cost-effective open-source model inference
  Docs: https://novita.ai/docs/guides/llm-api
- AI/ML API (fallback)

Provider chain ensures high availability with automatic fallback.
"""

import os
import time
import httpx
from typing import Optional
from dataclasses import dataclass


@dataclass
class LLMResponse:
    """Response from LLM"""
    content: str
    provider: str
    model: str
    tokens_used: int = 0
    input_tokens: int = 0
    output_tokens: int = 0
    latency_ms: int = 0


# Track provider usage for observability
_provider_stats = {
    "cometapi": {"calls": 0, "errors": 0, "tokens": 0, "input_tokens": 0, "output_tokens": 0, "latency_ms": 0},
    "novita": {"calls": 0, "errors": 0, "tokens": 0, "input_tokens": 0, "output_tokens": 0, "latency_ms": 0},
    "aiml": {"calls": 0, "errors": 0, "tokens": 0, "input_tokens": 0, "output_tokens": 0, "latency_ms": 0},
}


def get_provider_stats() -> dict:
    """Get usage statistics for all LLM providers with efficiency metrics"""
    stats = {}
    for provider, data in _provider_stats.items():
        stats[provider] = data.copy()
        # Calculate efficiency metrics
        if data["input_tokens"] > 0:
            stats[provider]["token_efficiency"] = round(data["output_tokens"] / data["input_tokens"], 3)
        else:
            stats[provider]["token_efficiency"] = 0.0
        if data["calls"] > 0:
            stats[provider]["avg_tokens_per_call"] = round(data["tokens"] / data["calls"], 1)
            stats[provider]["avg_latency_ms"] = round(data["latency_ms"] / data["calls"], 1)
            stats[provider]["error_rate"] = round((data["errors"] / data["calls"]) * 100, 2)
        else:
            stats[provider]["avg_tokens_per_call"] = 0.0
            stats[provider]["avg_latency_ms"] = 0.0
            stats[provider]["error_rate"] = 0.0
    return stats


class LLMClient:
    """
    Multi-provider LLM client with automatic fallback.

    Provider Priority:
    1. CometAPI - Unified gateway with free tier models (deepseek-chat)
    2. Novita.ai - OpenAI-compatible API for open-source models
    3. AIML API - Backup provider

    All providers use OpenAI-compatible /v1/chat/completions endpoint.
    """

    PROVIDERS = {
        "cometapi": {
            "base_url": "https://api.cometapi.com/v1",
            "env_key": "COMETAPI_KEY",
            "default_model": "deepseek-chat",  # Free model on CometAPI
            "free_models": ["deepseek-chat", "deepseek-reasoner", "llama-4-maverick"],
            "description": "CometAPI - Unified AI gateway (100+ models)",
        },
        "novita": {
            "base_url": "https://api.novita.ai/openai/v1",
            "env_key": "NOVITA_API_KEY",
            "default_model": "deepseek/deepseek-r1",  # Strong open-source model
            "free_models": ["meta-llama/llama-3-8b-instruct"],
            "description": "Novita.ai - Cost-effective inference platform",
        },
        "aiml": {
            "base_url": "https://api.aimlapi.com/v1",
            "env_key": "AIML_API_KEY",
            "default_model": "gpt-4o-mini",
            "description": "AIML API - Fallback provider",
        },
    }

    def __init__(self, preferred_provider: Optional[str] = None):
        self.preferred_provider = preferred_provider
        self._client = httpx.Client(timeout=60.0)

    def get_status(self) -> dict:
        """
        Get status of all configured LLM providers.
        Useful for observability and debugging connectivity.
        """
        status = {}
        for name, config in self.PROVIDERS.items():
            api_key = os.getenv(config["env_key"])
            status[name] = {
                "configured": bool(api_key),
                "env_var": config["env_key"],
                "base_url": config["base_url"],
                "default_model": config["default_model"],
                "description": config.get("description", ""),
                "stats": _provider_stats.get(name, {}),
            }
        return status

    def _get_available_provider(self) -> Optional[tuple[str, dict]]:
        """Get first available provider with valid API key"""
        order = [self.preferred_provider] if self.preferred_provider else []
        order.extend(["cometapi", "novita", "aiml"])

        for name in order:
            if name and name in self.PROVIDERS:
                config = self.PROVIDERS[name]
                api_key = os.getenv(config["env_key"])
                if api_key:
                    return name, config
        return None

    def chat(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        max_tokens: int = 1000,
    ) -> Optional[LLMResponse]:
        """
        Send chat completion request.

        Args:
            prompt: User prompt
            system_prompt: Optional system prompt
            model: Optional model override
            max_tokens: Maximum tokens in response

        Returns:
            LLMResponse or None if all providers fail
        """
        provider_info = self._get_available_provider()
        if not provider_info:
            return None

        provider_name, config = provider_info
        api_key = os.getenv(config["env_key"])

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        # Track call attempt and timing
        _provider_stats[provider_name]["calls"] += 1
        start_time = time.time()

        try:
            # Use httpx directly (most reliable) - OpenAI-compatible endpoint
            response = self._client.post(
                f"{config['base_url']}/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model or config["default_model"],
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
            )
            response.raise_for_status()
            data = response.json()

            # Extract token usage with efficiency tracking
            usage = data.get("usage", {})
            input_tokens = usage.get("prompt_tokens", 0)
            output_tokens = usage.get("completion_tokens", 0)
            total_tokens = usage.get("total_tokens", input_tokens + output_tokens)
            latency_ms = int((time.time() - start_time) * 1000)

            # Update provider stats
            _provider_stats[provider_name]["tokens"] += total_tokens
            _provider_stats[provider_name]["input_tokens"] += input_tokens
            _provider_stats[provider_name]["output_tokens"] += output_tokens
            _provider_stats[provider_name]["latency_ms"] += latency_ms

            # Track in metrics system
            try:
                from codeshield.utils.metrics import get_metrics
                get_metrics().track_tokens(provider_name, input_tokens, output_tokens, success=True)
            except ImportError:
                pass

            return LLMResponse(
                content=data["choices"][0]["message"]["content"],
                provider=provider_name,
                model=model or config["default_model"],
                tokens_used=total_tokens,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                latency_ms=latency_ms,
            )
        except Exception as e:
            print(f"LLM error ({provider_name}): {e}")
            _provider_stats[provider_name]["errors"] += 1
            # Try fallback chain: cometapi -> novita -> aiml
            if provider_name == "cometapi":
                return self._try_novita(prompt, system_prompt, model, max_tokens)
            elif provider_name == "novita":
                return self._try_aiml(prompt, system_prompt, model, max_tokens)
            return None

    def _try_novita(self, prompt: str, system_prompt: Optional[str], model: Optional[str], max_tokens: int) -> Optional[LLMResponse]:
        """Fallback to Novita.ai API"""
        config = self.PROVIDERS.get("novita")
        if not config:
            return self._try_aiml(prompt, system_prompt, model, max_tokens)

        api_key = os.getenv(config["env_key"])
        if not api_key:
            return self._try_aiml(prompt, system_prompt, model, max_tokens)

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        _provider_stats["novita"]["calls"] += 1

        try:
            response = self._client.post(
                f"{config['base_url']}/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model or config["default_model"],
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
            )
            response.raise_for_status()
            data = response.json()

            tokens = data.get("usage", {}).get("total_tokens", 0)
            _provider_stats["novita"]["tokens"] += tokens

            return LLMResponse(
                content=data["choices"][0]["message"]["content"],
                provider="novita",
                model=model or config["default_model"],
                tokens_used=tokens,
            )
        except Exception as e:
            print(f"Novita error: {e}")
            _provider_stats["novita"]["errors"] += 1
            return self._try_aiml(prompt, system_prompt, model, max_tokens)

    def _try_aiml(self, prompt: str, system_prompt: Optional[str], model: Optional[str], max_tokens: int) -> Optional[LLMResponse]:
        """Fallback to AIML API"""
        config = self.PROVIDERS.get("aiml")
        if not config:
            return None

        api_key = os.getenv(config["env_key"])
        if not api_key:
            return None

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        _provider_stats["aiml"]["calls"] += 1

        try:
            response = self._client.post(
                f"{config['base_url']}/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model or config["default_model"],
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
            )
            response.raise_for_status()
            data = response.json()

            tokens = data.get("usage", {}).get("total_tokens", 0)
            _provider_stats["aiml"]["tokens"] += tokens

            return LLMResponse(
                content=data["choices"][0]["message"]["content"],
                provider="aiml",
                model=model or config["default_model"],
                tokens_used=tokens,
            )
        except Exception as e:
            print(f"AIML error: {e}")
            _provider_stats["aiml"]["errors"] += 1
            return None

    def generate_fix(self, code: str, issues: list[str]) -> Optional[str]:
        """Generate code fix using LLM - MAXIMUM TOKEN EFFICIENCY"""
        from codeshield.utils.token_optimizer import (
            get_token_optimizer, optimize_fix_prompt,
            LocalProcessor, ModelTier, get_optimal_max_tokens
        )

        optimizer = get_token_optimizer()

        # 1. TRY LOCAL FIX FIRST (0 tokens!)
        local_fix = LocalProcessor.fix_locally(code, issues)
        if local_fix is not None:
            if not hasattr(optimizer, '_local_saves'):
                optimizer._local_saves = 0
            optimizer._local_saves += 1
            return local_fix

        # 2. Generate optimized prompt
        prompt = optimize_fix_prompt(code, issues)

        # Check if local fix was signaled
        if prompt == "__LOCAL_FIX__":
            return LocalProcessor.fix_locally(code, issues)

        system_prompt = "Fix code. Return code only."  # Ultra short

        # 3. Check cache
        cached = optimizer.get_cached(prompt, system_prompt)
        if cached:
            return cached.content

        # 4. Calculate optimal max_tokens (dynamic based on code size)
        max_tokens = get_optimal_max_tokens("fix", len(code))

        # 5. Check budget
        estimated = optimizer.estimate_tokens(prompt) + max_tokens
        if not optimizer.check_budget(estimated):
            print("Token budget exceeded")
            return None

        # 6. Select optimal model for task complexity
        provider_info = self._get_available_provider()
        if provider_info:
            provider_name = provider_info[0]
            optimal_model = ModelTier.select_model(code, issues, provider_name)
        else:
            optimal_model = None

        response = self.chat(
            prompt=prompt,
            system_prompt=system_prompt,
            model=optimal_model,
            max_tokens=max_tokens,
        )

        if response:
            # Cache the response
            optimizer.cache_response(prompt, response, system_prompt)
            optimizer.record_usage(response.tokens_used)

            # Extract code from response
            content = response.content
            if "```python" in content:
                content = content.split("```python")[1].split("```")[0]
            elif "```" in content:
                content = content.split("```")[1].split("```")[0]
            return content.strip()
        return None

    def generate_context_briefing(self, context: dict) -> Optional[str]:
        """Generate context briefing - MAXIMUM TOKEN EFFICIENCY"""
        from codeshield.utils.token_optimizer import (
            get_token_optimizer, optimize_context_prompt, get_optimal_max_tokens
        )

        optimizer = get_token_optimizer()

        # Use optimized prompt
        prompt = optimize_context_prompt(context)

        # Check cache
        cached = optimizer.get_cached(prompt)
        if cached:
            return cached.content

        # Dynamic max_tokens (very short for briefings)
        max_tokens = get_optimal_max_tokens("briefing", 0)

        response = self.chat(prompt=prompt, max_tokens=max_tokens)

        if response:
            optimizer.cache_response(prompt, response)
            optimizer.record_usage(response.tokens_used)
            return response.content
        return None


# Singleton instance
_llm_client: Optional[LLMClient] = None


def get_llm_client() -> LLMClient:
    """Get or create LLM client singleton"""
    global _llm_client
    if _llm_client is None:
        _llm_client = LLMClient()
    return _llm_client