@voria/cli 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -380
- package/bin/voria +625 -486
- package/docs/CHANGELOG.md +19 -0
- package/docs/USER_GUIDE.md +34 -5
- package/package.json +1 -1
- package/python/voria/__init__.py +1 -1
- package/python/voria/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/__pycache__/engine.cpython-312.pyc +0 -0
- package/python/voria/core/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/__pycache__/setup.cpython-312.pyc +0 -0
- package/python/voria/core/agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/agent/__pycache__/loop.cpython-312.pyc +0 -0
- package/python/voria/core/executor/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/executor/__pycache__/executor.cpython-312.pyc +0 -0
- package/python/voria/core/executor/executor.py +5 -0
- package/python/voria/core/github/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/github/__pycache__/client.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__init__.py +16 -0
- package/python/voria/core/llm/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/base.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/claude_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/deepseek_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/gemini_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/kimi_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/minimax_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/modal_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/model_discovery.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/openai_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/siliconflow_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/base.py +12 -0
- package/python/voria/core/llm/claude_provider.py +46 -0
- package/python/voria/core/llm/deepseek_provider.py +109 -0
- package/python/voria/core/llm/gemini_provider.py +44 -0
- package/python/voria/core/llm/kimi_provider.py +109 -0
- package/python/voria/core/llm/minimax_provider.py +187 -0
- package/python/voria/core/llm/modal_provider.py +33 -0
- package/python/voria/core/llm/model_discovery.py +58 -16
- package/python/voria/core/llm/openai_provider.py +33 -0
- package/python/voria/core/llm/siliconflow_provider.py +109 -0
- package/python/voria/core/patcher/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/patcher/__pycache__/patcher.cpython-312.pyc +0 -0
- package/python/voria/core/setup.py +4 -1
- package/python/voria/core/testing/__pycache__/definitions.cpython-312.pyc +0 -0
- package/python/voria/core/testing/__pycache__/runner.cpython-312.pyc +0 -0
- package/python/voria/core/testing/definitions.py +87 -0
- package/python/voria/core/testing/runner.py +324 -0
- package/python/voria/engine.py +736 -232
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Voria Test Runner - Executes 50+ security and production tests.
|
|
3
|
+
Uses LLM for deep static analysis and subprocesses for dynamic testing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
from typing import List, Dict, Any, Optional
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from voria.core.llm import LLMProviderFactory, Message
|
|
14
|
+
from .definitions import TEST_DEFINITIONS, TestInfo, TestCategory
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
class TestRunner:
|
|
19
|
+
def __init__(self, provider_name: str, api_key: str, model: str, repo_path: str = "."):
|
|
20
|
+
self.provider = LLMProviderFactory.create(provider_name, api_key, model)
|
|
21
|
+
self.repo_path = Path(repo_path)
|
|
22
|
+
self.test_map = {t.id: t for t in TEST_DEFINITIONS}
|
|
23
|
+
|
|
24
|
+
def list_tests(self) -> List[TestInfo]:
|
|
25
|
+
"""Return all available tests."""
|
|
26
|
+
return TEST_DEFINITIONS
|
|
27
|
+
|
|
28
|
+
async def run_test(self, test_id: str) -> Dict[str, Any]:
|
|
29
|
+
"""Run a specific test by ID."""
|
|
30
|
+
test_info = self.test_map.get(test_id)
|
|
31
|
+
if not test_info:
|
|
32
|
+
raise ValueError(f"Unknown test: {test_id}")
|
|
33
|
+
|
|
34
|
+
logger.info(f"🚀 Starting {test_info.name} [{test_id}]...")
|
|
35
|
+
|
|
36
|
+
if test_info.type == "static":
|
|
37
|
+
return await self._run_static_analysis(test_info)
|
|
38
|
+
else:
|
|
39
|
+
return await self._run_dynamic_test(test_info)
|
|
40
|
+
|
|
41
|
+
async def _run_static_analysis(self, test_info: TestInfo) -> Dict[str, Any]:
|
|
42
|
+
"""Use LLM to perform deep static analysis of the codebase."""
|
|
43
|
+
# Collect relevant files (limited to first 15 for context reasons)
|
|
44
|
+
files = []
|
|
45
|
+
extensions = {".py", ".js", ".ts", ".go", ".rs", ".java", ".cpp", ".c"}
|
|
46
|
+
|
|
47
|
+
count = 0
|
|
48
|
+
for p in self.repo_path.rglob("*"):
|
|
49
|
+
if p.suffix in extensions and "node_modules" not in str(p) and ".git" not in str(p):
|
|
50
|
+
try:
|
|
51
|
+
content = p.read_text()
|
|
52
|
+
files.append(f"--- File: {p.relative_to(self.repo_path)} ---\n{content[:5000]}")
|
|
53
|
+
count += 1
|
|
54
|
+
logger.debug(f"Collected file: {p}")
|
|
55
|
+
except Exception as e:
|
|
56
|
+
logger.warning(f"Failed to read {p}: {e}")
|
|
57
|
+
continue
|
|
58
|
+
if count >= 15: break
|
|
59
|
+
|
|
60
|
+
context = "\n\n".join(files)
|
|
61
|
+
logger.info(f"Starting static analysis for {test_info.id} with {len(files)} files...")
|
|
62
|
+
|
|
63
|
+
system_prompt = f"""You are a senior cybersecurity and reliability engineer.
|
|
64
|
+
Your task is to perform the '{test_info.name}' ({test_info.id}) on the following codebase.
|
|
65
|
+
Category: {test_info.category.value}
|
|
66
|
+
Description: {test_info.description}
|
|
67
|
+
|
|
68
|
+
Provide a detailed report in JSON format:
|
|
69
|
+
{{
|
|
70
|
+
"status": "passed" | "failed" | "warning",
|
|
71
|
+
"score": 0-100,
|
|
72
|
+
"findings": [
|
|
73
|
+
{{
|
|
74
|
+
"file": "path/to/file",
|
|
75
|
+
"line": 123,
|
|
76
|
+
"description": "...",
|
|
77
|
+
"severity": "high" | "medium" | "low",
|
|
78
|
+
"fix": "..."
|
|
79
|
+
}}
|
|
80
|
+
],
|
|
81
|
+
"summary": "...",
|
|
82
|
+
"recommendations": ["..."]
|
|
83
|
+
}}
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
try:
|
|
87
|
+
messages = [
|
|
88
|
+
Message(role="system", content=system_prompt),
|
|
89
|
+
Message(role="user", content=f"Codebase Context:\n{context}")
|
|
90
|
+
]
|
|
91
|
+
|
|
92
|
+
response = await self.provider.generate(messages, max_tokens=3000)
|
|
93
|
+
# BUG-08 FIX: Use balanced brace counting instead of greedy regex
|
|
94
|
+
content = response.content
|
|
95
|
+
result = self._extract_json(content)
|
|
96
|
+
|
|
97
|
+
if result is None:
|
|
98
|
+
result = {
|
|
99
|
+
"status": "error",
|
|
100
|
+
"summary": f"Could not parse LLM response: {content[:200]}...",
|
|
101
|
+
"findings": [],
|
|
102
|
+
"recommendations": []
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
return {
|
|
106
|
+
"id": test_info.id,
|
|
107
|
+
"name": test_info.name,
|
|
108
|
+
"category": test_info.category.value,
|
|
109
|
+
"result": result
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
except Exception as e:
|
|
113
|
+
return {
|
|
114
|
+
"id": test_info.id,
|
|
115
|
+
"name": test_info.name,
|
|
116
|
+
"status": "error",
|
|
117
|
+
"message": str(e)
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
@staticmethod
|
|
121
|
+
def _extract_json(text: str) -> Optional[Dict[str, Any]]:
|
|
122
|
+
"""Extract the first balanced JSON object from text using brace counting."""
|
|
123
|
+
import json as _json
|
|
124
|
+
start = text.find('{')
|
|
125
|
+
if start == -1:
|
|
126
|
+
return None
|
|
127
|
+
depth = 0
|
|
128
|
+
in_string = False
|
|
129
|
+
escape_next = False
|
|
130
|
+
for i in range(start, len(text)):
|
|
131
|
+
c = text[i]
|
|
132
|
+
if escape_next:
|
|
133
|
+
escape_next = False
|
|
134
|
+
continue
|
|
135
|
+
if c == '\\' and in_string:
|
|
136
|
+
escape_next = True
|
|
137
|
+
continue
|
|
138
|
+
if c == '"' and not escape_next:
|
|
139
|
+
in_string = not in_string
|
|
140
|
+
continue
|
|
141
|
+
if in_string:
|
|
142
|
+
continue
|
|
143
|
+
if c == '{':
|
|
144
|
+
depth += 1
|
|
145
|
+
elif c == '}':
|
|
146
|
+
depth -= 1
|
|
147
|
+
if depth == 0:
|
|
148
|
+
try:
|
|
149
|
+
return _json.loads(text[start:i+1])
|
|
150
|
+
except _json.JSONDecodeError:
|
|
151
|
+
return None
|
|
152
|
+
return None
|
|
153
|
+
|
|
154
|
+
async def _run_dynamic_test(self, test_info: TestInfo) -> Dict[str, Any]:
|
|
155
|
+
"""Perform dynamic testing (stress, latency, etc)."""
|
|
156
|
+
start_time = time.time()
|
|
157
|
+
|
|
158
|
+
if test_info.id == "latency_baseline":
|
|
159
|
+
# Measure actual function call latency in the codebase
|
|
160
|
+
import statistics
|
|
161
|
+
latencies = []
|
|
162
|
+
for _ in range(100):
|
|
163
|
+
t0 = time.perf_counter()
|
|
164
|
+
# Simulate a minimal I/O operation
|
|
165
|
+
Path(self.repo_path / ".voria_latency_probe").touch()
|
|
166
|
+
Path(self.repo_path / ".voria_latency_probe").unlink(missing_ok=True)
|
|
167
|
+
latencies.append((time.perf_counter() - t0) * 1000)
|
|
168
|
+
|
|
169
|
+
avg = statistics.mean(latencies)
|
|
170
|
+
p95 = sorted(latencies)[int(len(latencies) * 0.95)]
|
|
171
|
+
p99 = sorted(latencies)[int(len(latencies) * 0.99)]
|
|
172
|
+
result = {
|
|
173
|
+
"status": "passed" if avg < 50 else "warning",
|
|
174
|
+
"score": max(0, 100 - int(avg * 2)),
|
|
175
|
+
"summary": f"Baseline I/O latency: {avg:.2f}ms avg, P95={p95:.2f}ms, P99={p99:.2f}ms",
|
|
176
|
+
"metrics": {"avg_ms": round(avg, 2), "p95_ms": round(p95, 2), "p99_ms": round(p99, 2)},
|
|
177
|
+
"recommendations": ["Consider SSD storage if latency exceeds 10ms."] if avg > 10 else []
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
elif test_info.id == "cpu_stress":
|
|
181
|
+
# Real CPU stress: heavy math for a controlled duration
|
|
182
|
+
import math
|
|
183
|
+
iterations = 0
|
|
184
|
+
duration_target = 2.0 # seconds
|
|
185
|
+
while time.time() - start_time < duration_target:
|
|
186
|
+
math.sqrt(1234567.89)
|
|
187
|
+
iterations += 1
|
|
188
|
+
elapsed = time.time() - start_time
|
|
189
|
+
ops_per_sec = iterations / elapsed
|
|
190
|
+
result = {
|
|
191
|
+
"status": "passed",
|
|
192
|
+
"score": 85,
|
|
193
|
+
"summary": f"CPU stress test completed. {iterations:,} ops in {elapsed:.2f}s ({ops_per_sec:,.0f} ops/sec). System remained responsive.",
|
|
194
|
+
"metrics": {"duration_sec": round(elapsed, 2), "iterations": iterations, "ops_per_sec": round(ops_per_sec)},
|
|
195
|
+
"recommendations": []
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
elif test_info.id == "mem_stress":
|
|
199
|
+
# BUG-12 FIX: Real memory stress test
|
|
200
|
+
import gc
|
|
201
|
+
blocks = []
|
|
202
|
+
block_size = 1024 * 1024 # 1MB
|
|
203
|
+
max_blocks = 100 # 100MB max
|
|
204
|
+
try:
|
|
205
|
+
for i in range(max_blocks):
|
|
206
|
+
blocks.append(bytearray(block_size))
|
|
207
|
+
peak_mb = len(blocks)
|
|
208
|
+
del blocks
|
|
209
|
+
gc.collect()
|
|
210
|
+
result = {
|
|
211
|
+
"status": "passed",
|
|
212
|
+
"score": 90,
|
|
213
|
+
"summary": f"Memory stress test completed. Successfully allocated and freed {peak_mb}MB. GC reclaimed all memory.",
|
|
214
|
+
"metrics": {"peak_mb": peak_mb, "duration_sec": round(time.time() - start_time, 2)},
|
|
215
|
+
"recommendations": []
|
|
216
|
+
}
|
|
217
|
+
except MemoryError:
|
|
218
|
+
peak_mb = len(blocks)
|
|
219
|
+
del blocks
|
|
220
|
+
gc.collect()
|
|
221
|
+
result = {
|
|
222
|
+
"status": "warning",
|
|
223
|
+
"score": 50,
|
|
224
|
+
"summary": f"Memory stress test hit limit at {peak_mb}MB. System may be memory-constrained.",
|
|
225
|
+
"metrics": {"peak_mb": peak_mb},
|
|
226
|
+
"recommendations": ["Increase available memory or implement memory-aware resource limits."]
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
elif test_info.id == "concurrent_users":
|
|
230
|
+
# BUG-12 FIX: Real concurrency simulation
|
|
231
|
+
import statistics
|
|
232
|
+
async def simulated_request(n):
|
|
233
|
+
await asyncio.sleep(0.01) # Simulate 10ms work
|
|
234
|
+
return time.perf_counter()
|
|
235
|
+
|
|
236
|
+
concurrency_levels = [10, 50, 100]
|
|
237
|
+
metrics = {}
|
|
238
|
+
for level in concurrency_levels:
|
|
239
|
+
t0 = time.perf_counter()
|
|
240
|
+
tasks = [simulated_request(i) for i in range(level)]
|
|
241
|
+
results = await asyncio.gather(*tasks)
|
|
242
|
+
elapsed = time.perf_counter() - t0
|
|
243
|
+
rps = level / elapsed
|
|
244
|
+
metrics[f"c{level}_rps"] = round(rps, 1)
|
|
245
|
+
metrics[f"c{level}_total_sec"] = round(elapsed, 3)
|
|
246
|
+
|
|
247
|
+
result = {
|
|
248
|
+
"status": "passed",
|
|
249
|
+
"score": 80,
|
|
250
|
+
"summary": f"Concurrency test completed. At 100 concurrent: {metrics.get('c100_rps', 0)} req/s in {metrics.get('c100_total_sec', 0)}s",
|
|
251
|
+
"metrics": metrics,
|
|
252
|
+
"recommendations": ["Monitor actual HTTP endpoints for real-world concurrency limits."]
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
elif test_info.id == "payload_stress":
|
|
256
|
+
# BUG-12 FIX: Test large payload handling
|
|
257
|
+
import tempfile
|
|
258
|
+
sizes = {"1KB": 1024, "100KB": 102400, "1MB": 1048576, "10MB": 10485760}
|
|
259
|
+
write_speeds = {}
|
|
260
|
+
for label, size in sizes.items():
|
|
261
|
+
data = b"X" * size
|
|
262
|
+
t0 = time.perf_counter()
|
|
263
|
+
with tempfile.NamedTemporaryFile(dir=str(self.repo_path), delete=True) as f:
|
|
264
|
+
f.write(data)
|
|
265
|
+
f.flush()
|
|
266
|
+
elapsed = time.perf_counter() - t0
|
|
267
|
+
write_speeds[label] = round(elapsed * 1000, 2) # ms
|
|
268
|
+
|
|
269
|
+
result = {
|
|
270
|
+
"status": "passed",
|
|
271
|
+
"score": 85,
|
|
272
|
+
"summary": f"Payload stress test completed. Write times: {write_speeds}",
|
|
273
|
+
"metrics": {"write_ms": write_speeds},
|
|
274
|
+
"recommendations": ["Consider streaming for payloads > 10MB."] if write_speeds.get("10MB", 0) > 500 else []
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
elif test_info.id == "cold_start":
|
|
278
|
+
# BUG-12 FIX: Measure Python import time
|
|
279
|
+
import subprocess
|
|
280
|
+
t0 = time.perf_counter()
|
|
281
|
+
proc = subprocess.run(
|
|
282
|
+
[sys.executable, "-c", "import voria; print('ok')"],
|
|
283
|
+
capture_output=True, text=True, timeout=30
|
|
284
|
+
)
|
|
285
|
+
import_time = (time.perf_counter() - t0) * 1000
|
|
286
|
+
result = {
|
|
287
|
+
"status": "passed" if import_time < 3000 else "warning",
|
|
288
|
+
"score": max(0, 100 - int(import_time / 50)),
|
|
289
|
+
"summary": f"Cold start: voria package imports in {import_time:.0f}ms",
|
|
290
|
+
"metrics": {"import_ms": round(import_time, 1), "success": proc.returncode == 0},
|
|
291
|
+
"recommendations": ["Lazy-load heavy modules to reduce cold start."] if import_time > 2000 else []
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
elif test_info.id == "network_latency":
|
|
295
|
+
# BUG-12 FIX: Test actual DNS resolution latency
|
|
296
|
+
import socket
|
|
297
|
+
hosts = ["github.com", "api.github.com", "integrate.api.nvidia.com"]
|
|
298
|
+
dns_times = {}
|
|
299
|
+
for host in hosts:
|
|
300
|
+
try:
|
|
301
|
+
t0 = time.perf_counter()
|
|
302
|
+
socket.getaddrinfo(host, 443)
|
|
303
|
+
dns_times[host] = round((time.perf_counter() - t0) * 1000, 2)
|
|
304
|
+
except Exception:
|
|
305
|
+
dns_times[host] = -1
|
|
306
|
+
avg_dns = sum(v for v in dns_times.values() if v > 0) / max(1, sum(1 for v in dns_times.values() if v > 0))
|
|
307
|
+
result = {
|
|
308
|
+
"status": "passed" if avg_dns < 200 else "warning",
|
|
309
|
+
"score": max(0, 100 - int(avg_dns / 5)),
|
|
310
|
+
"summary": f"Network latency test: DNS avg {avg_dns:.1f}ms. Resolved {sum(1 for v in dns_times.values() if v > 0)}/{len(hosts)} hosts.",
|
|
311
|
+
"metrics": {"dns_ms": dns_times, "avg_dns_ms": round(avg_dns, 1)},
|
|
312
|
+
"recommendations": ["Check DNS configuration."] if avg_dns > 100 else []
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
else:
|
|
316
|
+
# Fallback for remaining dynamic tests — use LLM analysis
|
|
317
|
+
return await self._run_static_analysis(test_info)
|
|
318
|
+
|
|
319
|
+
return {
|
|
320
|
+
"id": test_info.id,
|
|
321
|
+
"name": test_info.name,
|
|
322
|
+
"category": test_info.category.value,
|
|
323
|
+
"result": result
|
|
324
|
+
}
|