@voria/cli 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -380
- package/bin/voria +635 -481
- package/docs/CHANGELOG.md +19 -0
- package/docs/USER_GUIDE.md +34 -5
- package/package.json +1 -1
- package/python/voria/__init__.py +1 -1
- package/python/voria/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/__pycache__/engine.cpython-312.pyc +0 -0
- package/python/voria/core/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/__pycache__/setup.cpython-312.pyc +0 -0
- package/python/voria/core/agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/agent/__pycache__/loop.cpython-312.pyc +0 -0
- package/python/voria/core/executor/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/executor/__pycache__/executor.cpython-312.pyc +0 -0
- package/python/voria/core/executor/executor.py +5 -0
- package/python/voria/core/github/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/github/__pycache__/client.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__init__.py +16 -0
- package/python/voria/core/llm/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/base.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/claude_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/deepseek_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/gemini_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/kimi_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/minimax_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/modal_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/model_discovery.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/openai_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/siliconflow_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/base.py +12 -0
- package/python/voria/core/llm/claude_provider.py +46 -0
- package/python/voria/core/llm/deepseek_provider.py +109 -0
- package/python/voria/core/llm/gemini_provider.py +44 -0
- package/python/voria/core/llm/kimi_provider.py +109 -0
- package/python/voria/core/llm/minimax_provider.py +187 -0
- package/python/voria/core/llm/modal_provider.py +33 -0
- package/python/voria/core/llm/model_discovery.py +104 -155
- package/python/voria/core/llm/openai_provider.py +33 -0
- package/python/voria/core/llm/siliconflow_provider.py +109 -0
- package/python/voria/core/patcher/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/patcher/__pycache__/patcher.cpython-312.pyc +0 -0
- package/python/voria/core/setup.py +4 -1
- package/python/voria/core/testing/__pycache__/definitions.cpython-312.pyc +0 -0
- package/python/voria/core/testing/__pycache__/runner.cpython-312.pyc +0 -0
- package/python/voria/core/testing/definitions.py +87 -0
- package/python/voria/core/testing/runner.py +324 -0
- package/python/voria/engine.py +736 -232
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""SiliconFlow LLM Provider
|
|
2
|
+
|
|
3
|
+
SiliconFlow provides access to multiple open-source models via OpenAI-compatible API.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
from typing import List, Dict, Any, Optional
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from .base import BaseLLMProvider, Message, LLMResponse
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SiliconFlowProvider(BaseLLMProvider):
    """SiliconFlow LLM Provider (OpenAI-compatible API).

    SiliconFlow exposes multiple open-source models behind an
    OpenAI-compatible chat-completions endpoint.  All requests share one
    long-lived ``httpx.AsyncClient`` carrying the bearer token; call
    :meth:`close` when done to release the connection pool.
    """

    API_ENDPOINT = "https://api.siliconflow.cn/v1/chat/completions"
    DEFAULT_MODEL = "deepseek-ai/DeepSeek-V2.5"

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
        """Create the provider and its shared async HTTP client.

        Args:
            api_key: SiliconFlow API key (sent as a Bearer token).
            model: Model identifier; defaults to ``DEFAULT_MODEL``.
        """
        super().__init__(api_key, model)
        # Generous timeout: large-model completions can take minutes.
        self.client = httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            timeout=300.0,
        )

    async def generate(
        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
    ) -> LLMResponse:
        """Run a single (non-streaming) chat completion.

        Args:
            messages: Conversation so far, in role/content form.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.

        Returns:
            LLMResponse with the completion text and token usage.

        Raises:
            httpx.HTTPStatusError: for non-2xx responses (logged, re-raised).
        """
        try:
            payload = {
                "model": self.model,
                "messages": [
                    {"role": msg.role, "content": msg.content} for msg in messages
                ],
                "max_tokens": max_tokens,
                "temperature": temperature,
            }
            response = await self.client.post(self.API_ENDPOINT, json=payload)
            response.raise_for_status()
            data = response.json()
            content = data["choices"][0]["message"]["content"]
            # Usage block may be absent; default to 0 rather than KeyError.
            tokens_used = data.get("usage", {}).get("total_tokens", 0)
            return LLMResponse(content=content, tokens_used=tokens_used, model=self.model, provider="SiliconFlow")
        except Exception as e:
            logger.error(f"SiliconFlow API error: {e}")
            raise

    async def stream_generate(
        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
    ):
        """Yield content chunks from a streaming (SSE) chat completion."""
        try:
            payload = {
                "model": self.model,
                "messages": [{"role": m.role, "content": m.content} for m in messages],
                "max_tokens": max_tokens,
                "temperature": temperature,
                "stream": True,
            }
            async with self.client.stream("POST", self.API_ENDPOINT, json=payload) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    if line.startswith("data: "):
                        data_str = line[6:]  # strip the "data: " SSE prefix
                        if data_str == "[DONE]":
                            break
                        try:
                            data = json.loads(data_str)
                            delta = data["choices"][0].get("delta", {})
                            if "content" in delta:
                                yield delta["content"]
                        except (json.JSONDecodeError, KeyError, IndexError):
                            # Skip malformed or keep-alive SSE lines rather
                            # than aborting the whole stream.  (Narrowed from
                            # a bare Exception catch so real failures inside
                            # the consumer are not silently swallowed.)
                            continue
        except Exception as e:
            logger.error(f"SiliconFlow stream error: {e}")
            raise

    async def plan(self, issue_description: str) -> str:
        """Ask the model for a detailed implementation plan for an issue."""
        system_message = Message(role="system", content="You are an expert software architect. Create a detailed implementation plan.")
        user_message = Message(role="user", content=f"Issue:\n{issue_description}")
        response = await self.generate([system_message, user_message], max_tokens=2000)
        return response.content

    async def generate_patch(self, issue_description: str, context_files: Dict[str, str], previous_errors: Optional[str] = None) -> str:
        """Generate a unified-diff patch for the issue.

        Args:
            issue_description: The issue text to fix.
            context_files: Mapping of file path -> file content for context.
            previous_errors: Optional output from earlier failed attempts.

        Returns:
            The model's patch text (expected to be a unified diff).
        """
        system_message = Message(role="system", content="Generate a unified diff format patch.")
        context = f"Issue:\n{issue_description}\n\n"
        for filename, content in context_files.items():
            # BUG FIX: label each file with its real path — the loop variable
            # was previously unused and every file got the same placeholder
            # label, so the model could not emit valid diff headers.
            context += f"\n--- {filename} ---\n{content}\n"
        if previous_errors:
            context += f"\nPrevious Errors:\n{previous_errors}"
        user_message = Message(role="user", content=context)
        # Lower temperature for more deterministic patch output.
        response = await self.generate([system_message, user_message], max_tokens=3000, temperature=0.5)
        return response.content

    async def analyze_test_failure(self, test_output: str, code_context: str) -> Dict[str, Any]:
        """Analyze a test failure and return the model's suggested fixes."""
        system_message = Message(role="system", content="Analyze the test failure and suggest fixes.")
        user_message = Message(role="user", content=f"Test Output:\n{test_output}\n\nCode:\n{code_context}")
        response = await self.generate([system_message, user_message], max_tokens=1500)
        return {"analysis": response.content, "provider": "SiliconFlow", "tokens_used": response.tokens_used}

    async def close(self):
        """Close the shared HTTP client and its connection pool."""
        await self.client.aclose()
|
|
Binary file
|
|
Binary file
|
|
@@ -80,6 +80,8 @@ class ProviderSetup:
|
|
|
80
80
|
models = await ModelDiscovery._get_gemini_fallback()
|
|
81
81
|
elif provider_name == "claude":
|
|
82
82
|
models = await ModelDiscovery._get_claude_fallback()
|
|
83
|
+
elif provider_name == "minimax":
|
|
84
|
+
models = await ModelDiscovery._get_minimax_fallback()
|
|
83
85
|
|
|
84
86
|
# Step 4: Choose model
|
|
85
87
|
chosen_model = await self._choose_model(models)
|
|
@@ -117,7 +119,7 @@ class ProviderSetup:
|
|
|
117
119
|
|
|
118
120
|
while True:
|
|
119
121
|
try:
|
|
120
|
-
choice = input("\nEnter number (1-
|
|
122
|
+
choice = input(f"\nEnter number (1-{len(providers)}): ").strip()
|
|
121
123
|
idx = int(choice) - 1
|
|
122
124
|
if 0 <= idx < len(providers):
|
|
123
125
|
return providers[idx]
|
|
@@ -133,6 +135,7 @@ class ProviderSetup:
|
|
|
133
135
|
"openai": ["OPENAI_API_KEY"],
|
|
134
136
|
"gemini": ["GOOGLE_API_KEY", "GEMINI_API_KEY"],
|
|
135
137
|
"claude": ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY"],
|
|
138
|
+
"minimax": ["MINIMAX_API_KEY", "NVIDIA_API_KEY"],
|
|
136
139
|
}
|
|
137
140
|
|
|
138
141
|
for env_var in env_vars.get(provider_name, []):
|
|
Binary file
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Definitions for 50+ different types of testing supported by voria.
|
|
3
|
+
Combines security (pentesting) and production/reliability tests.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import List, Dict, Any, Optional
|
|
9
|
+
|
|
10
|
+
class TestCategory(Enum):
    """Top-level grouping for voria tests; values are display labels."""
    SECURITY = "Security (Pentesting)"
    PRODUCTION = "Production & Reliability"
    PERFORMANCE = "Performance & Latency"
    STRESS = "Stress Testing"
    QUALITY = "Code Quality & Compliance"
|
|
16
|
+
|
|
17
|
+
@dataclass
class TestInfo:
    """Metadata describing one voria test (see TEST_DEFINITIONS)."""
    id: str  # unique machine-readable identifier, e.g. "sql_injection"
    name: str  # human-readable display name
    category: TestCategory  # grouping used for reporting
    description: str  # one-sentence explanation of what the test checks
    impact: str  # High, Medium, Low (entries in TEST_DEFINITIONS also use "Critical")
    type: str  # "static" (code analysis) or "dynamic" (runtime)
|
|
25
|
+
|
|
26
|
+
# The master list of all 52 tests (25 security + 10 production + 10
# performance/stress + 7 quality).
# Positional field order: TestInfo(id, name, category, description, impact, type).
TEST_DEFINITIONS: List[TestInfo] = [
    # --- SECURITY (25 tests) ---
    TestInfo("sql_injection", "SQL Injection Scan", TestCategory.SECURITY, "Checks for improper sanitization of database queries.", "Critical", "static"),
    TestInfo("xss", "Cross-Site Scripting (XSS)", TestCategory.SECURITY, "Checks for reflected or stored XSS vulnerabilities in web code.", "High", "static"),
    TestInfo("csrf", "CSRF Protection Audit", TestCategory.SECURITY, "Verifies presence of CSRF tokens in state-changing requests.", "High", "static"),
    TestInfo("path_traversal", "Path Traversal Probe", TestCategory.SECURITY, "Detects insecure file path handling that could allow unauthorized access.", "High", "static"),
    TestInfo("insecure_deserialization", "Insecure Deserialization", TestCategory.SECURITY, "Identifies unsafe decoding of serialized data.", "Critical", "static"),
    TestInfo("hardcoded_secrets", "Hardcoded Secret Detection", TestCategory.SECURITY, "Scans codebase for API keys, passwords, and private certificates.", "Critical", "static"),
    TestInfo("insecure_jwt", "Insecure JWT Handling", TestCategory.SECURITY, "Checks for weak JWT algorithms or lack of signature verification.", "High", "static"),
    TestInfo("broken_access_control", "Broken Access Control", TestCategory.SECURITY, "Analyzes authorization logic for potential bypasses.", "High", "static"),
    TestInfo("open_redirect", "Open Redirect Audit", TestCategory.SECURITY, "Checks for unsafe user-controlled redirection URLs.", "Medium", "static"),
    TestInfo("security_headers", "Security Headers Audit", TestCategory.SECURITY, "Verifies presence of CSP, HSTS, and X-Content-Type headers.", "Medium", "static"),
    TestInfo("clickjacking", "Clickjacking Vulnerability", TestCategory.SECURITY, "Checks for X-Frame-Options or suitable CSP directives.", "Low", "static"),
    TestInfo("bruteforce_protection", "Bruteforce Protection", TestCategory.SECURITY, "Identifies lack of rate limiting or account lockout logic.", "Medium", "static"),
    TestInfo("weak_crypto", "Weak Cryptography", TestCategory.SECURITY, "Detects use of MD5, SHA1, or other deprecated algorithms.", "High", "static"),
    TestInfo("sensitive_data_exposure", "Sensitive Data Exposure", TestCategory.SECURITY, "Checks for PII or sensitive info leaked in logs or error messages.", "High", "static"),
    TestInfo("xxe", "XML External Entity (XXE)", TestCategory.SECURITY, "Checks for insecure XML parsers allowed to resolve external entities.", "High", "static"),
    TestInfo("insecure_upload", "Insecure File Upload", TestCategory.SECURITY, "Analyzes file upload handling for potential malicious file execution.", "High", "static"),
    TestInfo("command_injection", "Command Injection Scan", TestCategory.SECURITY, "Checks for shell commands built using untrusted user input.", "Critical", "static"),
    TestInfo("directory_listing", "Directory Listing Probe", TestCategory.SECURITY, "Checks web config for inadvertent directory listing enablement.", "Medium", "static"),
    TestInfo("ssrf", "Server-Side Request Forgery", TestCategory.SECURITY, "Detects code that makes requests to user-controlled internal URLs.", "High", "static"),
    TestInfo("session_management", "Improper Session Management", TestCategory.SECURITY, "Analyzes session lifecycle, fixation, and timeout logic.", "Medium", "static"),
    TestInfo("rate_limiting", "Lack of Rate Limiting", TestCategory.SECURITY, "Checks for API endpoints vulnerable to abuse without throttling.", "Medium", "static"),
    TestInfo("info_leakage", "Information Leakage Scan", TestCategory.SECURITY, "Detects server versions or stack traces exposed to end users.", "Low", "static"),
    TestInfo("vulnerable_components", "Known Vulnerable Components", TestCategory.SECURITY, "Audit dependencies against known vulnerability databases.", "High", "static"),
    TestInfo("integrity_checks", "Lack of Integrity Checks", TestCategory.SECURITY, "Checks if downloaded assets or code lack checksum verification.", "Medium", "static"),
    TestInfo("error_handling_leak", "Error Handling Leakage", TestCategory.SECURITY, "Verifies that catch blocks don't expose system internals.", "Low", "static"),

    # --- PRODUCTION & RELIABILITY (10 tests) ---
    TestInfo("latency_baseline", "Latency Baseline Audit", TestCategory.PRODUCTION, "Establishes baseline response times for core functions.", "Medium", "dynamic"),
    TestInfo("deadlock_detection", "Potential Deadlock Scan", TestCategory.PRODUCTION, "Analyzes lock acquisition order for potential circular dependencies.", "High", "static"),
    TestInfo("race_condition", "Race Condition Check", TestCategory.PRODUCTION, "Identifies non-atomic operations on shared state.", "High", "static"),
    TestInfo("unhandled_exceptions", "Unhandled Exception Scan", TestCategory.PRODUCTION, "Checks for paths where exceptions could crash the process.", "High", "static"),
    TestInfo("memory_leak_static", "Memory Leak static Scan", TestCategory.PRODUCTION, "Identifies patterns like growing collections or unclosed resources.", "Medium", "static"),
    TestInfo("connection_exhaustion", "Conn Pool Exhaustion Probe", TestCategory.PRODUCTION, "Analyzes resource cleanup to prevent pool starvation.", "High", "static"),
    TestInfo("slow_query", "Slow Query Detection", TestCategory.PRODUCTION, "Scans for unoptimized DB queries without indices.", "Medium", "static"),
    TestInfo("cache_consistency", "Cache Inconsistency Scan", TestCategory.PRODUCTION, "Checks for missing cache invalidation after updates.", "Medium", "static"),
    TestInfo("timeout_handling", "Missing Timeout Logic", TestCategory.PRODUCTION, "Detects blocking calls without explicit timeouts.", "Medium", "static"),
    TestInfo("circular_dep", "Circular Dependency Audit", TestCategory.PRODUCTION, "Maps module imports for circularities that impair startup.", "Low", "static"),

    # --- PERFORMANCE & STRESS (10 tests) ---
    TestInfo("cpu_stress", "CPU Stress Resilience", TestCategory.STRESS, "Simulates heavy computational load to test stability.", "Medium", "dynamic"),
    TestInfo("mem_stress", "Memory Stress Resilience", TestCategory.STRESS, "Simulates high memory allocation to test GC and OOM handling.", "Medium", "dynamic"),
    TestInfo("concurrent_users", "High Concurrency Simulation", TestCategory.STRESS, "Simulates massive parallel user requests.", "High", "dynamic"),
    TestInfo("payload_stress", "Large Payload Resilience", TestCategory.STRESS, "Tests handling of extremely large input data.", "Medium", "dynamic"),
    TestInfo("network_latency", "Network Latency Simulation", TestCategory.PERFORMANCE, "Simulates slow network conditions (jitter/latency).", "Low", "dynamic"),
    TestInfo("p99_latency", "P99 Latency Audit", TestCategory.PERFORMANCE, "Measures tail latency under normal load.", "Medium", "dynamic"),
    TestInfo("throughput_max", "Max Throughput Benchmark", TestCategory.PERFORMANCE, "Determines the saturation point of the service.", "Medium", "dynamic"),
    TestInfo("bundle_size", "Asset Bundle Size Audit", TestCategory.PERFORMANCE, "Analyzes production assets for excessive size.", "Low", "static"),
    TestInfo("cold_start", "Cold Start Analysis", TestCategory.PERFORMANCE, "Measures startup time and initialization performance.", "Low", "dynamic"),
    TestInfo("db_index_audit", "DB Index Optimization", TestCategory.PERFORMANCE, "Suggests missing indices based on query patterns.", "Medium", "static"),

    # --- QUALITY & COMPLIANCE (7 tests) ---
    TestInfo("license_compliance", "License Compliance Audit", TestCategory.QUALITY, "Checks dependencies for copyleft or restrictive licenses.", "Medium", "static"),
    TestInfo("dep_graph", "Dependency Health Audit", TestCategory.QUALITY, "Analyzes depth and variety of project dependencies.", "Low", "static"),
    TestInfo("doc_completeness", "Documentation Completeness", TestCategory.QUALITY, "Checks for missing docstrings or exported API docs.", "Low", "static"),
    TestInfo("coverage_gap", "Test Coverage Gap Analysis", TestCategory.QUALITY, "Identifies critical paths missing automated tests.", "Medium", "static"),
    TestInfo("lint_security", "Security-focused Linting", TestCategory.QUALITY, "Runs specialized security linter rules.", "Medium", "static"),
    TestInfo("complexity_drift", "Complexity Drift Scan", TestCategory.QUALITY, "Detects increasing cyclomatic complexity over time.", "Low", "static"),
    TestInfo("redundant_calls", "Redundant API Call Detection", TestCategory.QUALITY, "Identifies duplicate data fetching patterns.", "Low", "static"),
]
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Voria Test Runner - Executes 50+ security and production tests.
|
|
3
|
+
Uses LLM for deep static analysis and subprocesses for dynamic testing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
from typing import List, Dict, Any, Optional
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from voria.core.llm import LLMProviderFactory, Message
|
|
14
|
+
from .definitions import TEST_DEFINITIONS, TestInfo, TestCategory
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
class TestRunner:
    """Runs voria's 50+ security and production tests against a repository.

    Static tests are delegated to the configured LLM provider for deep code
    analysis; dynamic tests (stress, latency, cold start, ...) execute real
    measurements on the local machine.
    """

    def __init__(self, provider_name: str, api_key: str, model: str, repo_path: str = "."):
        """Bind the runner to one LLM provider and one repository root."""
        self.provider = LLMProviderFactory.create(provider_name, api_key, model)
        self.repo_path = Path(repo_path)
        # Index tests by id for O(1) lookup in run_test().
        self.test_map = {t.id: t for t in TEST_DEFINITIONS}

    def list_tests(self) -> List[TestInfo]:
        """Return all available tests."""
        return TEST_DEFINITIONS

    async def run_test(self, test_id: str) -> Dict[str, Any]:
        """Run a specific test by ID.

        Raises:
            ValueError: if ``test_id`` does not match any known test.
        """
        test_info = self.test_map.get(test_id)
        if not test_info:
            raise ValueError(f"Unknown test: {test_id}")

        logger.info(f"🚀 Starting {test_info.name} [{test_id}]...")

        if test_info.type == "static":
            return await self._run_static_analysis(test_info)
        else:
            return await self._run_dynamic_test(test_info)

    async def _run_static_analysis(self, test_info: TestInfo) -> Dict[str, Any]:
        """Use LLM to perform deep static analysis of the codebase."""
        # Collect relevant files (limited to first 15 for context reasons)
        files = []
        extensions = {".py", ".js", ".ts", ".go", ".rs", ".java", ".cpp", ".c"}

        count = 0
        for p in self.repo_path.rglob("*"):
            if p.suffix in extensions and "node_modules" not in str(p) and ".git" not in str(p):
                try:
                    content = p.read_text()
                    # Truncate each file to 5000 chars to bound prompt size.
                    files.append(f"--- File: {p.relative_to(self.repo_path)} ---\n{content[:5000]}")
                    count += 1
                    logger.debug(f"Collected file: {p}")
                except Exception as e:
                    # Unreadable files (binary, bad permissions) are skipped.
                    logger.warning(f"Failed to read {p}: {e}")
                    continue
            if count >= 15:
                break

        context = "\n\n".join(files)
        logger.info(f"Starting static analysis for {test_info.id} with {len(files)} files...")

        system_prompt = f"""You are a senior cybersecurity and reliability engineer.
Your task is to perform the '{test_info.name}' ({test_info.id}) on the following codebase.
Category: {test_info.category.value}
Description: {test_info.description}

Provide a detailed report in JSON format:
{{
  "status": "passed" | "failed" | "warning",
  "score": 0-100,
  "findings": [
    {{
      "file": "path/to/file",
      "line": 123,
      "description": "...",
      "severity": "high" | "medium" | "low",
      "fix": "..."
    }}
  ],
  "summary": "...",
  "recommendations": ["..."]
}}
"""

        try:
            messages = [
                Message(role="system", content=system_prompt),
                Message(role="user", content=f"Codebase Context:\n{context}")
            ]

            response = await self.provider.generate(messages, max_tokens=3000)
            # Balanced-brace extraction instead of a greedy regex, so prose
            # wrapped around the JSON report does not break parsing.
            content = response.content
            result = self._extract_json(content)

            if result is None:
                result = {
                    "status": "error",
                    "summary": f"Could not parse LLM response: {content[:200]}...",
                    "findings": [],
                    "recommendations": []
                }

            return {
                "id": test_info.id,
                "name": test_info.name,
                "category": test_info.category.value,
                "result": result
            }

        except Exception as e:
            logger.error(f"Static analysis for {test_info.id} failed: {e}")
            # BUG FIX: the error branch previously returned a different shape
            # ({"status", "message"} at top level, no "category"/"result"),
            # forcing callers to special-case failures.  Keep the legacy keys
            # for backward compatibility but add the standard envelope too.
            return {
                "id": test_info.id,
                "name": test_info.name,
                "category": test_info.category.value,
                "status": "error",  # legacy key, kept for compatibility
                "message": str(e),  # legacy key, kept for compatibility
                "result": {
                    "status": "error",
                    "summary": str(e),
                    "findings": [],
                    "recommendations": []
                }
            }

    @staticmethod
    def _extract_json(text: str) -> Optional[Dict[str, Any]]:
        """Extract the first balanced JSON object from text using brace counting.

        Braces inside string literals (including escaped quotes) are ignored.
        Returns None when no parseable object is found.
        """
        import json as _json
        start = text.find('{')
        if start == -1:
            return None
        depth = 0
        in_string = False
        escape_next = False
        for i in range(start, len(text)):
            c = text[i]
            if escape_next:
                escape_next = False
                continue
            if c == '\\' and in_string:
                escape_next = True
                continue
            if c == '"' and not escape_next:
                in_string = not in_string
                continue
            if in_string:
                continue
            if c == '{':
                depth += 1
            elif c == '}':
                depth -= 1
                if depth == 0:
                    try:
                        return _json.loads(text[start:i + 1])
                    except _json.JSONDecodeError:
                        return None
        return None

    async def _run_dynamic_test(self, test_info: TestInfo) -> Dict[str, Any]:
        """Perform dynamic testing (stress, latency, etc)."""
        start_time = time.time()

        if test_info.id == "latency_baseline":
            # Measure small-file create/delete latency as an I/O baseline.
            import statistics
            probe = self.repo_path / ".voria_latency_probe"
            latencies = []
            for _ in range(100):
                t0 = time.perf_counter()
                probe.touch()
                probe.unlink(missing_ok=True)
                latencies.append((time.perf_counter() - t0) * 1000)

            avg = statistics.mean(latencies)
            # Sort once and reuse for both percentiles (was sorted twice).
            ordered = sorted(latencies)
            p95 = ordered[int(len(latencies) * 0.95)]
            p99 = ordered[int(len(latencies) * 0.99)]
            result = {
                "status": "passed" if avg < 50 else "warning",
                "score": max(0, 100 - int(avg * 2)),
                "summary": f"Baseline I/O latency: {avg:.2f}ms avg, P95={p95:.2f}ms, P99={p99:.2f}ms",
                "metrics": {"avg_ms": round(avg, 2), "p95_ms": round(p95, 2), "p99_ms": round(p99, 2)},
                "recommendations": ["Consider SSD storage if latency exceeds 10ms."] if avg > 10 else []
            }

        elif test_info.id == "cpu_stress":
            # Real CPU stress: heavy math for a controlled duration.
            import math
            iterations = 0
            duration_target = 2.0  # seconds
            while time.time() - start_time < duration_target:
                math.sqrt(1234567.89)
                iterations += 1
            elapsed = time.time() - start_time
            ops_per_sec = iterations / elapsed
            result = {
                "status": "passed",
                "score": 85,
                "summary": f"CPU stress test completed. {iterations:,} ops in {elapsed:.2f}s ({ops_per_sec:,.0f} ops/sec). System remained responsive.",
                "metrics": {"duration_sec": round(elapsed, 2), "iterations": iterations, "ops_per_sec": round(ops_per_sec)},
                "recommendations": []
            }

        elif test_info.id == "mem_stress":
            # Allocate up to 100MB in 1MB blocks, then release and verify GC.
            import gc
            blocks = []
            block_size = 1024 * 1024  # 1MB
            max_blocks = 100  # 100MB max
            try:
                for i in range(max_blocks):
                    blocks.append(bytearray(block_size))
                peak_mb = len(blocks)
                del blocks
                gc.collect()
                result = {
                    "status": "passed",
                    "score": 90,
                    "summary": f"Memory stress test completed. Successfully allocated and freed {peak_mb}MB. GC reclaimed all memory.",
                    "metrics": {"peak_mb": peak_mb, "duration_sec": round(time.time() - start_time, 2)},
                    "recommendations": []
                }
            except MemoryError:
                peak_mb = len(blocks)
                del blocks
                gc.collect()
                result = {
                    "status": "warning",
                    "score": 50,
                    "summary": f"Memory stress test hit limit at {peak_mb}MB. System may be memory-constrained.",
                    "metrics": {"peak_mb": peak_mb},
                    "recommendations": ["Increase available memory or implement memory-aware resource limits."]
                }

        elif test_info.id == "concurrent_users":
            # Simulate concurrent request fan-out with asyncio tasks.
            # (Removed an unused `import statistics` and unused gather result.)
            async def simulated_request(_n):
                await asyncio.sleep(0.01)  # Simulate 10ms work
                return time.perf_counter()

            concurrency_levels = [10, 50, 100]
            metrics = {}
            for level in concurrency_levels:
                t0 = time.perf_counter()
                tasks = [simulated_request(i) for i in range(level)]
                await asyncio.gather(*tasks)
                elapsed = time.perf_counter() - t0
                rps = level / elapsed
                metrics[f"c{level}_rps"] = round(rps, 1)
                metrics[f"c{level}_total_sec"] = round(elapsed, 3)

            result = {
                "status": "passed",
                "score": 80,
                "summary": f"Concurrency test completed. At 100 concurrent: {metrics.get('c100_rps', 0)} req/s in {metrics.get('c100_total_sec', 0)}s",
                "metrics": metrics,
                "recommendations": ["Monitor actual HTTP endpoints for real-world concurrency limits."]
            }

        elif test_info.id == "payload_stress":
            # Time temp-file writes of increasing payload sizes.
            import tempfile
            sizes = {"1KB": 1024, "100KB": 102400, "1MB": 1048576, "10MB": 10485760}
            write_speeds = {}
            for label, size in sizes.items():
                data = b"X" * size
                t0 = time.perf_counter()
                with tempfile.NamedTemporaryFile(dir=str(self.repo_path), delete=True) as f:
                    f.write(data)
                    f.flush()
                elapsed = time.perf_counter() - t0
                write_speeds[label] = round(elapsed * 1000, 2)  # ms

            result = {
                "status": "passed",
                "score": 85,
                "summary": f"Payload stress test completed. Write times: {write_speeds}",
                "metrics": {"write_ms": write_speeds},
                "recommendations": ["Consider streaming for payloads > 10MB."] if write_speeds.get("10MB", 0) > 500 else []
            }

        elif test_info.id == "cold_start":
            # Measure package import time in a fresh interpreter.
            import subprocess
            t0 = time.perf_counter()
            proc = subprocess.run(
                [sys.executable, "-c", "import voria; print('ok')"],
                capture_output=True, text=True, timeout=30
            )
            import_time = (time.perf_counter() - t0) * 1000
            result = {
                "status": "passed" if import_time < 3000 else "warning",
                "score": max(0, 100 - int(import_time / 50)),
                "summary": f"Cold start: voria package imports in {import_time:.0f}ms",
                "metrics": {"import_ms": round(import_time, 1), "success": proc.returncode == 0},
                "recommendations": ["Lazy-load heavy modules to reduce cold start."] if import_time > 2000 else []
            }

        elif test_info.id == "network_latency":
            # Use DNS resolution time as a cheap network-latency proxy.
            import socket
            hosts = ["github.com", "api.github.com", "integrate.api.nvidia.com"]
            dns_times = {}
            for host in hosts:
                try:
                    t0 = time.perf_counter()
                    socket.getaddrinfo(host, 443)
                    dns_times[host] = round((time.perf_counter() - t0) * 1000, 2)
                except Exception:
                    dns_times[host] = -1  # -1 marks a failed lookup
            resolved = [v for v in dns_times.values() if v > 0]
            if resolved:
                avg_dns = sum(resolved) / len(resolved)
                status = "passed" if avg_dns < 200 else "warning"
                score = max(0, 100 - int(avg_dns / 5))
            else:
                # BUG FIX: with zero successful lookups the old code reported
                # an average of 0ms and a perfect "passed" score; a total DNS
                # failure must be surfaced as a failure instead.
                avg_dns = 0.0
                status = "failed"
                score = 0
            result = {
                "status": status,
                "score": score,
                "summary": f"Network latency test: DNS avg {avg_dns:.1f}ms. Resolved {len(resolved)}/{len(hosts)} hosts.",
                "metrics": {"dns_ms": dns_times, "avg_dns_ms": round(avg_dns, 1)},
                "recommendations": ["Check DNS configuration."] if avg_dns > 100 or not resolved else []
            }

        else:
            # Fallback for remaining dynamic tests — use LLM analysis.
            return await self._run_static_analysis(test_info)

        return {
            "id": test_info.id,
            "name": test_info.name,
            "category": test_info.category.value,
            "result": result
        }
|