PyPI - coreinsight-cli - Versions diffs - 0.3.0__tar.gz → 0.3.2__tar.gz - Mend

coreinsight-cli 0.3.0.tar.gz → 0.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

{coreinsight_cli-0.3.0/coreinsight_cli.egg-info → coreinsight_cli-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreinsight-cli
-Version: 0.3.0
+Version: 0.3.2
 Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
 Author: Varun Jani
 License: GPL-3.0-or-later

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/main.py RENAMED Viewed

@@ -72,12 +72,10 @@ def _check_speedup_success(success: bool, logs: str) -> bool:
         pass
     return False
 def _run_single_agent(
     func_name, original_code, language, context,
     hardware_target, sandbox, agent, tier_limits,
-    stream_callback: Optional[Callable[[str], None]] = None,
-):
+    stream_callback: Optional[Callable[[str], None]] = None):
     """
     Original single-agent pipeline.
     Returns (result, optimized_code, success, logs, plot_data, is_valid).
@@ -133,12 +131,10 @@ def _run_single_agent(
     return result, optimized_code, success, logs, plot_data, is_valid
 def _run_multi_agent(
     func_name, original_code, language, context,
     hardware_target, sandbox, multi_agents, tier_limits,
-    stream_callback: Optional[Callable[[str], None]] = None,
-):
+    stream_callback: Optional[Callable[[str], None]] = None):
     """
     Multi-agent pipeline.
     BottleneckAgent  → analysis
@@ -209,21 +205,78 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
         _log(func_name, "Fetching RAG context...")
         context = indexer.get_context_for_code(original_code) if indexer else ""
-        # 0b. Memory lookup — skip LLM entirely if we've seen this pattern before
+        # 0b. Memory lookup — skip LLM if we've seen this pattern before,
+        # but validate the stored result before trusting it:
+        #   Gate A: no optimized code stored  → previous run was incomplete, re-run LLM
+        #   Gate B: correctness < 50% last run → keep analysis, re-run correctness only
+        #   Gate C: result is good             → return as-is
         if memory:
             memory_hit = memory.lookup(original_code, language)
             if memory_hit:
                 label = "exact match" if memory_hit.is_exact else f"similarity {memory_hit.similarity:.1%}"
-                _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
-                recalled_result = {
-                    "severity":       memory_hit.severity,
-                    "issue":          memory_hit.issue,
-                    "reasoning":      memory_hit.reasoning,
-                    "optimized_code": memory_hit.optimized_code,
-                    "suggestion":     "",
-                    "bottlenecks":    [],
-                }
-                return func_name, recalled_result, None, None, None, None, memory_hit, False
+                # Gate A: stored result has no optimized code — not useful, fall through to LLM
+                if not memory_hit.optimized_code:
+                    _log(func_name, f"Memory hit ({label}) — no optimized code stored, re-running LLM", style="yellow")
+                    memory_hit = None   # fall through; LLM path runs below as normal
+                # Gate B: correctness was poor last time — re-run the correctness check only
+                elif memory_hit.total_cases > 0 and memory_hit.correctness_cases / memory_hit.total_cases < 0.5:
+                    _log(
+                        func_name,
+                        f"Memory hit ({label}) — correctness was "
+                        f"{memory_hit.correctness_cases}/{memory_hit.total_cases} last run, re-checking",
+                        style="yellow",
+                    )
+                    recalled_result = {
+                        "severity":       memory_hit.severity,
+                        "issue":          memory_hit.issue,
+                        "reasoning":      memory_hit.reasoning,
+                        "optimized_code": memory_hit.optimized_code,
+                        "suggestion":     "",
+                        "bottlenecks":    [],
+                    }
+                    new_verification = None
+                    if not getattr(sandbox, "disabled", False):
+                        stored_cases = memory.lookup_test_cases(original_code)
+                        if stored_cases:
+                            _log(func_name, "Re-running correctness sandbox with stored test cases...", style="dim")
+                            correctness = sandbox.verify_correctness_only(
+                                original_code=original_code,
+                                optimized_code=memory_hit.optimized_code,
+                                original_func_name=func_name,
+                                optimized_func_name=func_name,
+                                test_cases=stored_cases,
+                                language=language,
+                                context=context,
+                            )
+                            _log(func_name, f"Re-verification: {correctness.passed_cases}/{correctness.total_cases} passed", style="dim")
+                            try:
+                                from coreinsight.sandbox import VerificationResult, SpeedupVerification
+                                new_verification = VerificationResult(
+                                    speedup=SpeedupVerification(
+                                        verified=True,
+                                        computed_speedups=[memory_hit.avg_speedup] if memory_hit.avg_speedup else [],
+                                        details=f"Speedup recalled from memory: {memory_hit.avg_speedup:.2f}x",
+                                    ),
+                                    correctness=correctness,
+                                )
+                            except Exception:
+                                pass   # verification display is non-critical
+                    return func_name, recalled_result, None, None, new_verification, None, memory_hit, False
+                # Gate C: stored result is complete and correctness is acceptable
+                else:
+                    _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
+                    recalled_result = {
+                        "severity":       memory_hit.severity,
+                        "issue":          memory_hit.issue,
+                        "reasoning":      memory_hit.reasoning,
+                        "optimized_code": memory_hit.optimized_code,
+                        "suggestion":     "",
+                        "bottlenecks":    [],
+                    }
+                    return func_name, recalled_result, None, None, None, None, memory_hit, False
         # ── Route: single-agent vs multi-agent ──────────────────────────
         if agent_mode == "multi" and multi_agents:
@@ -244,8 +297,37 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
         if result is None:
             return func_name, None, None, f"❌ Analysis error: {logs}", None, None, None, False
+        # Retry gate: Low severity or missing optimized code often means the model
+        # defaulted to "looks fine" rather than truly auditing.
+        # Retry up to 2 times before accepting the conclusion.
+        _MAX_ANALYSIS_RETRIES = 2
+        _retry = 0
+        while (result.get("severity") == "Low" or not optimized_code) and _retry < _MAX_ANALYSIS_RETRIES:
+            _retry += 1
+            _log(func_name, f"Low/missing result — retrying analysis ({_retry}/{_MAX_ANALYSIS_RETRIES})...", style="yellow")
+            if agent_mode == "multi" and multi_agents:
+                result, optimized_code, success, logs, plot_data, is_valid_optimization = \
+                    _run_multi_agent(
+                        func_name, original_code, language, context,
+                        hardware_target, sandbox, multi_agents, tier_limits,
+                        stream_callback=stream_callback,
+                    )
+            else:
+                result, optimized_code, success, logs, plot_data, is_valid_optimization = \
+                    _run_single_agent(
+                        func_name, original_code, language, context,
+                        hardware_target, sandbox, agent, tier_limits,
+                        stream_callback=stream_callback,
+                    )
+            if result is None:
+                break
+        if result is None:
+            return func_name, None, None, f"❌ Analysis error after {_retry} retries: {logs}", None, None, None, False
         if result.get("severity") == "Low" or not optimized_code:
-            return func_name, None, None, "✅ No critical bottlenecks detected. Code is optimal.", None, None, None, False
+            confirmed = f" (confirmed after {_retry} retries)" if _retry > 0 else ""
+            return func_name, None, None, f"✅ No significant bottlenecks found{confirmed}.", None, None, None, False
         # 3. Verification + AI-free hardware profiling
         verification    = None
@@ -260,6 +342,8 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
             else:
                 _log(func_name, "Generating correctness test cases...")
                 test_cases = agent.generate_test_cases(func_name, original_code, language, context, num_cases=tier_limits["num_test_cases"])
+            if memory:
+                memory.store_test_cases(original_code, test_cases)
             _log(func_name, "Running correctness verification in Docker sandbox...")
             verification = sandbox.verify(
                 csv_output=logs,
@@ -290,11 +374,29 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
     except Exception as e:
         err_str = str(e)
-        if "context" in err_str.lower() and "limit" in err_str.lower():
-            _log(func_name, f"Context limit hit: {e}", style="bold yellow")
+        err_low  = err_str.lower()
+        if "context" in err_low and "limit" in err_low:
+            _log(func_name, "Context limit hit", style="bold yellow")
             return func_name, None, None, (
-                f"⚠️  Context limit: {err_str}\n"
-                f"Try a model with a larger context window, or split the function."
+                "⚠️  Context limit — try a model with a larger context window, "
+                "or split the function into smaller pieces."
+            ), None, None, None, False
+        if any(k in err_low for k in ("cannot connect", "connection refused", "docker")):
+            _log(func_name, "Docker unavailable", style="bold yellow")
+            return func_name, None, None, (
+                "⚠️  Docker is not running — start Docker Desktop and try again.\n"
+                "    Skip the sandbox with: coreinsight analyze --no-docker <file>"
+            ), None, None, None, False
+        if "timeout" in err_low or "timed out" in err_low:
+            _log(func_name, "Sandbox timed out", style="bold yellow")
+            return func_name, None, None, (
+                "⚠️  Sandbox timed out — the benchmark likely contains an infinite loop.\n"
+                "    The LLM analysis result above is still valid."
+            ), None, None, None, False
+        if "out of memory" in err_low or "oom" in err_low:
+            _log(func_name, "Sandbox OOM", style="bold yellow")
+            return func_name, None, None, (
+                "⚠️  Sandbox ran out of memory. Try --no-docker or reduce the file size."
             ), None, None, None, False
         _log(func_name, f"Failed: {e}", style="bold red")
         return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
@@ -689,7 +791,12 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
             console.print(f"[red]Initialization Error:[/red] {e}")
             sys.exit(1)
-        mode_label = "[bold cyan]Multi-Agent[/bold cyan]" if agent_mode == "multi" else "[dim]Single-Agent[/dim]"
+        if agent_mode == "multi":
+            mode_label = "[bold cyan]Multi-Agent[/bold cyan]"
+        elif agent_mode == "auto":
+            mode_label = "[cyan]Auto[/cyan]"
+        else:
+            mode_label = "Single-Agent"
         console.print(f"[dim]⚙️  Agent mode: {mode_label}[/dim]")
         mem_count = memory.stats().get("count", 0)
@@ -760,7 +867,15 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
                 except Exception as exc:
                     with print_lock:
-                        console.print(f"[bold red]❌ Critical failure in thread processing {func['name']}:[/bold red] {exc}")
+                        exc_low = str(exc).lower()
+                        if any(k in exc_low for k in ("docker", "cannot connect", "connection refused")):
+                            console.print(f"[bold yellow]⚠️  {func['name']}: Docker unavailable — start Docker Desktop and retry.[/bold yellow]")
+                        elif "timeout" in exc_low or "timed out" in exc_low:
+                            console.print(f"[bold yellow]⚠️  {func['name']}: Sandbox timed out.[/bold yellow]")
+                        elif "out of memory" in exc_low or "oom" in exc_low:
+                            console.print(f"[bold yellow]⚠️  {func['name']}: Sandbox ran out of memory.[/bold yellow]")
+                        else:
+                            console.print(f"[bold red]❌ {func['name']}: Unexpected error — {exc}[/bold red]")
         console.print(Panel.fit(f"✅ [bold green]Analysis Complete![/bold green] Final report saved to:\n{report_path.absolute()}"))
@@ -853,6 +968,136 @@ def run_demo(lang: str = "python", no_docker: bool = False, tui_console=None):
     run_analysis(str(demo_dir / entry_file), no_docker=no_docker, tui_console=tui_console)
+def _run_test_cmd(func_name: str, no_docker: bool = False):
+    """
+    Re-run correctness verification for a stored optimized function.
+    LLM-free on all subsequent calls. On first call after an old analyze
+    run (before test case persistence was added), generates test cases
+    once via LLM and stores them so future calls need no LLM.
+    """
+    from coreinsight.memory import OptimizationMemory
+    from coreinsight.sandbox import CodeSandbox
+    mem    = OptimizationMemory()
+    record = mem.lookup_by_name(func_name)
+    if not record:
+        console.print(
+            f"[yellow]No memory record found for '[bold]{func_name}[/bold]'.[/yellow]\n"
+            f"[dim]Run [cyan]coreinsight analyze[/cyan] on a file containing this function first.[/dim]"
+        )
+        return
+    language       = record["language"]
+    original_code  = record["original_code"]
+    optimized_code = record["optimized_code"]
+    test_cases     = record["test_cases"]
+    console.print(Panel.fit(
+        f"Re-verifying [bold cyan]{func_name}[/bold cyan] ({language})",
+        border_style="cyan",
+    ))
+    if not optimized_code:
+        console.print("[red]Optimized code not found in memory store.[/red]")
+        return
+    # ── One-time LLM fallback for functions analyzed before test case persistence ──
+    if not test_cases:
+        console.print(
+            "[yellow]⚠  No test cases stored for this function.[/yellow]\n"
+            "[dim]Generating once via LLM — all future calls will be LLM-free...[/dim]"
+        )
+        try:
+            from coreinsight.analyzer import AnalyzerAgent
+            from coreinsight.config import get_model_tier, get_tier_limits
+            config     = load_config()
+            provider   = config.get("provider",   "ollama")
+            model_name = config.get("model_name", "llama3.2")
+            api_keys   = config.get("api_keys",   {})
+            model_tier = get_model_tier(provider, model_name)
+            tier_limits = get_tier_limits(config)
+            agent      = AnalyzerAgent(
+                provider=provider,
+                model_name=model_name,
+                api_keys=api_keys,
+                model_tier=model_tier,
+            )
+            test_cases = agent.generate_test_cases(
+                func_name, original_code, language,
+                context="",
+                num_cases=tier_limits["num_test_cases"],
+            )
+        except Exception as exc:
+            console.print(f"[red]LLM error generating test cases: {exc}[/red]")
+            return
+        if not test_cases:
+            console.print(
+                "[red]LLM returned no test cases. "
+                "Check your provider config with [cyan]coreinsight configure[/cyan].[/red]"
+            )
+            return
+        mem.store_test_cases(original_code, test_cases)
+        console.print(
+            f"[dim]✓ Generated and stored {len(test_cases)} test case(s). "
+            f"Future calls to [cyan]coreinsight test {func_name}[/cyan] need no LLM.[/dim]"
+        )
+    # ── Correctness sandbox — no LLM from this point ──────────────────────
+    sandbox = CodeSandbox(disabled=no_docker)
+    if language in ("cpp", "c++", "cuda"):
+        # C++/CUDA correctness harness is embedded by HarnessAgent at analysis
+        # time and cannot be reconstructed post-hoc. Show stored result instead.
+        meta         = record["meta"]
+        passed_cases = int(meta.get("correctness_cases", 0))
+        total_cases  = int(meta.get("total_cases", 0))
+        if total_cases > 0:
+            all_passed = passed_cases == total_cases
+            badge = "[bold green]✓ PASS[/bold green]" if all_passed else "[bold yellow]⚠ PARTIAL[/bold yellow]"
+            console.print(
+                f"{badge} — Stored result: "
+                f"{passed_cases}/{total_cases} test cases passed at analysis time."
+            )
+        else:
+            console.print(
+                "[dim]No stored correctness result for this function.[/dim]"
+            )
+        console.print(
+            "[dim]C++ re-verification requires re-running analysis. "
+            "Full results in [cyan]coreinsight memory[/cyan].[/dim]"
+        )
+        return
+    console.print(f"[dim]Running {len(test_cases)} test case(s) in Docker sandbox...[/dim]")
+    result = sandbox.verify_correctness_only(
+        original_code=original_code,
+        optimized_code=optimized_code,
+        original_func_name=func_name,
+        optimized_func_name=func_name,
+        test_cases=test_cases,
+        language=language,
+    )
+    if result.verified:
+        console.print(
+            f"[bold green]✓ PASS[/bold green] — "
+            f"{result.passed_cases}/{result.total_cases} test cases passed."
+        )
+    else:
+        console.print(
+            f"[bold red]✗ FAIL[/bold red] — "
+            f"{result.passed_cases}/{result.total_cases} test cases passed."
+        )
+        for failure in result.failures[:10]:
+            console.print(f"  [red]✗[/red] {failure}")
+    if result.details:
+        console.print(f"[dim]{result.details}[/dim]")
 def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv"):
     from coreinsight.memory import OptimizationMemory, MEMORY_DIR
     import shutil
@@ -915,6 +1160,7 @@ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv
     table.add_column("Function",   justify="left",   style="bold white")
     table.add_column("Language",   justify="center", style="cyan",       width=10)
     table.add_column("Speedup",    justify="right",  style="bold green", width=9)
+    table.add_column("Tests",      justify="right",  style="green",      width=10)
     table.add_column("Severity",   justify="center",                     width=10)
     table.add_column("Issue",      justify="left",   style="dim white")
     table.add_column("HW Evidence",justify="left",   style="dim",        width=22)
@@ -935,19 +1181,23 @@ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv
     )
     for i, (meta, rid) in enumerate(paired, start=1):
-        sev   = meta.get("severity", "High")
-        sev_c = severity_colors.get(sev, "white")
-        ts    = meta.get("timestamp", "")[:19].replace("T", " ")
-        hw    = meta.get("profiler_summary", "") or "—"
-        issue = (meta.get("issue", "") or "—")[:60]
+        sev      = meta.get("severity", "High")
+        sev_c    = severity_colors.get(sev, "white")
+        ts       = meta.get("timestamp", "")[:19].replace("T", " ")
+        hw       = meta.get("profiler_summary", "") or "—"
+        issue    = (meta.get("issue", "") or "—")[:60]
         if len(meta.get("issue", "")) > 60:
             issue += "…"
+        passed_c = int(meta.get("correctness_cases", 0))
+        total_c  = int(meta.get("total_cases", 0))
+        tests_str = f"{passed_c}/{total_c}" if total_c > 0 else "—"
         table.add_row(
             str(i),
             meta.get("func_name", rid[:12]),
             meta.get("language", "?"),
             f"{float(meta.get('avg_speedup', 0)):.2f}x",
+            tests_str,
             f"[{sev_c}]{sev}[/{sev_c}]",
             issue,
             hw,
@@ -1005,6 +1255,11 @@ def main_cli():
     scan_parser.add_argument("--dir", default=".", help="Directory to scan")
     scan_parser.add_argument("--top", type=int, default=10, help="Number of hotspots to show")
+    test_parser = subparsers.add_parser("test", help="Re-run verification sandbox for a stored function")
+    test_parser.add_argument("func_name", help="Name of the function to re-verify")
+    test_parser.add_argument("--no-docker", dest="no_docker", action="store_true",
+                             help="Skip Docker (will report skipped)")
     args = parser.parse_args()
     if args.command == "configure":
@@ -1025,6 +1280,11 @@ def main_cli():
             export_path=getattr(args, "export_path", None),
             export_fmt=getattr(args, "export_fmt", "csv"),
         )
+    elif args.command == "test":
+        _run_test_cmd(
+            func_name=args.func_name,
+            no_docker=getattr(args, "no_docker", False),
+        )
     elif args.command == "scan":
         scanner = ProjectScanner(args.dir)
         scanner.scan_project(max_results=args.top)

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/memory.py RENAMED Viewed

@@ -47,6 +47,8 @@ class MemoryHit:
     language:          str
     severity:          str = "High"
     correctness_cases: int = 0
+    total_cases:       int = 0
+    test_cases:        list = field(default_factory=list)
     profiler_summary:  str = ""
@@ -302,6 +304,7 @@ class OptimizationMemory:
                     "reasoning":         (result.get("reasoning") or "")[:1000],
                     "severity":          result.get("severity", "High"),
                     "correctness_cases": verification.correctness.passed_cases,
+                    "total_cases":       verification.correctness.total_cases,
                     "profiler_summary":  profiler_summary[:200],
                     "timestamp":         datetime.now(timezone.utc).isoformat(),
                 }
@@ -329,10 +332,87 @@ class OptimizationMemory:
         except Exception as exc:
             return {"count": 0, "error": str(exc)}
+    def lookup_test_cases(self, original_code: str) -> Optional[list]:
+        """
+        Return stored test cases for `original_code`, or None if not found.
+        Used to re-run correctness without regenerating via LLM.
+        """
+        if not self._ensure_db():
+            return None
+        h = self.ast_hash(original_code)
+        return self._load_test_cases(h)
+    def store_test_cases(self, original_code: str, test_cases: list) -> None:
+        """
+        Persist test cases for a function, keyed by AST hash.
+        Called from process_function immediately after test cases are generated,
+        so `coreinsight test` can re-run verification without the LLM.
+        """
+        if not self._ensure_db():
+            return
+        h = self.ast_hash(original_code)
+        with self._write_lock:
+            try:
+                self._code_dir.mkdir(parents=True, exist_ok=True)
+                self._save_test_cases(h, test_cases)
+            except Exception as exc:
+                logger.debug(f"store_test_cases failed: {exc}")
+    def lookup_by_name(self, func_name: str) -> Optional[dict]:
+        """
+        Find the most recent memory record whose func_name matches exactly.
+        Returns a dict with keys: func_name, language, original_code,
+        optimized_code, test_cases, meta. Returns None on no match.
+        """
+        if not self._ensure_db():
+            return None
+        try:
+            all_records = self._collection.get(
+                include=["metadatas", "documents"]
+            )
+            matches = [
+                (meta, doc, rid)
+                for meta, doc, rid in zip(
+                    all_records.get("metadatas", []),
+                    all_records.get("documents", []),
+                    all_records.get("ids", []),
+                )
+                if meta.get("func_name") == func_name
+            ]
+            if not matches:
+                return None
+            # Most recent first
+            matches.sort(key=lambda x: x[0].get("timestamp", ""), reverse=True)
+            meta, original_code, h = matches[0]
+            return {
+                "func_name":      func_name,
+                "language":       meta.get("language", ""),
+                "original_code":  original_code or "",
+                "optimized_code": self._load_code(h) or "",
+                "test_cases":     self._load_test_cases(h) or [],
+                "meta":           meta,
+            }
+        except Exception as exc:
+            logger.debug(f"lookup_by_name failed: {exc}")
+            return None
     # ------------------------------------------------------------------ #
     # Internal helpers
     # ------------------------------------------------------------------ #
+    def _save_test_cases(self, h: str, cases: list) -> None:
+        path = self._code_dir / f"{h}.test_cases.json"
+        path.write_text(json.dumps(cases), encoding="utf-8")
+    def _load_test_cases(self, h: str) -> Optional[list]:
+        path = self._code_dir / f"{h}.test_cases.json"
+        if not path.exists():
+            return None
+        try:
+            return json.loads(path.read_text(encoding="utf-8"))
+        except Exception:
+            return None
     def _save_code(self, h: str, language: str, code: str) -> None:
         ext  = {"python": "py", "cpp": "cpp", "c++": "cpp", "cuda": "cu"}.get(language, "txt")
         path = self._code_dir / f"{h}.{ext}"
@@ -364,5 +444,6 @@ class OptimizationMemory:
             language=          meta.get("language",          ""),
             severity=          meta.get("severity",          "High"),
             correctness_cases= int(meta.get("correctness_cases", 0)),
+            total_cases=       int(meta.get("total_cases",       0)),
             profiler_summary=  meta.get("profiler_summary",  ""),
         )

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/prompts.py RENAMED Viewed

@@ -90,7 +90,8 @@ GRADING RUBRIC AND INSTRUCTIONS (APPLY ONLY THE SPECIFIC RUBRIC FOR {language}):
 INSTRUCTIONS:
 1. Actively hunt for Medium, High, and Critical issues based ONLY on the specific {language} rubric above. Do not hallucinate GPU concepts for Python code unless PyTorch/CUDA is explicitly used.
 2. If you find an issue, you MUST explain the hardware-level or interpreter-level reasoning clearly (e.g., CPU cache misses, GIL contention, memory latency).
-3. CODE GENERATION MANDATE: You MUST provide the completely rewritten, optimized function in the `optimized_code` field. The code must be raw, syntactically correct {language} code ready to be compiled/run. Do NOT leave this field empty. Do NOT wrap the code in markdown backticks (e.g., ```cpp) inside the JSON string.
+3. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. A false negative (missing a real bottleneck) is always worse than a false positive. Only assign Low severity if you can explicitly prove the algorithm is already optimal for the target hardware — state the time complexity, memory access pattern, and why no better approach exists. "No obvious issues" is NOT sufficient justification for Low.
+4. CODE GENERATION MANDATE: You MUST provide the completely rewritten, optimized function in the `optimized_code` field. The code must be raw, syntactically correct {language} code ready to be compiled/run. Do NOT leave this field empty. Do NOT wrap the code in markdown backticks (e.g., ```cpp) inside the JSON string.
 """
 # ---------------------------------------------------------------------------
@@ -143,10 +144,10 @@ GRADING RUBRIC (apply only the {language} section):
 - Low: Trivial stylistic issues only.
 INSTRUCTIONS:
-1. Identify the single most impactful bottleneck — do not list everything, find the root cause.
-2. Explain the hardware-level or interpreter-level reasoning precisely.
-3. Set `optimized_code` to null — code generation happens in a separate agent.
-4. If the code is genuinely optimal, set severity to Low and explain why.
+1. Identify the single most impactful bottleneck — do not list everything, find the root cause. If no Critical or High issue exists, identify the most significant Medium issue. Do NOT default to Low out of uncertainty.
+2. Explain the hardware-level or interpreter-level reasoning precisely — name the specific mechanism (e.g., "O(N²) comparisons cause cache thrashing on arrays larger than L2 cache", "GIL held across network I/O blocks all threads").
+3. CRITICAL: Set `optimized_code` to null. Any non-null value in this field will corrupt the pipeline. Code generation is handled by a separate agent.
+4. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. Only assign Low if you can explicitly prove algorithmic optimality — state the time complexity, memory access pattern, and why no better approach exists for the target hardware. "No obvious issues" is NOT sufficient justification for Low.
 {format_instructions}
 """
@@ -254,8 +255,9 @@ REQUIREMENTS:
 1. Rewrite ONLY the function named {func_name} — preserve its signature exactly.
 2. Fix the identified bottleneck using the suggestion as your guide.
 3. The function must be self-contained and correct.
-4. Raw {language} code only — no explanation, no markdown fences, no JSON.
-5. Do NOT rename the function.
+4. VERIFICATION: Before outputting, mentally confirm: does the rewrite directly eliminate the identified bottleneck? If the issue was O(N²), confirm the new complexity is O(N log N) or better. If the issue was a Python loop, confirm it is vectorized with NumPy/PyTorch. If the issue was a deep copy, confirm it is eliminated. Do not output a rewrite that only partially addresses the bottleneck.
+5. Raw {language} code only — no explanation, no markdown fences, no JSON.
+6. Do NOT rename the function.
 """
 # ── Per-tier addenda for multi-agent harness (same scaffolding pattern) ──────

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/sandbox.py RENAMED Viewed

@@ -150,6 +150,35 @@ class VerificationResult:
         return "\n".join(lines)
+def _format_sandbox_error(exc: Exception, language: str = "") -> str:
+    """Map raw Docker / OS exceptions to user-friendly one-liners.
+
+    The lower-cased ``str(exc)`` is matched against known substrings, most
+    specific first; the first match decides the message.
+
+    Args:
+        exc: The exception raised while running the sandbox.
+        language: Optional language label appended to the "image not found"
+            message.
+
+    Returns:
+        A single-line, user-facing description of the failure. Unrecognised
+        errors fall back to ``"⚠️ Sandbox error: {exc}"``.
+    """
+    import re  # local import so this block does not depend on the file's import list
+    msg = str(exc).lower()
+    # "read timeout" needs no separate test: it already contains "timeout".
+    if "timeout" in msg or "timed out" in msg:
+        return (
+            "⚠️ Sandbox timed out — the benchmark likely contains an infinite loop "
+            "or extremely slow path. Try --no-docker to skip the sandbox."
+        )
+    # Accept "oom" only at the start of a word ("oom", "oomkilled",
+    # "oom-killer") so unrelated text such as "room" or a "/zoom/" path is
+    # not reported as an out-of-memory failure.
+    if (
+        "out of memory" in msg
+        or re.search(r"\boom", msg)
+        or ("memory" in msg and "kill" in msg)
+    ):
+        return (
+            "⚠️ Sandbox ran out of memory (OOM). "
+            "Reduce N sizes in the harness or use --no-docker."
+        )
+    if "no such image" in msg or "pull access" in msg or "not found" in msg:
+        lang_label = f" ({language})" if language else ""
+        return (
+            f"⚠️ Sandbox Docker image not found{lang_label}. "
+            "It should have been built on first run — try `docker images` to check."
+        )
+    # Must run before the generic "docker" test below: Docker API error text
+    # often mentions "docker" (for example in the request URL), which would
+    # otherwise turn every permission failure into "Docker is not running".
+    if "permission denied" in msg:
+        return "⚠️ Sandbox permission error — Docker may lack access to the temp directory."
+    # Broad catch-all for anything else that mentions Docker; keep it last
+    # among the specific checks.
+    if "cannot connect" in msg or "connection refused" in msg or "docker" in msg:
+        return (
+            "⚠️ Docker is not running. "
+            "Start Docker Desktop (or the Docker daemon) and try again."
+        )
+    return f"⚠️ Sandbox error: {exc}"
 class CodeSandbox:
     def __init__(self, disabled: bool = False):
         self.disabled = disabled
@@ -277,7 +306,7 @@ class CodeSandbox:
                     return False, f"Missing CSV output (exit {exit_code}).\nFull output:\n{raw_logs}", None
             except Exception as e:
-                return False, f"Sandbox error: {str(e)}", None
+                return False, _format_sandbox_error(e, language), None
             finally:
                 if container:
@@ -312,6 +341,45 @@ class CodeSandbox:
         )
         return VerificationResult(speedup=speedup_result, correctness=correctness_result)
+    def verify_correctness_only(
+        self,
+        original_code: str,
+        optimized_code: str,
+        original_func_name: str,
+        optimized_func_name: str,
+        test_cases: List[Dict[str, Any]],
+        language: str = "python",
+        timeout_seconds: int = 60,
+        context: str = "",
+    ) -> CorrectnessVerification:
+        """
+        Run only the correctness sandbox: no speedup check and no LLM call.
+        Used by `coreinsight test <function_name>`.
+        Returns an unverified result, with the reason in `details`, when the
+        sandbox is disabled, when no Docker client is available, or when the
+        language is C++ / CUDA. Those two languages are rejected because
+        their correctness harness is a main() block embedded by HarnessAgent
+        at analysis time and cannot be rebuilt from stored test cases alone.
+        Otherwise the call is delegated to `_verify_correctness`.
+        """
+        def _refuse(reason):
+            # Every early exit has the same shape: unverified plus a reason.
+            return CorrectnessVerification(verified=False, details=reason)
+        if self.disabled:
+            return _refuse(SANDBOX_SKIPPED_MSG)
+        if not self.client:
+            return _refuse("Docker unavailable.")
+        compiled_languages = ("cpp", "c++", "cuda")
+        if language in compiled_languages:
+            unsupported_msg = (
+                f"Re-verification not supported for {language}: "
+                "correctness harness is embedded at analysis time. "
+                "See stored pass rate in `coreinsight memory`."
+            )
+            return _refuse(unsupported_msg)
+        return self._verify_correctness(
+            original_code,
+            optimized_code,
+            original_func_name,
+            optimized_func_name,
+            test_cases,
+            language,
+            timeout_seconds,
+            context=context,
+        )
     def _verify_speedup(self, csv_output: str) -> SpeedupVerification:
         result = SpeedupVerification(verified=False)
         try:

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2/coreinsight_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreinsight-cli
-Version: 0.3.0
+Version: 0.3.2
 Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
 Author: Varun Jani
 License: GPL-3.0-or-later

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "coreinsight-cli"
-version = "0.3.0"
+version = "0.3.2"
 description = "Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA"
 license = {text = "GPL-3.0-or-later"}
 authors = [

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/LICENSE RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/README.md RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/__init__.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/analyzer.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/config.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/demo/__init__.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/demo/bad_loop.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/demo/data_processor.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/demo/slow.cpp RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/embeddings.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/hardware.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/indexer.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/parser.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/profiler.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/scanner.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight/tui.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/entry_points.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/requires.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/top_level.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.0 → coreinsight_cli-0.3.2}/setup.cfg RENAMED Viewed

File without changes

coreinsight-cli 0.3.0__tar.gz → 0.3.2__tar.gz

coreinsight-cli 0.3.0tar.gz → 0.3.2tar.gz