PyPI - coreinsight-cli - Versions diffs - 0.3.1__tar.gz → 0.3.2__tar.gz - Mend

coreinsight-cli 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{coreinsight_cli-0.3.1/coreinsight_cli.egg-info → coreinsight_cli-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreinsight-cli
-Version: 0.3.1
+Version: 0.3.2
 Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
 Author: Varun Jani
 License: GPL-3.0-or-later

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/main.py RENAMED Viewed

@@ -205,21 +205,78 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
         _log(func_name, "Fetching RAG context...")
         context = indexer.get_context_for_code(original_code) if indexer else ""
-        # 0b. Memory lookup — skip LLM entirely if we've seen this pattern before
+        # 0b. Memory lookup — skip LLM if we've seen this pattern before,
+        # but validate the stored result before trusting it:
+        #   Gate A: no optimized code stored  → previous run was incomplete, re-run LLM
+        #   Gate B: correctness < 50% last run → keep analysis, re-run correctness only
+        #   Gate C: result is good             → return as-is
         if memory:
             memory_hit = memory.lookup(original_code, language)
             if memory_hit:
                 label = "exact match" if memory_hit.is_exact else f"similarity {memory_hit.similarity:.1%}"
-                _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
-                recalled_result = {
-                    "severity":       memory_hit.severity,
-                    "issue":          memory_hit.issue,
-                    "reasoning":      memory_hit.reasoning,
-                    "optimized_code": memory_hit.optimized_code,
-                    "suggestion":     "",
-                    "bottlenecks":    [],
-                }
-                return func_name, recalled_result, None, None, None, None, memory_hit, False
+                # Gate A: stored result has no optimized code — not useful, fall through to LLM
+                if not memory_hit.optimized_code:
+                    _log(func_name, f"Memory hit ({label}) — no optimized code stored, re-running LLM", style="yellow")
+                    memory_hit = None   # fall through; LLM path runs below as normal
+                # Gate B: correctness was poor last time — re-run the correctness check only
+                elif memory_hit.total_cases > 0 and memory_hit.correctness_cases / memory_hit.total_cases < 0.5:
+                    _log(
+                        func_name,
+                        f"Memory hit ({label}) — correctness was "
+                        f"{memory_hit.correctness_cases}/{memory_hit.total_cases} last run, re-checking",
+                        style="yellow",
+                    )
+                    recalled_result = {
+                        "severity":       memory_hit.severity,
+                        "issue":          memory_hit.issue,
+                        "reasoning":      memory_hit.reasoning,
+                        "optimized_code": memory_hit.optimized_code,
+                        "suggestion":     "",
+                        "bottlenecks":    [],
+                    }
+                    new_verification = None
+                    if not getattr(sandbox, "disabled", False):
+                        stored_cases = memory.lookup_test_cases(original_code)
+                        if stored_cases:
+                            _log(func_name, "Re-running correctness sandbox with stored test cases...", style="dim")
+                            correctness = sandbox.verify_correctness_only(
+                                original_code=original_code,
+                                optimized_code=memory_hit.optimized_code,
+                                original_func_name=func_name,
+                                optimized_func_name=func_name,
+                                test_cases=stored_cases,
+                                language=language,
+                                context=context,
+                            )
+                            _log(func_name, f"Re-verification: {correctness.passed_cases}/{correctness.total_cases} passed", style="dim")
+                            try:
+                                from coreinsight.sandbox import VerificationResult, SpeedupVerification
+                                new_verification = VerificationResult(
+                                    speedup=SpeedupVerification(
+                                        verified=True,
+                                        computed_speedups=[memory_hit.avg_speedup] if memory_hit.avg_speedup else [],
+                                        details=f"Speedup recalled from memory: {memory_hit.avg_speedup:.2f}x",
+                                    ),
+                                    correctness=correctness,
+                                )
+                            except Exception:
+                                pass   # verification display is non-critical
+                    return func_name, recalled_result, None, None, new_verification, None, memory_hit, False
+                # Gate C: stored result is complete and correctness is acceptable
+                else:
+                    _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
+                    recalled_result = {
+                        "severity":       memory_hit.severity,
+                        "issue":          memory_hit.issue,
+                        "reasoning":      memory_hit.reasoning,
+                        "optimized_code": memory_hit.optimized_code,
+                        "suggestion":     "",
+                        "bottlenecks":    [],
+                    }
+                    return func_name, recalled_result, None, None, None, None, memory_hit, False
         # ── Route: single-agent vs multi-agent ──────────────────────────
         if agent_mode == "multi" and multi_agents:
@@ -240,8 +297,37 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
         if result is None:
             return func_name, None, None, f"❌ Analysis error: {logs}", None, None, None, False
+        # Retry gate: Low severity or missing optimized code often means the model
+        # defaulted to "looks fine" rather than truly auditing.
+        # Retry up to 2 times before accepting the conclusion.
+        _MAX_ANALYSIS_RETRIES = 2
+        _retry = 0
+        while (result.get("severity") == "Low" or not optimized_code) and _retry < _MAX_ANALYSIS_RETRIES:
+            _retry += 1
+            _log(func_name, f"Low/missing result — retrying analysis ({_retry}/{_MAX_ANALYSIS_RETRIES})...", style="yellow")
+            if agent_mode == "multi" and multi_agents:
+                result, optimized_code, success, logs, plot_data, is_valid_optimization = \
+                    _run_multi_agent(
+                        func_name, original_code, language, context,
+                        hardware_target, sandbox, multi_agents, tier_limits,
+                        stream_callback=stream_callback,
+                    )
+            else:
+                result, optimized_code, success, logs, plot_data, is_valid_optimization = \
+                    _run_single_agent(
+                        func_name, original_code, language, context,
+                        hardware_target, sandbox, agent, tier_limits,
+                        stream_callback=stream_callback,
+                    )
+            if result is None:
+                break
+        if result is None:
+            return func_name, None, None, f"❌ Analysis error after {_retry} retries: {logs}", None, None, None, False
         if result.get("severity") == "Low" or not optimized_code:
-            return func_name, None, None, "✅ No critical bottlenecks detected. Code is optimal.", None, None, None, False
+            confirmed = f" (confirmed after {_retry} retries)" if _retry > 0 else ""
+            return func_name, None, None, f"✅ No significant bottlenecks found{confirmed}.", None, None, None, False
         # 3. Verification + AI-free hardware profiling
         verification    = None
@@ -288,11 +374,29 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
     except Exception as e:
         err_str = str(e)
-        if "context" in err_str.lower() and "limit" in err_str.lower():
-            _log(func_name, f"Context limit hit: {e}", style="bold yellow")
+        err_low  = err_str.lower()
+        if "context" in err_low and "limit" in err_low:
+            _log(func_name, "Context limit hit", style="bold yellow")
+            return func_name, None, None, (
+                "⚠️  Context limit — try a model with a larger context window, "
+                "or split the function into smaller pieces."
+            ), None, None, None, False
+        if any(k in err_low for k in ("cannot connect", "connection refused", "docker")):
+            _log(func_name, "Docker unavailable", style="bold yellow")
+            return func_name, None, None, (
+                "⚠️  Docker is not running — start Docker Desktop and try again.\n"
+                "    Skip the sandbox with: coreinsight analyze --no-docker <file>"
+            ), None, None, None, False
+        if "timeout" in err_low or "timed out" in err_low:
+            _log(func_name, "Sandbox timed out", style="bold yellow")
+            return func_name, None, None, (
+                "⚠️  Sandbox timed out — the benchmark likely contains an infinite loop.\n"
+                "    The LLM analysis result above is still valid."
+            ), None, None, None, False
+        if "out of memory" in err_low or "oom" in err_low:
+            _log(func_name, "Sandbox OOM", style="bold yellow")
             return func_name, None, None, (
-                f"⚠️  Context limit: {err_str}\n"
-                f"Try a model with a larger context window, or split the function."
+                "⚠️  Sandbox ran out of memory. Try --no-docker or reduce the file size."
             ), None, None, None, False
         _log(func_name, f"Failed: {e}", style="bold red")
         return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
@@ -763,7 +867,15 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
                 except Exception as exc:
                     with print_lock:
-                        console.print(f"[bold red]❌ Critical failure in thread processing {func['name']}:[/bold red] {exc}")
+                        exc_low = str(exc).lower()
+                        if any(k in exc_low for k in ("docker", "cannot connect", "connection refused")):
+                            console.print(f"[bold yellow]⚠️  {func['name']}: Docker unavailable — start Docker Desktop and retry.[/bold yellow]")
+                        elif "timeout" in exc_low or "timed out" in exc_low:
+                            console.print(f"[bold yellow]⚠️  {func['name']}: Sandbox timed out.[/bold yellow]")
+                        elif "out of memory" in exc_low or "oom" in exc_low:
+                            console.print(f"[bold yellow]⚠️  {func['name']}: Sandbox ran out of memory.[/bold yellow]")
+                        else:
+                            console.print(f"[bold red]❌ {func['name']}: Unexpected error — {exc}[/bold red]")
         console.print(Panel.fit(f"✅ [bold green]Analysis Complete![/bold green] Final report saved to:\n{report_path.absolute()}"))

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/memory.py RENAMED Viewed

@@ -332,6 +332,16 @@ class OptimizationMemory:
         except Exception as exc:
             return {"count": 0, "error": str(exc)}
+    def lookup_test_cases(self, original_code: str) -> Optional[list]:
+        """
+        Fetch the test cases previously stored for `original_code`.
+        The lookup key is the AST hash of `original_code`, so callers can
+        re-run a correctness check without regenerating cases via the LLM.
+        Returns None when `_ensure_db()` reports failure or nothing is stored.
+        """
+        if self._ensure_db():
+            return self._load_test_cases(self.ast_hash(original_code))
+        return None
     def store_test_cases(self, original_code: str, test_cases: list) -> None:
         """
         Persist test cases for a function, keyed by AST hash.

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/prompts.py RENAMED Viewed

@@ -90,7 +90,8 @@ GRADING RUBRIC AND INSTRUCTIONS (APPLY ONLY THE SPECIFIC RUBRIC FOR {language}):
 INSTRUCTIONS:
 1. Actively hunt for Medium, High, and Critical issues based ONLY on the specific {language} rubric above. Do not hallucinate GPU concepts for Python code unless PyTorch/CUDA is explicitly used.
 2. If you find an issue, you MUST explain the hardware-level or interpreter-level reasoning clearly (e.g., CPU cache misses, GIL contention, memory latency).
-3. CODE GENERATION MANDATE: You MUST provide the completely rewritten, optimized function in the `optimized_code` field. The code must be raw, syntactically correct {language} code ready to be compiled/run. Do NOT leave this field empty. Do NOT wrap the code in markdown backticks (e.g., ```cpp) inside the JSON string.
+3. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. A false negative (missing a real bottleneck) is always worse than a false positive. Only assign Low severity if you can explicitly prove the algorithm is already optimal for the target hardware — state the time complexity, memory access pattern, and why no better approach exists. "No obvious issues" is NOT sufficient justification for Low.
+4. CODE GENERATION MANDATE: You MUST provide the completely rewritten, optimized function in the `optimized_code` field. The code must be raw, syntactically correct {language} code ready to be compiled/run. Do NOT leave this field empty. Do NOT wrap the code in markdown backticks (e.g., ```cpp) inside the JSON string.
 """
 # ---------------------------------------------------------------------------
@@ -143,10 +144,10 @@ GRADING RUBRIC (apply only the {language} section):
 - Low: Trivial stylistic issues only.
 INSTRUCTIONS:
-1. Identify the single most impactful bottleneck — do not list everything, find the root cause.
-2. Explain the hardware-level or interpreter-level reasoning precisely.
-3. Set `optimized_code` to null — code generation happens in a separate agent.
-4. If the code is genuinely optimal, set severity to Low and explain why.
+1. Identify the single most impactful bottleneck — do not list everything, find the root cause. If no Critical or High issue exists, identify the most significant Medium issue. Do NOT default to Low out of uncertainty.
+2. Explain the hardware-level or interpreter-level reasoning precisely — name the specific mechanism (e.g., "O(N²) comparisons cause cache thrashing on arrays larger than L2 cache", "GIL held across network I/O blocks all threads").
+3. CRITICAL: Set `optimized_code` to null. Any non-null value in this field will corrupt the pipeline. Code generation is handled by a separate agent.
+4. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. Only assign Low if you can explicitly prove algorithmic optimality — state the time complexity, memory access pattern, and why no better approach exists for the target hardware. "No obvious issues" is NOT sufficient justification for Low.
 {format_instructions}
 """
@@ -254,8 +255,9 @@ REQUIREMENTS:
 1. Rewrite ONLY the function named {func_name} — preserve its signature exactly.
 2. Fix the identified bottleneck using the suggestion as your guide.
 3. The function must be self-contained and correct.
-4. Raw {language} code only — no explanation, no markdown fences, no JSON.
-5. Do NOT rename the function.
+4. VERIFICATION: Before outputting, mentally confirm: does the rewrite directly eliminate the identified bottleneck? If the issue was O(N²), confirm the new complexity is O(N log N) or better. If the issue was a Python loop, confirm it is vectorized with NumPy/PyTorch. If the issue was a deep copy, confirm it is eliminated. Do not output a rewrite that only partially addresses the bottleneck.
+5. Raw {language} code only — no explanation, no markdown fences, no JSON.
+6. Do NOT rename the function.
 """
 # ── Per-tier addenda for multi-agent harness (same scaffolding pattern) ──────

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/sandbox.py RENAMED Viewed

@@ -150,6 +150,35 @@ class VerificationResult:
         return "\n".join(lines)
+def _format_sandbox_error(exc: Exception, language: str = "") -> str:
+    """
+    Map a raw Docker / OS exception to a user-friendly one-line message.
+    Classification is keyword-based on the lowercased ``str(exc)``. Checks run
+    from most specific to least specific and the first match wins.
+    Args:
+        exc: The exception raised while running the sandbox.
+        language: Optional language label; only shown in the "image not found" message.
+    Returns:
+        A warning string. Unrecognised errors fall back to "⚠️ Sandbox error: <exc>".
+    """
+    import re  # local import keeps this helper self-contained
+    msg = str(exc).lower()
+    # "timeout" already covers "read timeout"; "timed out" covers socket-style wording.
+    if "timeout" in msg or "timed out" in msg:
+        return (
+            "⚠️ Sandbox timed out — the benchmark likely contains an infinite loop "
+            "or extremely slow path. Try --no-docker to skip the sandbox."
+        )
+    # Match "oom" only when it does not end another word, so "room" / "zoom" /
+    # "bloom" are not misreported as OOM, while "oomkilled" / "oom-kill" still match.
+    oom_token = re.search(r"(?<![a-z])oom", msg) is not None
+    if "out of memory" in msg or oom_token or ("memory" in msg and "kill" in msg):
+        return (
+            "⚠️ Sandbox ran out of memory (OOM). "
+            "Reduce N sizes in the harness or use --no-docker."
+        )
+    if "no such image" in msg or "pull access" in msg or "not found" in msg:
+        lang_label = f" ({language})" if language else ""
+        return (
+            f"⚠️ Sandbox Docker image not found{lang_label}. "
+            "It should have been built on first run — try `docker images` to check."
+        )
+    # Must run before the generic "docker" test below: a permission failure can
+    # also mention Docker (daemon socket path, request URL) and would otherwise
+    # be reported as "Docker is not running".
+    if "permission denied" in msg:
+        return "⚠️ Sandbox permission error — Docker may lack access to the temp directory."
+    # Least specific check: any remaining message that mentions Docker or a refused connection.
+    if "cannot connect" in msg or "connection refused" in msg or "docker" in msg:
+        return (
+            "⚠️ Docker is not running. "
+            "Start Docker Desktop (or the Docker daemon) and try again."
+        )
+    return f"⚠️ Sandbox error: {exc}"
 class CodeSandbox:
     def __init__(self, disabled: bool = False):
         self.disabled = disabled
@@ -277,7 +306,7 @@ class CodeSandbox:
                     return False, f"Missing CSV output (exit {exit_code}).\nFull output:\n{raw_logs}", None
             except Exception as e:
-                return False, f"Sandbox error: {str(e)}", None
+                return False, _format_sandbox_error(e, language), None
             finally:
                 if container:

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2/coreinsight_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreinsight-cli
-Version: 0.3.1
+Version: 0.3.2
 Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
 Author: Varun Jani
 License: GPL-3.0-or-later

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "coreinsight-cli"
-version = "0.3.1"
+version = "0.3.2"
 description = "Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA"
 license = {text = "GPL-3.0-or-later"}
 authors = [

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/LICENSE RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/README.md RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/__init__.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/analyzer.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/config.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/__init__.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/bad_loop.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/data_processor.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/slow.cpp RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/embeddings.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/hardware.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/indexer.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/parser.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/profiler.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/scanner.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/tui.py RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/entry_points.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/requires.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/top_level.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/setup.cfg RENAMED Viewed

File without changes

coreinsight-cli 0.3.1__tar.gz → 0.3.2__tar.gz

coreinsight-cli 0.3.1__tar.gz → 0.3.2__tar.gz