PyPI - coreinsight-cli - Versions diffs - 0.2.8__tar.gz → 0.2.9__tar.gz - Mend

coreinsight-cli 0.2.8.tar.gz → 0.2.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{coreinsight_cli-0.2.8/coreinsight_cli.egg-info → coreinsight_cli-0.2.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreinsight-cli
-Version: 0.2.8
+Version: 0.2.9
 Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
 Author: Varun Jani
 License: GPL-3.0-or-later

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/analyzer.py RENAMED Viewed

@@ -805,7 +805,9 @@ class HarnessAgent:
             is_valid = self._check_speedup(success, logs)
             retries += 1
-        if is_valid and retries > 0:
+        if getattr(sandbox, 'disabled', False):
+            pass  # skipped intentionally — don't annotate as failed
+        elif is_valid and retries > 0:
             logs = f"(Succeeded after {retries} retries)\n" + logs
         elif not is_valid:
             logs    = f"(Failed after {retries} retries)\n" + logs

coreinsight_cli-0.2.9/coreinsight/embeddings.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""
+coreinsight/embeddings.py — Shared embedding utility
+Single source of truth for embedding model loading used by both
+memory.py (OptimizationMemory) and indexer.py (RepoIndexer).
+Tries to load all-MiniLM-L6-v2 from local cache first.
+Falls back to a deterministic hash-based embedder when offline
+or when the model has not yet been downloaded.
+"""
+from __future__ import annotations
+import hashlib
+import logging
+import math
+import os
+from pathlib import Path
+from typing import List, Tuple
+logger = logging.getLogger(__name__)
+# All models cached here — never hits the network if already present
+MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
+MODEL_NAME      = "all-MiniLM-L6-v2"
+class _HashEmbeddingFunction:
+    """
+    Offline stand-in for the MiniLM embedder.
+    Each whitespace-separated, lower-cased token is hashed with SHA-256
+    into one of DIM buckets; the bucket counts are then scaled to unit
+    length. The result is fully deterministic and needs no model files.
+    """
+    DIM = 384
+    def __call__(self, input: List[str]) -> List[List[float]]:
+        # One unit-length vector per input text, in input order.
+        return [self._embed(text) for text in input]
+    def _embed(self, text: str) -> List[float]:
+        """Return the L2-normalised bucket-count vector for one text."""
+        buckets = [0.0] * self.DIM
+        for token in text.lower().split():
+            # Big-endian digest bytes give the same integer as the hex digest.
+            digest = hashlib.sha256(token.encode()).digest()
+            buckets[int.from_bytes(digest, "big") % self.DIM] += 1.0
+        # An all-zero vector (no tokens) keeps a divisor of 1.0.
+        norm = math.sqrt(sum(v * v for v in buckets)) or 1.0
+        return [v / norm for v in buckets]
+def load_embedding_fn() -> Tuple[object, str]:
+    """
+    Return the embedding function to use, plus a label describing it.
+    The sentence-transformers / HuggingFace cache variables are pointed at
+    MODEL_CACHE_DIR (only when not already set), then the MiniLM embedder
+    is built and called once so that load failures surface here. Any
+    exception produces a logged and printed warning and the hash-based
+    fallback embedder is returned instead.
+    Returns:
+        (embedding_fn, label) — label is a human-readable name of the
+        embedder that is active.
+    """
+    MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    cache_path = str(MODEL_CACHE_DIR)
+    # setdefault keeps any value already exported by the user. These must
+    # be in place before chromadb.utils imports torch.
+    for env_key, env_default in (
+        ("SENTENCE_TRANSFORMERS_HOME", cache_path),
+        ("HF_HUB_CACHE", cache_path),
+        ("HF_HUB_OFFLINE", "0"),
+    ):
+        os.environ.setdefault(env_key, env_default)
+    try:
+        from chromadb.utils import embedding_functions as _ef
+        embedder = _ef.SentenceTransformerEmbeddingFunction(model_name=MODEL_NAME)
+        # Eager probe: fail now rather than in the middle of an analysis.
+        embedder(["probe"])
+        active_label = f"{MODEL_NAME} (cached)"
+        logger.debug(f"Embedding model loaded: {active_label}")
+        return embedder, active_label
+    except Exception as exc:
+        log_message = (
+            f"SentenceTransformer unavailable ({exc}). "
+            f"Using offline hash embedder — semantic quality reduced. "
+            f"Run `coreinsight index` once while online to cache the model."
+        )
+        logger.warning(log_message)
+        from rich.console import Console as _Console
+        banner = (
+            "[yellow]⚠  Embedding model unavailable (offline or not yet downloaded). "
+            "Using keyword-based fallback — RAG and memory recall will work but with "
+            "reduced semantic accuracy. "
+            "Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
+        )
+        _Console().print(banner)
+        return _HashEmbeddingFunction(), "hash-based (offline fallback)"

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/indexer.py RENAMED Viewed

@@ -9,62 +9,11 @@ import chromadb
 from chromadb.utils import embedding_functions
 from coreinsight.parser import CodeParser
+from coreinsight.embeddings import load_embedding_fn
 console = Console()
 logger  = logging.getLogger(__name__)
-# Local model cache — never hits the network if model is already here
-_MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
-class _HashEmbeddingFunction:
-    """
-    Deterministic offline fallback embedder.
-    Produces a 384-dim float vector from token overlap — no downloads, no GPU.
-    Semantic quality is lower than MiniLM but RAG still works via keyword matching.
-    """
-    DIM = 384
-    def __call__(self, input: list[str]) -> list[list[float]]:
-        results = []
-        for text in input:
-            tokens = text.lower().split()
-            vec    = [0.0] * self.DIM
-            for tok in tokens:
-                h = int(hashlib.sha256(tok.encode()).hexdigest(), 16)
-                vec[h % self.DIM] += 1.0
-            # L2 normalise
-            mag = math.sqrt(sum(x * x for x in vec)) or 1.0
-            results.append([x / mag for x in vec])
-        return results
-def _load_embedding_fn():
-    """
-    Try to load SentenceTransformer from local cache.
-    Falls back to _HashEmbeddingFunction if offline or model not cached.
-    """
-    _MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
-    os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(_MODEL_CACHE_DIR))
-    os.environ.setdefault("HF_HUB_OFFLINE", "0")  # allow download when online
-    try:
-        fn = embedding_functions.SentenceTransformerEmbeddingFunction(
-            model_name="all-MiniLM-L6-v2",
-        )
-        # Probe: actually load the model now so we catch network errors here
-        # rather than silently later during indexing.
-        fn(["probe"])
-        return fn, "all-MiniLM-L6-v2 (cached)"
-    except Exception as e:
-        logger.warning(f"SentenceTransformer unavailable ({e}). Using offline hash embedder — semantic quality reduced.")
-        console.print(
-            "[yellow]⚠  Embedding model unavailable (offline or not yet downloaded). "
-            "Using keyword-based fallback — RAG will work but with reduced semantic accuracy. "
-            "Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
-        )
-        return _HashEmbeddingFunction(), "hash-based (offline fallback)"
 class RepoIndexer:
     def __init__(self, repo_path: str):
@@ -82,7 +31,7 @@ class RepoIndexer:
             return True
         try:
             self._chroma_client   = chromadb.PersistentClient(path=str(self.db_path))
-            self._embedding_fn, self._embedding_label = _load_embedding_fn()
+            self._embedding_fn, self._embedding_label = load_embedding_fn()
             self._collection      = self._chroma_client.get_or_create_collection(
                 name="codebase_context",
                 embedding_function=self._embedding_fn,

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/main.py RENAMED Viewed

@@ -741,7 +741,7 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None):
     finally:
         console = _prev_console
-def run_demo(lang: str = "python", no_docker: bool = False):
+def run_demo(lang: str = "python", no_docker: bool = False, tui_console=None):
     import shutil
     import importlib.resources
@@ -804,11 +804,16 @@ def run_demo(lang: str = "python", no_docker: bool = False):
     # For Python: auto-index so RAG cross-file context is showcased
     if lang == "python":
         console.print("[dim]Auto-indexing demo files to showcase RAG cross-file context...[/dim]")
-        from coreinsight.indexer import RepoIndexer as _RepoIndexer
-        _RepoIndexer(str(demo_dir)).index_repository()
+        try:
+            from coreinsight.indexer import RepoIndexer as _RepoIndexer
+            _RepoIndexer(str(demo_dir)).index_repository()
+        except Exception as _idx_err:
+            # Non-fatal — SQLite write conflicts can occur when running
+            # through the TUI. RAG context will be empty for this run.
+            console.print(f"[dim yellow]Indexing skipped (will retry next run): {_idx_err}[/dim yellow]")
         console.print()
-    run_analysis(str(demo_dir / entry_file), no_docker=no_docker)
+    run_analysis(str(demo_dir / entry_file), no_docker=no_docker, tui_console=tui_console)
 def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv"):
     from coreinsight.memory import OptimizationMemory, MEMORY_DIR

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/memory.py RENAMED Viewed

@@ -21,12 +21,13 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Optional
+from coreinsight.embeddings import load_embedding_fn
 logger = logging.getLogger(__name__)
 MEMORY_DIR  = Path.home() / ".coreinsight" / "memory_db"
 CODE_DIR    = MEMORY_DIR / "code"
 COLLECTION  = "optimization_memory"
-EMBED_MODEL = "all-MiniLM-L6-v2"   # same model as RepoIndexer — no extra download
 # ChromaDB uses cosine *distance* (lower = more similar).
 # 0.15 distance ≈ 0.85 cosine similarity for this embedding model.
@@ -54,17 +55,19 @@ class OptimizationMemory:
     Local vector database of verified optimizations.
     Reads are thread-safe (ChromaDB handles concurrent queries).
-    Writes are called from the main thread after each future completes,
-    so no write contention across worker threads.
+    Writes are serialized via _write_lock since store() can be called
+    from concurrent threads in process_function's as_completed loop.
     """
     def __init__(self, memory_dir: Path = MEMORY_DIR) -> None:
-        self._memory_dir = memory_dir
-        self._code_dir   = memory_dir / "code"
-        self._client     = None
-        self._collection = None
-        self._embed_fn   = None
-        self._init_error = ""
+        import threading
+        self._memory_dir  = memory_dir
+        self._code_dir    = memory_dir / "code"
+        self._client      = None
+        self._collection  = None
+        self._embed_fn    = None
+        self._init_error  = ""
+        self._write_lock  = threading.Lock()
     # ------------------------------------------------------------------ #
     # Lazy init — avoids slow import at startup
@@ -78,13 +81,11 @@ class OptimizationMemory:
         try:
             try:
                 import chromadb
-                from chromadb.utils import embedding_functions
             except Exception as sqlite_exc:
                 self._init_error = (
                     f"ChromaDB unavailable (likely outdated SQLite): {sqlite_exc}. "
                     "Optimization memory disabled. "
-                    "Fix: pip install pysqlite3-binary and add the following to the top of memory.py:\n"
-                    "  import pysqlite3, sys; sys.modules['sqlite3'] = pysqlite3"
+                    "Fix: pip install coreinsight-cli[compat]"
                 )
                 return False
@@ -92,9 +93,8 @@ class OptimizationMemory:
             self._code_dir.mkdir(parents=True, exist_ok=True)
             self._client = chromadb.PersistentClient(path=str(self._memory_dir))
-            self._embed_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
-                model_name=EMBED_MODEL
-            )
+            self._embed_fn, _embed_label = load_embedding_fn()
+            logger.debug(f"Memory embedder: {_embed_label}")
             self._collection = self._client.get_or_create_collection(
                 name=COLLECTION,
                 embedding_function=self._embed_fn,
@@ -273,52 +273,53 @@ class OptimizationMemory:
         """
         if not self._ensure_db():
             return False
-        try:
-            h           = self.ast_hash(original_code)
-            opt_code    = result.get("optimized_code", "") or ""
-            avg_speedup = 0.0
-            if verification.speedup.computed_speedups:
-                avg_speedup = (
-                    sum(verification.speedup.computed_speedups)
-                    / len(verification.speedup.computed_speedups)
+        with self._write_lock:
+            try:
+                h           = self.ast_hash(original_code)
+                opt_code    = result.get("optimized_code", "") or ""
+                avg_speedup = 0.0
+                if verification.speedup.computed_speedups:
+                    avg_speedup = (
+                        sum(verification.speedup.computed_speedups)
+                        / len(verification.speedup.computed_speedups)
+                    )
+                profiler_summary = ""
+                if profiler_result and profiler_result.available and profiler_result.metrics:
+                    parts = [
+                        f"{m.name}: {m.delta}"
+                        for m in profiler_result.metrics[:2]
+                    ]
+                    profiler_summary = " | ".join(parts)
+                self._save_code(h, language, opt_code)
+                meta = {
+                    "func_name":         func_name,
+                    "language":          language,
+                    "avg_speedup":       round(avg_speedup, 4),
+                    "issue":             (result.get("issue")     or "")[:500],
+                    "reasoning":         (result.get("reasoning") or "")[:1000],
+                    "severity":          result.get("severity", "High"),
+                    "correctness_cases": verification.correctness.passed_cases,
+                    "profiler_summary":  profiler_summary[:200],
+                    "timestamp":         datetime.now(timezone.utc).isoformat(),
+                }
+                self._collection.upsert(
+                    ids=[h],
+                    documents=[original_code],
+                    metadatas=[meta],
                 )
+                logger.info(
+                    f"Memory: stored '{func_name}' "
+                    f"(hash={h[:8]}…, speedup={avg_speedup:.2f}x)"
+                )
+                return True
-            profiler_summary = ""
-            if profiler_result and profiler_result.available and profiler_result.metrics:
-                parts = [
-                    f"{m.name}: {m.delta}"
-                    for m in profiler_result.metrics[:2]
-                ]
-                profiler_summary = " | ".join(parts)
-            self._save_code(h, language, opt_code)
-            meta = {
-                "func_name":         func_name,
-                "language":          language,
-                "avg_speedup":       round(avg_speedup, 4),
-                "issue":             (result.get("issue")     or "")[:500],
-                "reasoning":         (result.get("reasoning") or "")[:1000],
-                "severity":          result.get("severity", "High"),
-                "correctness_cases": verification.correctness.passed_cases,
-                "profiler_summary":  profiler_summary[:200],
-                "timestamp":         datetime.now(timezone.utc).isoformat(),
-            }
-            self._collection.upsert(
-                ids=[h],
-                documents=[original_code],
-                metadatas=[meta],
-            )
-            logger.info(
-                f"Memory: stored '{func_name}' "
-                f"(hash={h[:8]}…, speedup={avg_speedup:.2f}x)"
-            )
-            return True
-        except Exception as exc:
-            logger.debug(f"Memory store failed: {exc}")
-            return False
+            except Exception as exc:
+                logger.debug(f"Memory store failed: {exc}")
+                return False
     def stats(self) -> Dict[str, Any]:
         if not self._ensure_db():

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/profiler.py RENAMED Viewed

@@ -156,6 +156,78 @@ def _fmt_int(n: int) -> str:
     return f"{n:,}"
+def _parse_nsys_stats(output: str) -> Dict[str, Any]:
+    """
+    Parse `nsys profile --stats=true` output into structured metrics.
+    Two summary tables are read:
+      * the GPU kernel table (title contains "CUDA Kernel Statistics" or
+        "GPU Kernel Summary"), and
+      * the memory-operation *by time* table (title contains
+        "Memory Operation", "Memory Throughput" or "MemOps Summary").
+        Tables titled "by size" are skipped because their second column
+        is not a duration.
+    Args:
+        output: Text emitted by nsys (stdout, optionally with stderr appended).
+    Returns:
+        A dict that may contain: top_kernel_name, top_kernel_avg_ns,
+        top_kernel_total_ns, top_kernel_instances, total_kernel_ns and
+        total_mem_transfer_ns. Keys are omitted when the corresponding
+        table is missing or has no parseable rows; an empty dict means
+        nothing could be parsed.
+    """
+    def _is_numeric(token: str) -> bool:
+        # Table cells start with a digit ("100.0", "3,456"); the first
+        # token of a kernel name does not.
+        if not token[:1].isdigit():
+            return False
+        try:
+            float(token.replace(",", ""))
+        except ValueError:
+            return False
+        return True
+    def _is_header(line: str) -> bool:
+        # Column-title row (both "Time(%)" and "Time (%)" spellings) or the
+        # dashed separator row beneath it.
+        return "Time(%)" in line or "Time (%)" in line or "----" in line
+    result: Dict[str, Any] = {}
+    lines = output.splitlines()
+    # ── Kernel statistics ────────────────────────────────────────────────
+    # Leading columns: Time(%)  Total Time (ns)  Instances  Avg (ns) ...
+    # The number of numeric columns before Name differs between nsys
+    # releases, so the name is located as "everything after the numeric
+    # cells" instead of at a fixed index.
+    kernel_section = False
+    kernels = []
+    for line in lines:
+        if "CUDA Kernel Statistics" in line or "GPU Kernel Summary" in line:
+            kernel_section = True
+            continue
+        if not kernel_section:
+            continue
+        if line.strip() == "" or line.startswith("="):
+            # Blank lines before the first row belong to the table preamble;
+            # only a blank line after data ends the section.
+            if kernels:
+                kernel_section = False
+            continue
+        if _is_header(line):
+            continue
+        parts = line.split()
+        if len(parts) < 7:
+            continue
+        try:
+            row = {
+                "pct":       float(parts[0]),
+                "total_ns":  float(parts[1].replace(",", "")),
+                "instances": int(parts[2].replace(",", "")),
+                "avg_ns":    float(parts[3].replace(",", "")),
+            }
+        except ValueError:
+            continue
+        name_at = next(
+            (i for i in range(4, len(parts)) if not _is_numeric(parts[i])),
+            None,
+        )
+        # Fall back to the last cell if every token looks numeric.
+        row["name"] = " ".join(parts[name_at:]) if name_at is not None else parts[-1]
+        kernels.append(row)
+    if kernels:
+        # Top kernel by total time
+        top = max(kernels, key=lambda k: k["total_ns"])
+        result["top_kernel_name"]     = top["name"]
+        result["top_kernel_avg_ns"]   = top["avg_ns"]
+        result["top_kernel_total_ns"] = top["total_ns"]
+        result["top_kernel_instances"]= top["instances"]
+        result["total_kernel_ns"]     = sum(k["total_ns"] for k in kernels)
+    # ── Memory transfers (by time) ───────────────────────────────────────
+    mem_titles = ("Memory Operation", "Memory Throughput", "MemOps Summary")
+    mem_section = False
+    mem_rows = 0
+    total_mem_ns = 0.0
+    for line in lines:
+        if any(title in line for title in mem_titles):
+            # A "by size" table reports sizes, not durations: stay closed.
+            mem_section = "by size" not in line.lower()
+            mem_rows = 0
+            continue
+        if not mem_section:
+            continue
+        if line.strip() == "" or line.startswith("="):
+            # Same rule as the kernel table: tolerate the blank line between
+            # the title and the header, stop once rows have been consumed.
+            if mem_rows:
+                mem_section = False
+            continue
+        if _is_header(line):
+            continue
+        parts = line.split()
+        if len(parts) >= 3:
+            try:
+                total_mem_ns += float(parts[1].replace(",", ""))
+            except ValueError:
+                continue
+            mem_rows += 1
+    if total_mem_ns:
+        result["total_mem_transfer_ns"] = total_mem_ns
+    return result
 def _parse_perf_stat(stderr: str) -> Dict[str, float]:
     """Extract hardware counter values from `perf stat` stderr output."""
     targets = {
@@ -284,7 +356,14 @@ class HardwareProfiler:
                     source_dir=source_dir,
                 )
             if language in ("cuda", "cu", "cuh"):
-                return self._profile_cuda(detected)
+                return self._profile_cuda(
+                    detected,
+                    original_code=original_code,
+                    optimized_code=optimized_code,
+                    func_name=func_name,
+                    original_file_content=original_file_content,
+                    source_dir=source_dir,
+                )
         except Exception as exc:
             logger.debug(f"HardwareProfiler.profile exception: {exc}", exc_info=True)
             return ProfilerResult(
@@ -710,17 +789,190 @@ class HardwareProfiler:
         return metrics or None
     # ------------------------------------------------------------------ #
-    # CUDA path (v0.2.0: nsys / nvprof)
+    # CUDA path — nsys CLI profiling
     # ------------------------------------------------------------------ #
-    def _profile_cuda(self, detected: Dict[str, bool]) -> ProfilerResult:
-        if detected.get("nsys"):
-            note = "nsys detected."
-        elif detected.get("nvprof"):
-            note = "nvprof detected."
-        else:
-            note = "No CUDA profiling tools found (install nsys from CUDA Toolkit)."
-        return ProfilerResult(
-            available=False, tool="none", language="cuda",
-            error=f"{note} CUDA profiling coming in v0.2.0.",
-        )
+    def _profile_cuda(
+        self,
+        detected:              Dict[str, bool],
+        original_code:         str = "",
+        optimized_code:        str = "",
+        func_name:             str = "",
+        original_file_content: str = "",
+        source_dir:            str = "",
+    ) -> ProfilerResult:
+        """
+        Compile CUDA source with nvcc, run it under `nsys profile --stats=true`
+        and turn the parsed statistics into before/after metrics.
+        Args:
+            detected: Tool-availability map; only the "nsys" and "nvprof"
+                keys are read here.
+            original_code: Not used by this method body.
+            optimized_code: Text appended to the original file to form the
+                "optimized" build. Empty means only the original is profiled.
+            func_name: Only checked for truthiness, to gate the optimized build.
+            original_file_content: Full .cu source that is compiled and run.
+            source_dir: Not used by this method body.
+        Returns:
+            A ProfilerResult. On every failure path `available` stays False
+            and `error` describes the cause. On success `available` is True
+            and `host_tool_name` / `host_tool_metrics` are populated.
+        """
+        result = ProfilerResult(available=False, tool="nsys", language="cuda")
+        # Preconditions: nsys and nvcc on PATH, and source text to compile.
+        if not detected.get("nsys"):
+            if detected.get("nvprof"):
+                result.error = (
+                    "nvprof detected but not yet supported — install nsys "
+                    "from CUDA Toolkit 11.0+ for hardware profiling."
+                )
+            else:
+                result.error = (
+                    "No CUDA profiling tools found on PATH. "
+                    "Install nsys: https://developer.nvidia.com/nsight-systems"
+                )
+            return result
+        if not shutil.which("nvcc"):
+            result.error = "nvcc not found — required to compile CUDA sources for profiling."
+            return result
+        if not original_file_content:
+            result.error = "No CUDA source content available for profiling."
+            return result
+        # Parsed nsys stats keyed by build label ("original" / "optimized").
+        stats_per_label: Dict[str, Dict[str, Any]] = {}
+        # Build the optimized source by appending optimized_code to the
+        # original file text.
+        # NOTE(review): if optimized_code defines a kernel with the same name
+        # and signature as one already in the file, nvcc will presumably
+        # reject the translation unit as a redefinition and this method will
+        # return the "nvcc compilation failed for optimized version" error —
+        # confirm how callers shape optimized_code.
+        sources = [("original", original_file_content)]
+        if optimized_code and func_name:
+            opt_src = (
+                original_file_content.strip()
+                + "\n\n// --- CoreInsight optimized replacement ---\n"
+                + optimized_code.strip()
+            )
+            sources.append(("optimized", opt_src))
+        # Scratch directory for sources, binaries and nsys reports; removed in
+        # the finally block below on every exit path.
+        tmp = tempfile.mkdtemp()
+        try:
+            for label, src in sources:
+                src_path = os.path.join(tmp, f"{label}.cu")
+                bin_path = os.path.join(tmp, label)
+                with open(src_path, "w") as fh:
+                    fh.write(src)
+                # Compile
+                compile_proc = subprocess.run(
+                    ["nvcc", "-O3", "-arch=native", src_path, "-o", bin_path],
+                    capture_output=True, text=True, timeout=120,
+                )
+                if compile_proc.returncode != 0:
+                    # Try without -arch=native (older nvcc versions)
+                    compile_proc = subprocess.run(
+                        ["nvcc", "-O3", src_path, "-o", bin_path],
+                        capture_output=True, text=True, timeout=120,
+                    )
+                if compile_proc.returncode != 0:
+                    logger.debug(
+                        f"CUDA compile failed for {label}:\n"
+                        f"{compile_proc.stderr[:400]}"
+                    )
+                    result.error = (
+                        f"nvcc compilation failed for {label} version.\n"
+                        f"{compile_proc.stderr[:300]}"
+                    )
+                    return result
+                # Profile with nsys
+                nsys_out_base = os.path.join(tmp, f"nsys_{label}")
+                try:
+                    nsys_proc = subprocess.run(
+                        [
+                            "nsys", "profile",
+                            "--stats=true",
+                            "--force-overwrite=true",
+                            "-o", nsys_out_base,
+                            bin_path,
+                        ],
+                        capture_output=True, text=True, timeout=300,
+                    )
+                    # Parse stdout and stderr together so the summary tables are
+                    # found whichever stream nsys wrote them to. The process
+                    # return code is not checked; an empty parse is the failure
+                    # signal.
+                    combined = nsys_proc.stdout + nsys_proc.stderr
+                    parsed   = _parse_nsys_stats(combined)
+                    if not parsed:
+                        logger.debug(
+                            f"nsys: no stats parsed for {label}.\n"
+                            f"nsys stdout: {nsys_proc.stdout[:300]}\n"
+                            f"nsys stderr: {nsys_proc.stderr[:300]}"
+                        )
+                        result.error = (
+                            f"nsys ran but produced no parseable stats for {label}. "
+                            f"Ensure the binary launches at least one CUDA kernel."
+                        )
+                        return result
+                    stats_per_label[label] = parsed
+                except subprocess.TimeoutExpired:
+                    result.error = "nsys profiling timed out (300s)."
+                    return result
+                except Exception as exc:
+                    result.error = f"nsys execution error: {exc}"
+                    return result
+        except Exception as exc:
+            # Also reached when an nvcc run raises (e.g. its 120s timeout),
+            # since the compile calls are outside the inner try.
+            logger.debug(f"CUDA profiling error: {exc}")
+            result.error = f"CUDA profiling failed: {exc}"
+            return result
+        finally:
+            shutil.rmtree(tmp, ignore_errors=True)
+        if "original" not in stats_per_label:
+            result.error = "No profiling data collected."
+            return result
+        orig_s = stats_per_label["original"]
+        # Without an optimized build the "optimized" column mirrors the original.
+        opt_s  = stats_per_label.get("optimized", orig_s)
+        metrics: List[ProfilerMetric] = []
+        # ── Kernel timing ─────────────────────────────────────────────
+        # Values are nanoseconds; displayed as µs (÷1000) and ms (÷1e6).
+        orig_ns = orig_s.get("top_kernel_avg_ns", 0.0)
+        opt_ns  = opt_s.get("top_kernel_avg_ns",  orig_ns)
+        if orig_ns:
+            metrics.append(ProfilerMetric(
+                name=f"Kernel avg time [{orig_s.get('top_kernel_name', 'top kernel')}]",
+                original=f"{orig_ns / 1000:.2f} µs",
+                optimized=f"{opt_ns / 1000:.2f} µs",
+                delta=_pct_delta(orig_ns, opt_ns),
+                note="lower is better",
+            ))
+        orig_total = orig_s.get("total_kernel_ns", 0.0)
+        opt_total  = opt_s.get("total_kernel_ns",  orig_total)
+        if orig_total:
+            metrics.append(ProfilerMetric(
+                name="Total kernel time",
+                original=f"{orig_total / 1e6:.3f} ms",
+                optimized=f"{opt_total / 1e6:.3f} ms",
+                delta=_pct_delta(orig_total, opt_total),
+                note="lower is better",
+            ))
+        orig_inst = orig_s.get("top_kernel_instances", 0)
+        if orig_inst:
+            metrics.append(ProfilerMetric(
+                name="Kernel launches",
+                original=str(orig_inst),
+                optimized=str(opt_s.get("top_kernel_instances", orig_inst)),
+                delta="—",
+                note="",
+            ))
+        # ── Memory transfers ──────────────────────────────────────────
+        orig_mem = orig_s.get("total_mem_transfer_ns", 0.0)
+        opt_mem  = opt_s.get("total_mem_transfer_ns",  orig_mem)
+        if orig_mem:
+            metrics.append(ProfilerMetric(
+                name="Total memory transfer time",
+                original=f"{orig_mem / 1e6:.3f} ms",
+                optimized=f"{opt_mem / 1e6:.3f} ms",
+                delta=_pct_delta(orig_mem, opt_mem),
+                note="lower is better",
+            ))
+        if not metrics:
+            result.error = "nsys ran but no timing metrics could be extracted."
+            return result
+        # NOTE(review): results are stored on host_tool_name/host_tool_metrics;
+        # confirm that consumers reading `ProfilerResult.metrics` see them too.
+        result.available        = True
+        result.host_tool_name   = "nsys"
+        result.host_tool_metrics = metrics
+        return result

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/sandbox.py RENAMED Viewed

@@ -18,9 +18,11 @@ SANDBOX_IMAGES = {
     "cpp":    "coreinsight-cpp-sandbox:latest",
 }
+ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 DOCKERFILES = {
-    "python": "Dockerfile.python-sandbox",
-    "cpp":    "Dockerfile.cpp-sandbox",
+    "python": os.path.join(ROOT_DIR, "docker", "Dockerfile.python-sandbox"),
+    "cpp":    os.path.join(ROOT_DIR, "docker", "Dockerfile.cpp-sandbox"),
 }
 # ---------------------------------------------------------------------------
@@ -188,17 +190,21 @@ class CodeSandbox:
         label = "Python" if lang == "python" else "C++"
         console.print(f"[yellow]First run: building {label} sandbox image (one-time, ~30s)...[/yellow]")
-        dockerfile_path = importlib.resources.files("coreinsight").joinpath(DOCKERFILES[lang])
-        with importlib.resources.as_file(dockerfile_path) as dockerfile:
-            _, logs = self.client.images.build(
-                path=str(dockerfile.parent),
-                dockerfile=dockerfile.name,
-                tag=SANDBOX_IMAGES[lang],
-                rm=True,
+        dockerfile_full = DOCKERFILES[lang]
+        if not os.path.exists(dockerfile_full):
+            raise FileNotFoundError(
+                f"Dockerfile not found at {dockerfile_full}. "
+                f"Expected docker/ directory at project root."
             )
-            for chunk in logs:
-                if "stream" in chunk:
-                    logger.debug(chunk["stream"].strip())
+        _, logs = self.client.images.build(
+            path=os.path.dirname(dockerfile_full),
+            dockerfile=os.path.basename(dockerfile_full),
+            tag=SANDBOX_IMAGES[lang],
+            rm=True,
+        )
+        for chunk in logs:
+            if "stream" in chunk:
+                logger.debug(chunk["stream"].strip())
         console.print(f"[green]✓ {label} sandbox image built successfully.[/green]")

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/tui.py RENAMED Viewed

@@ -778,19 +778,15 @@ class CoreInsightApp(App):
             log.write,
             "\n[bold cyan]Running built-in Python demo...[/bold cyan]\n"
         )
-        # Temporarily patch the demo's console output into the TUI
-        import coreinsight.main as _main
-        _prev = _main.console
-        _main.console = tui_console
         try:
-            run_demo(lang="python", no_docker=no_docker)
+            # Pass tui_console directly — run_demo forwards it to run_analysis
+            # which handles the global console swap cleanly via try/finally
+            run_demo(lang="python", no_docker=no_docker, tui_console=tui_console)
         except SystemExit:
             pass
         except Exception as exc:
             self.call_from_thread(log.write, f"[red]Demo error: {exc}[/red]")
         finally:
-            _main.console = _prev
             self._busy = False
             self.call_from_thread(self._set_status, "Demo complete.")

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9/coreinsight_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreinsight-cli
-Version: 0.2.8
+Version: 0.2.9
 Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
 Author: Varun Jani
 License: GPL-3.0-or-later

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,11 +1,10 @@
 LICENSE
 README.md
 pyproject.toml
-coreinsight/Dockerfile.cpp-sandbox
-coreinsight/Dockerfile.python-sandbox
 coreinsight/__init__.py
 coreinsight/analyzer.py
 coreinsight/config.py
+coreinsight/embeddings.py
 coreinsight/hardware.py
 coreinsight/indexer.py
 coreinsight/main.py

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "coreinsight-cli"
-version = "0.2.8"
+version = "0.2.9"
 description = "Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA"
 license = {text = "GPL-3.0-or-later"}
 authors = [

coreinsight_cli-0.2.8/coreinsight/Dockerfile.cpp-sandbox DELETED Viewed

@@ -1,2 +0,0 @@
-FROM gcc:latest
-WORKDIR /workspace

coreinsight_cli-0.2.8/coreinsight/Dockerfile.python-sandbox DELETED Viewed

@@ -1,3 +0,0 @@
-FROM python:3.11-slim
-RUN pip install --no-cache-dir numpy pandas scipy matplotlib
-WORKDIR /workspace

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/LICENSE RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/README.md RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/__init__.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/config.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/__init__.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/bad_loop.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/data_processor.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/slow.cpp RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/hardware.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/parser.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/prompts.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/scanner.py RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/entry_points.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/requires.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/top_level.txt RENAMED Viewed

File without changes

{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/setup.cfg RENAMED Viewed

File without changes

coreinsight-cli 0.2.8__tar.gz → 0.2.9__tar.gz

coreinsight-cli 0.2.8.tar.gz → 0.2.9.tar.gz