coreinsight-cli 0.2.8__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coreinsight_cli-0.2.8/coreinsight_cli.egg-info → coreinsight_cli-0.2.9}/PKG-INFO +1 -1
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/analyzer.py +3 -1
- coreinsight_cli-0.2.9/coreinsight/embeddings.py +103 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/indexer.py +2 -53
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/main.py +9 -4
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/memory.py +60 -59
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/profiler.py +265 -13
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/sandbox.py +18 -12
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/tui.py +3 -7
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9/coreinsight_cli.egg-info}/PKG-INFO +1 -1
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/SOURCES.txt +1 -2
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/pyproject.toml +1 -1
- coreinsight_cli-0.2.8/coreinsight/Dockerfile.cpp-sandbox +0 -2
- coreinsight_cli-0.2.8/coreinsight/Dockerfile.python-sandbox +0 -3
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/LICENSE +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/README.md +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/__init__.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/config.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/__init__.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/bad_loop.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/data_processor.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/demo/slow.cpp +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/hardware.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/parser.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/prompts.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight/scanner.py +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/entry_points.txt +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/requires.txt +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/top_level.txt +0 -0
- {coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/setup.cfg +0 -0
|
@@ -805,7 +805,9 @@ class HarnessAgent:
|
|
|
805
805
|
is_valid = self._check_speedup(success, logs)
|
|
806
806
|
retries += 1
|
|
807
807
|
|
|
808
|
-
if is_valid and retries > 0:
|
|
808
|
+
if getattr(sandbox, 'disabled', False):
|
|
809
|
+
pass # skipped intentionally — don't annotate as failed
|
|
810
|
+
elif is_valid and retries > 0:
|
|
809
811
|
logs = f"(Succeeded after {retries} retries)\n" + logs
|
|
810
812
|
elif not is_valid:
|
|
811
813
|
logs = f"(Failed after {retries} retries)\n" + logs
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
coreinsight/embeddings.py — Shared embedding utility
|
|
3
|
+
|
|
4
|
+
Single source of truth for embedding model loading used by both
|
|
5
|
+
memory.py (OptimizationMemory) and indexer.py (RepoIndexer).
|
|
6
|
+
|
|
7
|
+
Tries to load all-MiniLM-L6-v2 from local cache first.
|
|
8
|
+
Falls back to a deterministic hash-based embedder when offline
|
|
9
|
+
or when the model has not yet been downloaded.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import hashlib
|
|
14
|
+
import logging
|
|
15
|
+
import math
|
|
16
|
+
import os
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Tuple
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# All models cached here — never hits the network if already present
|
|
23
|
+
MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
|
|
24
|
+
MODEL_NAME = "all-MiniLM-L6-v2"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class _HashEmbeddingFunction:
|
|
28
|
+
"""
|
|
29
|
+
Deterministic offline fallback embedder.
|
|
30
|
+
|
|
31
|
+
Produces a 384-dim float vector from token overlap — no downloads,
|
|
32
|
+
no GPU, no network. Semantic quality is lower than MiniLM but RAG
|
|
33
|
+
and memory lookup still work via keyword/structural matching.
|
|
34
|
+
|
|
35
|
+
Run `coreinsight index` once while online to cache the real model.
|
|
36
|
+
"""
|
|
37
|
+
DIM = 384
|
|
38
|
+
|
|
39
|
+
def __call__(self, input: List[str]) -> List[List[float]]:
|
|
40
|
+
results = []
|
|
41
|
+
for text in input:
|
|
42
|
+
tokens = text.lower().split()
|
|
43
|
+
vec = [0.0] * self.DIM
|
|
44
|
+
for tok in tokens:
|
|
45
|
+
h = int(hashlib.sha256(tok.encode()).hexdigest(), 16)
|
|
46
|
+
vec[h % self.DIM] += 1.0
|
|
47
|
+
# L2 normalise so cosine distance works correctly
|
|
48
|
+
mag = math.sqrt(sum(x * x for x in vec)) or 1.0
|
|
49
|
+
results.append([x / mag for x in vec])
|
|
50
|
+
return results
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def load_embedding_fn() -> Tuple[object, str]:
|
|
54
|
+
"""
|
|
55
|
+
Load the sentence-transformer embedding function.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
(embedding_fn, label) where label is a human-readable string
|
|
59
|
+
indicating which embedder is active — shown in CLI output.
|
|
60
|
+
|
|
61
|
+
Strategy:
|
|
62
|
+
1. Pin HuggingFace cache to ~/.coreinsight/models so the model
|
|
63
|
+
is never re-downloaded on subsequent runs.
|
|
64
|
+
2. Probe the model with a dummy call to force-load weights now
|
|
65
|
+
rather than silently failing later during indexing or lookup.
|
|
66
|
+
3. On any failure (network error, disk full, offline) fall back
|
|
67
|
+
to _HashEmbeddingFunction with a visible warning.
|
|
68
|
+
"""
|
|
69
|
+
MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
70
|
+
|
|
71
|
+
# Pin cache dirs — must be set before chromadb.utils imports torch
|
|
72
|
+
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(MODEL_CACHE_DIR))
|
|
73
|
+
os.environ.setdefault("HF_HUB_CACHE", str(MODEL_CACHE_DIR))
|
|
74
|
+
# Allow download when online; callers that want strict offline can
|
|
75
|
+
# set HF_HUB_OFFLINE=1 in their environment before importing.
|
|
76
|
+
os.environ.setdefault("HF_HUB_OFFLINE", "0")
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
from chromadb.utils import embedding_functions as _ef
|
|
80
|
+
|
|
81
|
+
fn = _ef.SentenceTransformerEmbeddingFunction(model_name=MODEL_NAME)
|
|
82
|
+
|
|
83
|
+
# Force-load now so we catch errors here, not mid-analysis.
|
|
84
|
+
fn(["probe"])
|
|
85
|
+
|
|
86
|
+
label = f"{MODEL_NAME} (cached)"
|
|
87
|
+
logger.debug(f"Embedding model loaded: {label}")
|
|
88
|
+
return fn, label
|
|
89
|
+
|
|
90
|
+
except Exception as exc:
|
|
91
|
+
logger.warning(
|
|
92
|
+
f"SentenceTransformer unavailable ({exc}). "
|
|
93
|
+
f"Using offline hash embedder — semantic quality reduced. "
|
|
94
|
+
f"Run `coreinsight index` once while online to cache the model."
|
|
95
|
+
)
|
|
96
|
+
from rich.console import Console as _Console
|
|
97
|
+
_Console().print(
|
|
98
|
+
"[yellow]⚠ Embedding model unavailable (offline or not yet downloaded). "
|
|
99
|
+
"Using keyword-based fallback — RAG and memory recall will work but with "
|
|
100
|
+
"reduced semantic accuracy. "
|
|
101
|
+
"Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
|
|
102
|
+
)
|
|
103
|
+
return _HashEmbeddingFunction(), "hash-based (offline fallback)"
|
|
@@ -9,62 +9,11 @@ import chromadb
|
|
|
9
9
|
from chromadb.utils import embedding_functions
|
|
10
10
|
|
|
11
11
|
from coreinsight.parser import CodeParser
|
|
12
|
+
from coreinsight.embeddings import load_embedding_fn
|
|
12
13
|
|
|
13
14
|
console = Console()
|
|
14
15
|
logger = logging.getLogger(__name__)
|
|
15
16
|
|
|
16
|
-
# Local model cache — never hits the network if model is already here
|
|
17
|
-
_MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class _HashEmbeddingFunction:
|
|
21
|
-
"""
|
|
22
|
-
Deterministic offline fallback embedder.
|
|
23
|
-
Produces a 384-dim float vector from token overlap — no downloads, no GPU.
|
|
24
|
-
Semantic quality is lower than MiniLM but RAG still works via keyword matching.
|
|
25
|
-
"""
|
|
26
|
-
DIM = 384
|
|
27
|
-
|
|
28
|
-
def __call__(self, input: list[str]) -> list[list[float]]:
|
|
29
|
-
results = []
|
|
30
|
-
for text in input:
|
|
31
|
-
tokens = text.lower().split()
|
|
32
|
-
vec = [0.0] * self.DIM
|
|
33
|
-
for tok in tokens:
|
|
34
|
-
h = int(hashlib.sha256(tok.encode()).hexdigest(), 16)
|
|
35
|
-
vec[h % self.DIM] += 1.0
|
|
36
|
-
# L2 normalise
|
|
37
|
-
mag = math.sqrt(sum(x * x for x in vec)) or 1.0
|
|
38
|
-
results.append([x / mag for x in vec])
|
|
39
|
-
return results
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _load_embedding_fn():
|
|
43
|
-
"""
|
|
44
|
-
Try to load SentenceTransformer from local cache.
|
|
45
|
-
Falls back to _HashEmbeddingFunction if offline or model not cached.
|
|
46
|
-
"""
|
|
47
|
-
_MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
48
|
-
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(_MODEL_CACHE_DIR))
|
|
49
|
-
os.environ.setdefault("HF_HUB_OFFLINE", "0") # allow download when online
|
|
50
|
-
|
|
51
|
-
try:
|
|
52
|
-
fn = embedding_functions.SentenceTransformerEmbeddingFunction(
|
|
53
|
-
model_name="all-MiniLM-L6-v2",
|
|
54
|
-
)
|
|
55
|
-
# Probe: actually load the model now so we catch network errors here
|
|
56
|
-
# rather than silently later during indexing.
|
|
57
|
-
fn(["probe"])
|
|
58
|
-
return fn, "all-MiniLM-L6-v2 (cached)"
|
|
59
|
-
except Exception as e:
|
|
60
|
-
logger.warning(f"SentenceTransformer unavailable ({e}). Using offline hash embedder — semantic quality reduced.")
|
|
61
|
-
console.print(
|
|
62
|
-
"[yellow]⚠ Embedding model unavailable (offline or not yet downloaded). "
|
|
63
|
-
"Using keyword-based fallback — RAG will work but with reduced semantic accuracy. "
|
|
64
|
-
"Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
|
|
65
|
-
)
|
|
66
|
-
return _HashEmbeddingFunction(), "hash-based (offline fallback)"
|
|
67
|
-
|
|
68
17
|
|
|
69
18
|
class RepoIndexer:
|
|
70
19
|
def __init__(self, repo_path: str):
|
|
@@ -82,7 +31,7 @@ class RepoIndexer:
|
|
|
82
31
|
return True
|
|
83
32
|
try:
|
|
84
33
|
self._chroma_client = chromadb.PersistentClient(path=str(self.db_path))
|
|
85
|
-
self._embedding_fn, self._embedding_label = _load_embedding_fn()
|
|
34
|
+
self._embedding_fn, self._embedding_label = load_embedding_fn()
|
|
86
35
|
self._collection = self._chroma_client.get_or_create_collection(
|
|
87
36
|
name="codebase_context",
|
|
88
37
|
embedding_function=self._embedding_fn,
|
|
@@ -741,7 +741,7 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None):
|
|
|
741
741
|
finally:
|
|
742
742
|
console = _prev_console
|
|
743
743
|
|
|
744
|
-
def run_demo(lang: str = "python", no_docker: bool = False):
|
|
744
|
+
def run_demo(lang: str = "python", no_docker: bool = False, tui_console=None):
|
|
745
745
|
import shutil
|
|
746
746
|
import importlib.resources
|
|
747
747
|
|
|
@@ -804,11 +804,16 @@ def run_demo(lang: str = "python", no_docker: bool = False):
|
|
|
804
804
|
# For Python: auto-index so RAG cross-file context is showcased
|
|
805
805
|
if lang == "python":
|
|
806
806
|
console.print("[dim]Auto-indexing demo files to showcase RAG cross-file context...[/dim]")
|
|
807
|
-
|
|
808
|
-
|
|
807
|
+
try:
|
|
808
|
+
from coreinsight.indexer import RepoIndexer as _RepoIndexer
|
|
809
|
+
_RepoIndexer(str(demo_dir)).index_repository()
|
|
810
|
+
except Exception as _idx_err:
|
|
811
|
+
# Non-fatal — SQLite write conflicts can occur when running
|
|
812
|
+
# through the TUI. RAG context will be empty for this run.
|
|
813
|
+
console.print(f"[dim yellow]Indexing skipped (will retry next run): {_idx_err}[/dim yellow]")
|
|
809
814
|
console.print()
|
|
810
815
|
|
|
811
|
-
run_analysis(str(demo_dir / entry_file), no_docker=no_docker)
|
|
816
|
+
run_analysis(str(demo_dir / entry_file), no_docker=no_docker, tui_console=tui_console)
|
|
812
817
|
|
|
813
818
|
def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv"):
|
|
814
819
|
from coreinsight.memory import OptimizationMemory, MEMORY_DIR
|
|
@@ -21,12 +21,13 @@ from datetime import datetime, timezone
|
|
|
21
21
|
from pathlib import Path
|
|
22
22
|
from typing import Any, Dict, List, Optional
|
|
23
23
|
|
|
24
|
+
from coreinsight.embeddings import load_embedding_fn
|
|
25
|
+
|
|
24
26
|
logger = logging.getLogger(__name__)
|
|
25
27
|
|
|
26
28
|
MEMORY_DIR = Path.home() / ".coreinsight" / "memory_db"
|
|
27
29
|
CODE_DIR = MEMORY_DIR / "code"
|
|
28
30
|
COLLECTION = "optimization_memory"
|
|
29
|
-
EMBED_MODEL = "all-MiniLM-L6-v2" # same model as RepoIndexer — no extra download
|
|
30
31
|
|
|
31
32
|
# ChromaDB uses cosine *distance* (lower = more similar).
|
|
32
33
|
# 0.15 distance ≈ 0.85 cosine similarity for this embedding model.
|
|
@@ -54,17 +55,19 @@ class OptimizationMemory:
|
|
|
54
55
|
Local vector database of verified optimizations.
|
|
55
56
|
|
|
56
57
|
Reads are thread-safe (ChromaDB handles concurrent queries).
|
|
57
|
-
Writes are
|
|
58
|
-
|
|
58
|
+
Writes are serialized via _write_lock since store() can be called
|
|
59
|
+
from concurrent threads in process_function's as_completed loop.
|
|
59
60
|
"""
|
|
60
61
|
|
|
61
62
|
def __init__(self, memory_dir: Path = MEMORY_DIR) -> None:
|
|
62
|
-
|
|
63
|
-
self.
|
|
64
|
-
self.
|
|
65
|
-
self.
|
|
66
|
-
self.
|
|
67
|
-
self.
|
|
63
|
+
import threading
|
|
64
|
+
self._memory_dir = memory_dir
|
|
65
|
+
self._code_dir = memory_dir / "code"
|
|
66
|
+
self._client = None
|
|
67
|
+
self._collection = None
|
|
68
|
+
self._embed_fn = None
|
|
69
|
+
self._init_error = ""
|
|
70
|
+
self._write_lock = threading.Lock()
|
|
68
71
|
|
|
69
72
|
# ------------------------------------------------------------------ #
|
|
70
73
|
# Lazy init — avoids slow import at startup
|
|
@@ -78,13 +81,11 @@ class OptimizationMemory:
|
|
|
78
81
|
try:
|
|
79
82
|
try:
|
|
80
83
|
import chromadb
|
|
81
|
-
from chromadb.utils import embedding_functions
|
|
82
84
|
except Exception as sqlite_exc:
|
|
83
85
|
self._init_error = (
|
|
84
86
|
f"ChromaDB unavailable (likely outdated SQLite): {sqlite_exc}. "
|
|
85
87
|
"Optimization memory disabled. "
|
|
86
|
-
"Fix: pip install
|
|
87
|
-
" import pysqlite3, sys; sys.modules['sqlite3'] = pysqlite3"
|
|
88
|
+
"Fix: pip install coreinsight-cli[compat]"
|
|
88
89
|
)
|
|
89
90
|
return False
|
|
90
91
|
|
|
@@ -92,9 +93,8 @@ class OptimizationMemory:
|
|
|
92
93
|
self._code_dir.mkdir(parents=True, exist_ok=True)
|
|
93
94
|
|
|
94
95
|
self._client = chromadb.PersistentClient(path=str(self._memory_dir))
|
|
95
|
-
self._embed_fn =
|
|
96
|
-
|
|
97
|
-
)
|
|
96
|
+
self._embed_fn, _embed_label = load_embedding_fn()
|
|
97
|
+
logger.debug(f"Memory embedder: {_embed_label}")
|
|
98
98
|
self._collection = self._client.get_or_create_collection(
|
|
99
99
|
name=COLLECTION,
|
|
100
100
|
embedding_function=self._embed_fn,
|
|
@@ -273,52 +273,53 @@ class OptimizationMemory:
|
|
|
273
273
|
"""
|
|
274
274
|
if not self._ensure_db():
|
|
275
275
|
return False
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
276
|
+
with self._write_lock:
|
|
277
|
+
try:
|
|
278
|
+
h = self.ast_hash(original_code)
|
|
279
|
+
opt_code = result.get("optimized_code", "") or ""
|
|
280
|
+
avg_speedup = 0.0
|
|
281
|
+
if verification.speedup.computed_speedups:
|
|
282
|
+
avg_speedup = (
|
|
283
|
+
sum(verification.speedup.computed_speedups)
|
|
284
|
+
/ len(verification.speedup.computed_speedups)
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
profiler_summary = ""
|
|
288
|
+
if profiler_result and profiler_result.available and profiler_result.metrics:
|
|
289
|
+
parts = [
|
|
290
|
+
f"{m.name}: {m.delta}"
|
|
291
|
+
for m in profiler_result.metrics[:2]
|
|
292
|
+
]
|
|
293
|
+
profiler_summary = " | ".join(parts)
|
|
294
|
+
|
|
295
|
+
self._save_code(h, language, opt_code)
|
|
296
|
+
|
|
297
|
+
meta = {
|
|
298
|
+
"func_name": func_name,
|
|
299
|
+
"language": language,
|
|
300
|
+
"avg_speedup": round(avg_speedup, 4),
|
|
301
|
+
"issue": (result.get("issue") or "")[:500],
|
|
302
|
+
"reasoning": (result.get("reasoning") or "")[:1000],
|
|
303
|
+
"severity": result.get("severity", "High"),
|
|
304
|
+
"correctness_cases": verification.correctness.passed_cases,
|
|
305
|
+
"profiler_summary": profiler_summary[:200],
|
|
306
|
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
self._collection.upsert(
|
|
310
|
+
ids=[h],
|
|
311
|
+
documents=[original_code],
|
|
312
|
+
metadatas=[meta],
|
|
284
313
|
)
|
|
314
|
+
logger.info(
|
|
315
|
+
f"Memory: stored '{func_name}' "
|
|
316
|
+
f"(hash={h[:8]}…, speedup={avg_speedup:.2f}x)"
|
|
317
|
+
)
|
|
318
|
+
return True
|
|
285
319
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
f"{m.name}: {m.delta}"
|
|
290
|
-
for m in profiler_result.metrics[:2]
|
|
291
|
-
]
|
|
292
|
-
profiler_summary = " | ".join(parts)
|
|
293
|
-
|
|
294
|
-
self._save_code(h, language, opt_code)
|
|
295
|
-
|
|
296
|
-
meta = {
|
|
297
|
-
"func_name": func_name,
|
|
298
|
-
"language": language,
|
|
299
|
-
"avg_speedup": round(avg_speedup, 4),
|
|
300
|
-
"issue": (result.get("issue") or "")[:500],
|
|
301
|
-
"reasoning": (result.get("reasoning") or "")[:1000],
|
|
302
|
-
"severity": result.get("severity", "High"),
|
|
303
|
-
"correctness_cases": verification.correctness.passed_cases,
|
|
304
|
-
"profiler_summary": profiler_summary[:200],
|
|
305
|
-
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
self._collection.upsert(
|
|
309
|
-
ids=[h],
|
|
310
|
-
documents=[original_code],
|
|
311
|
-
metadatas=[meta],
|
|
312
|
-
)
|
|
313
|
-
logger.info(
|
|
314
|
-
f"Memory: stored '{func_name}' "
|
|
315
|
-
f"(hash={h[:8]}…, speedup={avg_speedup:.2f}x)"
|
|
316
|
-
)
|
|
317
|
-
return True
|
|
318
|
-
|
|
319
|
-
except Exception as exc:
|
|
320
|
-
logger.debug(f"Memory store failed: {exc}")
|
|
321
|
-
return False
|
|
320
|
+
except Exception as exc:
|
|
321
|
+
logger.debug(f"Memory store failed: {exc}")
|
|
322
|
+
return False
|
|
322
323
|
|
|
323
324
|
def stats(self) -> Dict[str, Any]:
|
|
324
325
|
if not self._ensure_db():
|
|
@@ -156,6 +156,78 @@ def _fmt_int(n: int) -> str:
|
|
|
156
156
|
return f"{n:,}"
|
|
157
157
|
|
|
158
158
|
|
|
159
|
+
def _parse_nsys_stats(output: str) -> Dict[str, Any]:
|
|
160
|
+
"""
|
|
161
|
+
Parse `nsys profile --stats=true` stdout into structured metrics.
|
|
162
|
+
Extracts kernel timing and memory throughput from the summary tables.
|
|
163
|
+
"""
|
|
164
|
+
result: Dict[str, Any] = {}
|
|
165
|
+
|
|
166
|
+
# ── Kernel statistics ────────────────────────────────────────────────
|
|
167
|
+
# Header: Time(%) Total Time (ns) Instances Avg (ns) ... Name
|
|
168
|
+
kernel_section = False
|
|
169
|
+
kernels = []
|
|
170
|
+
for line in output.splitlines():
|
|
171
|
+
if "CUDA Kernel Statistics" in line or "GPU Kernel Summary" in line:
|
|
172
|
+
kernel_section = True
|
|
173
|
+
continue
|
|
174
|
+
if kernel_section:
|
|
175
|
+
if line.strip() == "" or line.startswith("="):
|
|
176
|
+
if kernels:
|
|
177
|
+
kernel_section = False
|
|
178
|
+
continue
|
|
179
|
+
# Skip header/separator lines
|
|
180
|
+
if "Time(%)" in line or "----" in line:
|
|
181
|
+
continue
|
|
182
|
+
parts = line.split()
|
|
183
|
+
if len(parts) >= 7:
|
|
184
|
+
try:
|
|
185
|
+
kernels.append({
|
|
186
|
+
"pct": float(parts[0]),
|
|
187
|
+
"total_ns": float(parts[1].replace(",", "")),
|
|
188
|
+
"instances": int(parts[2].replace(",", "")),
|
|
189
|
+
"avg_ns": float(parts[3].replace(",", "")),
|
|
190
|
+
"name": " ".join(parts[7:]) if len(parts) > 7 else parts[-1],
|
|
191
|
+
})
|
|
192
|
+
except (ValueError, IndexError):
|
|
193
|
+
continue
|
|
194
|
+
|
|
195
|
+
if kernels:
|
|
196
|
+
# Top kernel by total time
|
|
197
|
+
top = max(kernels, key=lambda k: k["total_ns"])
|
|
198
|
+
result["top_kernel_name"] = top["name"]
|
|
199
|
+
result["top_kernel_avg_ns"] = top["avg_ns"]
|
|
200
|
+
result["top_kernel_total_ns"] = top["total_ns"]
|
|
201
|
+
result["top_kernel_instances"]= top["instances"]
|
|
202
|
+
result["total_kernel_ns"] = sum(k["total_ns"] for k in kernels)
|
|
203
|
+
|
|
204
|
+
# ── Memory throughput ────────────────────────────────────────────────
|
|
205
|
+
# Look for "Memory Throughput" or HtoD/DtoH transfer lines
|
|
206
|
+
mem_section = False
|
|
207
|
+
total_mem_ns = 0.0
|
|
208
|
+
for line in output.splitlines():
|
|
209
|
+
if "Memory Operation" in line or "Memory Throughput" in line:
|
|
210
|
+
mem_section = True
|
|
211
|
+
continue
|
|
212
|
+
if mem_section:
|
|
213
|
+
if line.strip() == "" or line.startswith("="):
|
|
214
|
+
mem_section = False
|
|
215
|
+
continue
|
|
216
|
+
if "Time(%)" in line or "----" in line:
|
|
217
|
+
continue
|
|
218
|
+
parts = line.split()
|
|
219
|
+
if len(parts) >= 3:
|
|
220
|
+
try:
|
|
221
|
+
total_mem_ns += float(parts[1].replace(",", ""))
|
|
222
|
+
except (ValueError, IndexError):
|
|
223
|
+
continue
|
|
224
|
+
|
|
225
|
+
if total_mem_ns:
|
|
226
|
+
result["total_mem_transfer_ns"] = total_mem_ns
|
|
227
|
+
|
|
228
|
+
return result
|
|
229
|
+
|
|
230
|
+
|
|
159
231
|
def _parse_perf_stat(stderr: str) -> Dict[str, float]:
|
|
160
232
|
"""Extract hardware counter values from `perf stat` stderr output."""
|
|
161
233
|
targets = {
|
|
@@ -284,7 +356,14 @@ class HardwareProfiler:
|
|
|
284
356
|
source_dir=source_dir,
|
|
285
357
|
)
|
|
286
358
|
if language in ("cuda", "cu", "cuh"):
|
|
287
|
-
return self._profile_cuda(
|
|
359
|
+
return self._profile_cuda(
|
|
360
|
+
detected,
|
|
361
|
+
original_code=original_code,
|
|
362
|
+
optimized_code=optimized_code,
|
|
363
|
+
func_name=func_name,
|
|
364
|
+
original_file_content=original_file_content,
|
|
365
|
+
source_dir=source_dir,
|
|
366
|
+
)
|
|
288
367
|
except Exception as exc:
|
|
289
368
|
logger.debug(f"HardwareProfiler.profile exception: {exc}", exc_info=True)
|
|
290
369
|
return ProfilerResult(
|
|
@@ -710,17 +789,190 @@ class HardwareProfiler:
|
|
|
710
789
|
return metrics or None
|
|
711
790
|
|
|
712
791
|
# ------------------------------------------------------------------ #
|
|
713
|
-
# CUDA path
|
|
792
|
+
# CUDA path — nsys CLI profiling
|
|
714
793
|
# ------------------------------------------------------------------ #
|
|
715
794
|
|
|
716
|
-
def _profile_cuda(
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
795
|
+
def _profile_cuda(
|
|
796
|
+
self,
|
|
797
|
+
detected: Dict[str, bool],
|
|
798
|
+
original_code: str = "",
|
|
799
|
+
optimized_code: str = "",
|
|
800
|
+
func_name: str = "",
|
|
801
|
+
original_file_content: str = "",
|
|
802
|
+
source_dir: str = "",
|
|
803
|
+
) -> ProfilerResult:
|
|
804
|
+
result = ProfilerResult(available=False, tool="nsys", language="cuda")
|
|
805
|
+
|
|
806
|
+
if not detected.get("nsys"):
|
|
807
|
+
if detected.get("nvprof"):
|
|
808
|
+
result.error = (
|
|
809
|
+
"nvprof detected but not yet supported — install nsys "
|
|
810
|
+
"from CUDA Toolkit 11.0+ for hardware profiling."
|
|
811
|
+
)
|
|
812
|
+
else:
|
|
813
|
+
result.error = (
|
|
814
|
+
"No CUDA profiling tools found on PATH. "
|
|
815
|
+
"Install nsys: https://developer.nvidia.com/nsight-systems"
|
|
816
|
+
)
|
|
817
|
+
return result
|
|
818
|
+
|
|
819
|
+
if not shutil.which("nvcc"):
|
|
820
|
+
result.error = "nvcc not found — required to compile CUDA sources for profiling."
|
|
821
|
+
return result
|
|
822
|
+
|
|
823
|
+
if not original_file_content:
|
|
824
|
+
result.error = "No CUDA source content available for profiling."
|
|
825
|
+
return result
|
|
826
|
+
|
|
827
|
+
stats_per_label: Dict[str, Dict[str, Any]] = {}
|
|
828
|
+
|
|
829
|
+
# Build optimized source by appending the optimized kernel —
|
|
830
|
+
# last __global__ definition with the same name wins at link time
|
|
831
|
+
# only if we can safely substitute; otherwise skip optimized run.
|
|
832
|
+
sources = [("original", original_file_content)]
|
|
833
|
+
if optimized_code and func_name:
|
|
834
|
+
opt_src = (
|
|
835
|
+
original_file_content.strip()
|
|
836
|
+
+ "\n\n// --- CoreInsight optimized replacement ---\n"
|
|
837
|
+
+ optimized_code.strip()
|
|
838
|
+
)
|
|
839
|
+
sources.append(("optimized", opt_src))
|
|
840
|
+
|
|
841
|
+
tmp = tempfile.mkdtemp()
|
|
842
|
+
try:
|
|
843
|
+
for label, src in sources:
|
|
844
|
+
src_path = os.path.join(tmp, f"{label}.cu")
|
|
845
|
+
bin_path = os.path.join(tmp, label)
|
|
846
|
+
|
|
847
|
+
with open(src_path, "w") as fh:
|
|
848
|
+
fh.write(src)
|
|
849
|
+
|
|
850
|
+
# Compile
|
|
851
|
+
compile_proc = subprocess.run(
|
|
852
|
+
["nvcc", "-O3", "-arch=native", src_path, "-o", bin_path],
|
|
853
|
+
capture_output=True, text=True, timeout=120,
|
|
854
|
+
)
|
|
855
|
+
if compile_proc.returncode != 0:
|
|
856
|
+
# Try without -arch=native (older nvcc versions)
|
|
857
|
+
compile_proc = subprocess.run(
|
|
858
|
+
["nvcc", "-O3", src_path, "-o", bin_path],
|
|
859
|
+
capture_output=True, text=True, timeout=120,
|
|
860
|
+
)
|
|
861
|
+
if compile_proc.returncode != 0:
|
|
862
|
+
logger.debug(
|
|
863
|
+
f"CUDA compile failed for {label}:\n"
|
|
864
|
+
f"{compile_proc.stderr[:400]}"
|
|
865
|
+
)
|
|
866
|
+
result.error = (
|
|
867
|
+
f"nvcc compilation failed for {label} version.\n"
|
|
868
|
+
f"{compile_proc.stderr[:300]}"
|
|
869
|
+
)
|
|
870
|
+
return result
|
|
871
|
+
|
|
872
|
+
# Profile with nsys
|
|
873
|
+
nsys_out_base = os.path.join(tmp, f"nsys_{label}")
|
|
874
|
+
try:
|
|
875
|
+
nsys_proc = subprocess.run(
|
|
876
|
+
[
|
|
877
|
+
"nsys", "profile",
|
|
878
|
+
"--stats=true",
|
|
879
|
+
"--force-overwrite=true",
|
|
880
|
+
"-o", nsys_out_base,
|
|
881
|
+
bin_path,
|
|
882
|
+
],
|
|
883
|
+
capture_output=True, text=True, timeout=300,
|
|
884
|
+
)
|
|
885
|
+
# nsys writes stats to stdout; combined output in stderr too
|
|
886
|
+
combined = nsys_proc.stdout + nsys_proc.stderr
|
|
887
|
+
parsed = _parse_nsys_stats(combined)
|
|
888
|
+
|
|
889
|
+
if not parsed:
|
|
890
|
+
logger.debug(
|
|
891
|
+
f"nsys: no stats parsed for {label}.\n"
|
|
892
|
+
f"nsys stdout: {nsys_proc.stdout[:300]}\n"
|
|
893
|
+
f"nsys stderr: {nsys_proc.stderr[:300]}"
|
|
894
|
+
)
|
|
895
|
+
result.error = (
|
|
896
|
+
f"nsys ran but produced no parseable stats for {label}. "
|
|
897
|
+
f"Ensure the binary launches at least one CUDA kernel."
|
|
898
|
+
)
|
|
899
|
+
return result
|
|
900
|
+
|
|
901
|
+
stats_per_label[label] = parsed
|
|
902
|
+
|
|
903
|
+
except subprocess.TimeoutExpired:
|
|
904
|
+
result.error = "nsys profiling timed out (300s)."
|
|
905
|
+
return result
|
|
906
|
+
except Exception as exc:
|
|
907
|
+
result.error = f"nsys execution error: {exc}"
|
|
908
|
+
return result
|
|
909
|
+
|
|
910
|
+
except Exception as exc:
|
|
911
|
+
logger.debug(f"CUDA profiling error: {exc}")
|
|
912
|
+
result.error = f"CUDA profiling failed: {exc}"
|
|
913
|
+
return result
|
|
914
|
+
finally:
|
|
915
|
+
shutil.rmtree(tmp, ignore_errors=True)
|
|
916
|
+
|
|
917
|
+
if "original" not in stats_per_label:
|
|
918
|
+
result.error = "No profiling data collected."
|
|
919
|
+
return result
|
|
920
|
+
|
|
921
|
+
orig_s = stats_per_label["original"]
|
|
922
|
+
opt_s = stats_per_label.get("optimized", orig_s)
|
|
923
|
+
|
|
924
|
+
metrics: List[ProfilerMetric] = []
|
|
925
|
+
|
|
926
|
+
# ── Kernel timing ─────────────────────────────────────────────
|
|
927
|
+
orig_ns = orig_s.get("top_kernel_avg_ns", 0.0)
|
|
928
|
+
opt_ns = opt_s.get("top_kernel_avg_ns", orig_ns)
|
|
929
|
+
if orig_ns:
|
|
930
|
+
metrics.append(ProfilerMetric(
|
|
931
|
+
name=f"Kernel avg time [{orig_s.get('top_kernel_name', 'top kernel')}]",
|
|
932
|
+
original=f"{orig_ns / 1000:.2f} µs",
|
|
933
|
+
optimized=f"{opt_ns / 1000:.2f} µs",
|
|
934
|
+
delta=_pct_delta(orig_ns, opt_ns),
|
|
935
|
+
note="lower is better",
|
|
936
|
+
))
|
|
937
|
+
|
|
938
|
+
orig_total = orig_s.get("total_kernel_ns", 0.0)
|
|
939
|
+
opt_total = opt_s.get("total_kernel_ns", orig_total)
|
|
940
|
+
if orig_total:
|
|
941
|
+
metrics.append(ProfilerMetric(
|
|
942
|
+
name="Total kernel time",
|
|
943
|
+
original=f"{orig_total / 1e6:.3f} ms",
|
|
944
|
+
optimized=f"{opt_total / 1e6:.3f} ms",
|
|
945
|
+
delta=_pct_delta(orig_total, opt_total),
|
|
946
|
+
note="lower is better",
|
|
947
|
+
))
|
|
948
|
+
|
|
949
|
+
orig_inst = orig_s.get("top_kernel_instances", 0)
|
|
950
|
+
if orig_inst:
|
|
951
|
+
metrics.append(ProfilerMetric(
|
|
952
|
+
name="Kernel launches",
|
|
953
|
+
original=str(orig_inst),
|
|
954
|
+
optimized=str(opt_s.get("top_kernel_instances", orig_inst)),
|
|
955
|
+
delta="—",
|
|
956
|
+
note="",
|
|
957
|
+
))
|
|
958
|
+
|
|
959
|
+
# ── Memory transfers ──────────────────────────────────────────
|
|
960
|
+
orig_mem = orig_s.get("total_mem_transfer_ns", 0.0)
|
|
961
|
+
opt_mem = opt_s.get("total_mem_transfer_ns", orig_mem)
|
|
962
|
+
if orig_mem:
|
|
963
|
+
metrics.append(ProfilerMetric(
|
|
964
|
+
name="Total memory transfer time",
|
|
965
|
+
original=f"{orig_mem / 1e6:.3f} ms",
|
|
966
|
+
optimized=f"{opt_mem / 1e6:.3f} ms",
|
|
967
|
+
delta=_pct_delta(orig_mem, opt_mem),
|
|
968
|
+
note="lower is better",
|
|
969
|
+
))
|
|
970
|
+
|
|
971
|
+
if not metrics:
|
|
972
|
+
result.error = "nsys ran but no timing metrics could be extracted."
|
|
973
|
+
return result
|
|
974
|
+
|
|
975
|
+
result.available = True
|
|
976
|
+
result.host_tool_name = "nsys"
|
|
977
|
+
result.host_tool_metrics = metrics
|
|
978
|
+
return result
|
|
@@ -18,9 +18,11 @@ SANDBOX_IMAGES = {
|
|
|
18
18
|
"cpp": "coreinsight-cpp-sandbox:latest",
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
22
|
+
|
|
21
23
|
DOCKERFILES = {
|
|
22
|
-
"python": "Dockerfile.python-sandbox",
|
|
23
|
-
"cpp": "Dockerfile.cpp-sandbox",
|
|
24
|
+
"python": os.path.join(ROOT_DIR, "docker", "Dockerfile.python-sandbox"),
|
|
25
|
+
"cpp": os.path.join(ROOT_DIR, "docker", "Dockerfile.cpp-sandbox"),
|
|
24
26
|
}
|
|
25
27
|
|
|
26
28
|
# ---------------------------------------------------------------------------
|
|
@@ -188,17 +190,21 @@ class CodeSandbox:
|
|
|
188
190
|
label = "Python" if lang == "python" else "C++"
|
|
189
191
|
console.print(f"[yellow]First run: building {label} sandbox image (one-time, ~30s)...[/yellow]")
|
|
190
192
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
tag=SANDBOX_IMAGES[lang],
|
|
197
|
-
rm=True,
|
|
193
|
+
dockerfile_full = DOCKERFILES[lang]
|
|
194
|
+
if not os.path.exists(dockerfile_full):
|
|
195
|
+
raise FileNotFoundError(
|
|
196
|
+
f"Dockerfile not found at {dockerfile_full}. "
|
|
197
|
+
f"Expected docker/ directory at project root."
|
|
198
198
|
)
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
199
|
+
_, logs = self.client.images.build(
|
|
200
|
+
path=os.path.dirname(dockerfile_full),
|
|
201
|
+
dockerfile=os.path.basename(dockerfile_full),
|
|
202
|
+
tag=SANDBOX_IMAGES[lang],
|
|
203
|
+
rm=True,
|
|
204
|
+
)
|
|
205
|
+
for chunk in logs:
|
|
206
|
+
if "stream" in chunk:
|
|
207
|
+
logger.debug(chunk["stream"].strip())
|
|
202
208
|
|
|
203
209
|
console.print(f"[green]✓ {label} sandbox image built successfully.[/green]")
|
|
204
210
|
|
|
@@ -778,19 +778,15 @@ class CoreInsightApp(App):
|
|
|
778
778
|
log.write,
|
|
779
779
|
"\n[bold cyan]Running built-in Python demo...[/bold cyan]\n"
|
|
780
780
|
)
|
|
781
|
-
|
|
782
|
-
# Temporarily patch the demo's console output into the TUI
|
|
783
|
-
import coreinsight.main as _main
|
|
784
|
-
_prev = _main.console
|
|
785
|
-
_main.console = tui_console
|
|
786
781
|
try:
|
|
787
|
-
run_demo
|
|
782
|
+
# Pass tui_console directly — run_demo forwards it to run_analysis
|
|
783
|
+
# which handles the global console swap cleanly via try/finally
|
|
784
|
+
run_demo(lang="python", no_docker=no_docker, tui_console=tui_console)
|
|
788
785
|
except SystemExit:
|
|
789
786
|
pass
|
|
790
787
|
except Exception as exc:
|
|
791
788
|
self.call_from_thread(log.write, f"[red]Demo error: {exc}[/red]")
|
|
792
789
|
finally:
|
|
793
|
-
_main.console = _prev
|
|
794
790
|
self._busy = False
|
|
795
791
|
self.call_from_thread(self._set_status, "Demo complete.")
|
|
796
792
|
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
|
-
coreinsight/Dockerfile.cpp-sandbox
|
|
5
|
-
coreinsight/Dockerfile.python-sandbox
|
|
6
4
|
coreinsight/__init__.py
|
|
7
5
|
coreinsight/analyzer.py
|
|
8
6
|
coreinsight/config.py
|
|
7
|
+
coreinsight/embeddings.py
|
|
9
8
|
coreinsight/hardware.py
|
|
10
9
|
coreinsight/indexer.py
|
|
11
10
|
coreinsight/main.py
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "coreinsight-cli"
|
|
7
|
-
version = "0.2.8"
|
|
7
|
+
version = "0.2.9"
|
|
8
8
|
description = "Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA"
|
|
9
9
|
license = {text = "GPL-3.0-or-later"}
|
|
10
10
|
authors = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{coreinsight_cli-0.2.8 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|