coreinsight-cli 0.3.0__tar.gz → 0.3.1__tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {coreinsight_cli-0.3.0/coreinsight_cli.egg-info → coreinsight_cli-0.3.1}/PKG-INFO +1 -1
  2. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/main.py +160 -12
  3. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/memory.py +71 -0
  4. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/sandbox.py +39 -0
  5. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1/coreinsight_cli.egg-info}/PKG-INFO +1 -1
  6. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/pyproject.toml +1 -1
  7. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/LICENSE +0 -0
  8. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/README.md +0 -0
  9. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/__init__.py +0 -0
  10. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/analyzer.py +0 -0
  11. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/config.py +0 -0
  12. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/demo/__init__.py +0 -0
  13. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/demo/bad_loop.py +0 -0
  14. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/demo/data_processor.py +0 -0
  15. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/demo/slow.cpp +0 -0
  16. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/embeddings.py +0 -0
  17. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/hardware.py +0 -0
  18. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/indexer.py +0 -0
  19. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/parser.py +0 -0
  20. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/profiler.py +0 -0
  21. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/prompts.py +0 -0
  22. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/scanner.py +0 -0
  23. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight/tui.py +0 -0
  24. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight_cli.egg-info/SOURCES.txt +0 -0
  25. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
  26. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight_cli.egg-info/entry_points.txt +0 -0
  27. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight_cli.egg-info/requires.txt +0 -0
  28. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/coreinsight_cli.egg-info/top_level.txt +0 -0
  29. {coreinsight_cli-0.3.0 → coreinsight_cli-0.3.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coreinsight-cli
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
5
  Author: Varun Jani
6
6
  License: GPL-3.0-or-later
@@ -72,12 +72,10 @@ def _check_speedup_success(success: bool, logs: str) -> bool:
72
72
  pass
73
73
  return False
74
74
 
75
-
76
75
  def _run_single_agent(
77
76
  func_name, original_code, language, context,
78
77
  hardware_target, sandbox, agent, tier_limits,
79
- stream_callback: Optional[Callable[[str], None]] = None,
80
- ):
78
+ stream_callback: Optional[Callable[[str], None]] = None):
81
79
  """
82
80
  Original single-agent pipeline.
83
81
  Returns (result, optimized_code, success, logs, plot_data, is_valid).
@@ -133,12 +131,10 @@ def _run_single_agent(
133
131
 
134
132
  return result, optimized_code, success, logs, plot_data, is_valid
135
133
 
136
-
137
134
  def _run_multi_agent(
138
135
  func_name, original_code, language, context,
139
136
  hardware_target, sandbox, multi_agents, tier_limits,
140
- stream_callback: Optional[Callable[[str], None]] = None,
141
- ):
137
+ stream_callback: Optional[Callable[[str], None]] = None):
142
138
  """
143
139
  Multi-agent pipeline.
144
140
  BottleneckAgent → analysis
@@ -260,6 +256,8 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
260
256
  else:
261
257
  _log(func_name, "Generating correctness test cases...")
262
258
  test_cases = agent.generate_test_cases(func_name, original_code, language, context, num_cases=tier_limits["num_test_cases"])
259
+ if memory:
260
+ memory.store_test_cases(original_code, test_cases)
263
261
  _log(func_name, "Running correctness verification in Docker sandbox...")
264
262
  verification = sandbox.verify(
265
263
  csv_output=logs,
@@ -689,7 +687,12 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
689
687
  console.print(f"[red]Initialization Error:[/red] {e}")
690
688
  sys.exit(1)
691
689
 
692
- mode_label = "[bold cyan]Multi-Agent[/bold cyan]" if agent_mode == "multi" else "[dim]Single-Agent[/dim]"
690
+ if agent_mode == "multi":
691
+ mode_label = "[bold cyan]Multi-Agent[/bold cyan]"
692
+ elif agent_mode == "auto":
693
+ mode_label = "[cyan]Auto[/cyan]"
694
+ else:
695
+ mode_label = "Single-Agent"
693
696
  console.print(f"[dim]⚙️ Agent mode: {mode_label}[/dim]")
694
697
 
695
698
  mem_count = memory.stats().get("count", 0)
@@ -853,6 +856,136 @@ def run_demo(lang: str = "python", no_docker: bool = False, tui_console=None):
853
856
 
854
857
  run_analysis(str(demo_dir / entry_file), no_docker=no_docker, tui_console=tui_console)
855
858
 
859
+ def _run_test_cmd(func_name: str, no_docker: bool = False):
860
+ """
861
+ Re-run correctness verification for a stored optimized function.
862
+ LLM-free on all subsequent calls. On first call after an old analyze
863
+ run (before test case persistence was added), generates test cases
864
+ once via LLM and stores them so future calls need no LLM.
865
+ """
866
+ from coreinsight.memory import OptimizationMemory
867
+ from coreinsight.sandbox import CodeSandbox
868
+
869
+ mem = OptimizationMemory()
870
+ record = mem.lookup_by_name(func_name)
871
+
872
+ if not record:
873
+ console.print(
874
+ f"[yellow]No memory record found for '[bold]{func_name}[/bold]'.[/yellow]\n"
875
+ f"[dim]Run [cyan]coreinsight analyze[/cyan] on a file containing this function first.[/dim]"
876
+ )
877
+ return
878
+
879
+ language = record["language"]
880
+ original_code = record["original_code"]
881
+ optimized_code = record["optimized_code"]
882
+ test_cases = record["test_cases"]
883
+
884
+ console.print(Panel.fit(
885
+ f"Re-verifying [bold cyan]{func_name}[/bold cyan] ({language})",
886
+ border_style="cyan",
887
+ ))
888
+
889
+ if not optimized_code:
890
+ console.print("[red]Optimized code not found in memory store.[/red]")
891
+ return
892
+
893
+ # ── One-time LLM fallback for functions analyzed before test case persistence ──
894
+ if not test_cases:
895
+ console.print(
896
+ "[yellow]⚠ No test cases stored for this function.[/yellow]\n"
897
+ "[dim]Generating once via LLM — all future calls will be LLM-free...[/dim]"
898
+ )
899
+ try:
900
+ from coreinsight.analyzer import AnalyzerAgent
901
+ from coreinsight.config import get_model_tier, get_tier_limits
902
+ config = load_config()
903
+ provider = config.get("provider", "ollama")
904
+ model_name = config.get("model_name", "llama3.2")
905
+ api_keys = config.get("api_keys", {})
906
+ model_tier = get_model_tier(provider, model_name)
907
+ tier_limits = get_tier_limits(config)
908
+ agent = AnalyzerAgent(
909
+ provider=provider,
910
+ model_name=model_name,
911
+ api_keys=api_keys,
912
+ model_tier=model_tier,
913
+ )
914
+ test_cases = agent.generate_test_cases(
915
+ func_name, original_code, language,
916
+ context="",
917
+ num_cases=tier_limits["num_test_cases"],
918
+ )
919
+ except Exception as exc:
920
+ console.print(f"[red]LLM error generating test cases: {exc}[/red]")
921
+ return
922
+
923
+ if not test_cases:
924
+ console.print(
925
+ "[red]LLM returned no test cases. "
926
+ "Check your provider config with [cyan]coreinsight configure[/cyan].[/red]"
927
+ )
928
+ return
929
+
930
+ mem.store_test_cases(original_code, test_cases)
931
+ console.print(
932
+ f"[dim]✓ Generated and stored {len(test_cases)} test case(s). "
933
+ f"Future calls to [cyan]coreinsight test {func_name}[/cyan] need no LLM.[/dim]"
934
+ )
935
+
936
+ # ── Correctness sandbox — no LLM from this point ──────────────────────
937
+ sandbox = CodeSandbox(disabled=no_docker)
938
+
939
+ if language in ("cpp", "c++", "cuda"):
940
+ # C++/CUDA correctness harness is embedded by HarnessAgent at analysis
941
+ # time and cannot be reconstructed post-hoc. Show stored result instead.
942
+ meta = record["meta"]
943
+ passed_cases = int(meta.get("correctness_cases", 0))
944
+ total_cases = int(meta.get("total_cases", 0))
945
+ if total_cases > 0:
946
+ all_passed = passed_cases == total_cases
947
+ badge = "[bold green]✓ PASS[/bold green]" if all_passed else "[bold yellow]⚠ PARTIAL[/bold yellow]"
948
+ console.print(
949
+ f"{badge} — Stored result: "
950
+ f"{passed_cases}/{total_cases} test cases passed at analysis time."
951
+ )
952
+ else:
953
+ console.print(
954
+ "[dim]No stored correctness result for this function.[/dim]"
955
+ )
956
+ console.print(
957
+ "[dim]C++ re-verification requires re-running analysis. "
958
+ "Full results in [cyan]coreinsight memory[/cyan].[/dim]"
959
+ )
960
+ return
961
+
962
+ console.print(f"[dim]Running {len(test_cases)} test case(s) in Docker sandbox...[/dim]")
963
+
964
+ result = sandbox.verify_correctness_only(
965
+ original_code=original_code,
966
+ optimized_code=optimized_code,
967
+ original_func_name=func_name,
968
+ optimized_func_name=func_name,
969
+ test_cases=test_cases,
970
+ language=language,
971
+ )
972
+
973
+ if result.verified:
974
+ console.print(
975
+ f"[bold green]✓ PASS[/bold green] — "
976
+ f"{result.passed_cases}/{result.total_cases} test cases passed."
977
+ )
978
+ else:
979
+ console.print(
980
+ f"[bold red]✗ FAIL[/bold red] — "
981
+ f"{result.passed_cases}/{result.total_cases} test cases passed."
982
+ )
983
+ for failure in result.failures[:10]:
984
+ console.print(f" [red]✗[/red] {failure}")
985
+
986
+ if result.details:
987
+ console.print(f"[dim]{result.details}[/dim]")
988
+
856
989
  def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv"):
857
990
  from coreinsight.memory import OptimizationMemory, MEMORY_DIR
858
991
  import shutil
@@ -915,6 +1048,7 @@ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv
915
1048
  table.add_column("Function", justify="left", style="bold white")
916
1049
  table.add_column("Language", justify="center", style="cyan", width=10)
917
1050
  table.add_column("Speedup", justify="right", style="bold green", width=9)
1051
+ table.add_column("Tests", justify="right", style="green", width=10)
918
1052
  table.add_column("Severity", justify="center", width=10)
919
1053
  table.add_column("Issue", justify="left", style="dim white")
920
1054
  table.add_column("HW Evidence",justify="left", style="dim", width=22)
@@ -935,19 +1069,23 @@ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv
935
1069
  )
936
1070
 
937
1071
  for i, (meta, rid) in enumerate(paired, start=1):
938
- sev = meta.get("severity", "High")
939
- sev_c = severity_colors.get(sev, "white")
940
- ts = meta.get("timestamp", "")[:19].replace("T", " ")
941
- hw = meta.get("profiler_summary", "") or "—"
942
- issue = (meta.get("issue", "") or "—")[:60]
1072
+ sev = meta.get("severity", "High")
1073
+ sev_c = severity_colors.get(sev, "white")
1074
+ ts = meta.get("timestamp", "")[:19].replace("T", " ")
1075
+ hw = meta.get("profiler_summary", "") or "—"
1076
+ issue = (meta.get("issue", "") or "—")[:60]
943
1077
  if len(meta.get("issue", "")) > 60:
944
1078
  issue += "…"
1079
+ passed_c = int(meta.get("correctness_cases", 0))
1080
+ total_c = int(meta.get("total_cases", 0))
1081
+ tests_str = f"{passed_c}/{total_c}" if total_c > 0 else "—"
945
1082
 
946
1083
  table.add_row(
947
1084
  str(i),
948
1085
  meta.get("func_name", rid[:12]),
949
1086
  meta.get("language", "?"),
950
1087
  f"{float(meta.get('avg_speedup', 0)):.2f}x",
1088
+ tests_str,
951
1089
  f"[{sev_c}]{sev}[/{sev_c}]",
952
1090
  issue,
953
1091
  hw,
@@ -1005,6 +1143,11 @@ def main_cli():
1005
1143
  scan_parser.add_argument("--dir", default=".", help="Directory to scan")
1006
1144
  scan_parser.add_argument("--top", type=int, default=10, help="Number of hotspots to show")
1007
1145
 
1146
+ test_parser = subparsers.add_parser("test", help="Re-run verification sandbox for a stored function")
1147
+ test_parser.add_argument("func_name", help="Name of the function to re-verify")
1148
+ test_parser.add_argument("--no-docker", dest="no_docker", action="store_true",
1149
+ help="Skip Docker (will report skipped)")
1150
+
1008
1151
  args = parser.parse_args()
1009
1152
 
1010
1153
  if args.command == "configure":
@@ -1025,6 +1168,11 @@ def main_cli():
1025
1168
  export_path=getattr(args, "export_path", None),
1026
1169
  export_fmt=getattr(args, "export_fmt", "csv"),
1027
1170
  )
1171
+ elif args.command == "test":
1172
+ _run_test_cmd(
1173
+ func_name=args.func_name,
1174
+ no_docker=getattr(args, "no_docker", False),
1175
+ )
1028
1176
  elif args.command == "scan":
1029
1177
  scanner = ProjectScanner(args.dir)
1030
1178
  scanner.scan_project(max_results=args.top)
@@ -47,6 +47,8 @@ class MemoryHit:
47
47
  language: str
48
48
  severity: str = "High"
49
49
  correctness_cases: int = 0
50
+ total_cases: int = 0
51
+ test_cases: list = field(default_factory=list)
50
52
  profiler_summary: str = ""
51
53
 
52
54
 
@@ -302,6 +304,7 @@ class OptimizationMemory:
302
304
  "reasoning": (result.get("reasoning") or "")[:1000],
303
305
  "severity": result.get("severity", "High"),
304
306
  "correctness_cases": verification.correctness.passed_cases,
307
+ "total_cases": verification.correctness.total_cases,
305
308
  "profiler_summary": profiler_summary[:200],
306
309
  "timestamp": datetime.now(timezone.utc).isoformat(),
307
310
  }
@@ -329,10 +332,77 @@ class OptimizationMemory:
329
332
  except Exception as exc:
330
333
  return {"count": 0, "error": str(exc)}
331
334
 
335
+ def store_test_cases(self, original_code: str, test_cases: list) -> None:
336
+ """
337
+ Persist test cases for a function, keyed by AST hash.
338
+ Called from process_function immediately after test cases are generated,
339
+ so `coreinsight test` can re-run verification without the LLM.
340
+ """
341
+ if not self._ensure_db():
342
+ return
343
+ h = self.ast_hash(original_code)
344
+ with self._write_lock:
345
+ try:
346
+ self._code_dir.mkdir(parents=True, exist_ok=True)
347
+ self._save_test_cases(h, test_cases)
348
+ except Exception as exc:
349
+ logger.debug(f"store_test_cases failed: {exc}")
350
+
351
+ def lookup_by_name(self, func_name: str) -> Optional[dict]:
352
+ """
353
+ Find the most recent memory record whose func_name matches exactly.
354
+ Returns a dict with keys: func_name, language, original_code,
355
+ optimized_code, test_cases, meta. Returns None on no match.
356
+ """
357
+ if not self._ensure_db():
358
+ return None
359
+ try:
360
+ all_records = self._collection.get(
361
+ include=["metadatas", "documents"]
362
+ )
363
+ matches = [
364
+ (meta, doc, rid)
365
+ for meta, doc, rid in zip(
366
+ all_records.get("metadatas", []),
367
+ all_records.get("documents", []),
368
+ all_records.get("ids", []),
369
+ )
370
+ if meta.get("func_name") == func_name
371
+ ]
372
+ if not matches:
373
+ return None
374
+ # Most recent first
375
+ matches.sort(key=lambda x: x[0].get("timestamp", ""), reverse=True)
376
+ meta, original_code, h = matches[0]
377
+ return {
378
+ "func_name": func_name,
379
+ "language": meta.get("language", ""),
380
+ "original_code": original_code or "",
381
+ "optimized_code": self._load_code(h) or "",
382
+ "test_cases": self._load_test_cases(h) or [],
383
+ "meta": meta,
384
+ }
385
+ except Exception as exc:
386
+ logger.debug(f"lookup_by_name failed: {exc}")
387
+ return None
388
+
332
389
  # ------------------------------------------------------------------ #
333
390
  # Internal helpers
334
391
  # ------------------------------------------------------------------ #
335
392
 
393
+ def _save_test_cases(self, h: str, cases: list) -> None:
394
+ path = self._code_dir / f"{h}.test_cases.json"
395
+ path.write_text(json.dumps(cases), encoding="utf-8")
396
+
397
+ def _load_test_cases(self, h: str) -> Optional[list]:
398
+ path = self._code_dir / f"{h}.test_cases.json"
399
+ if not path.exists():
400
+ return None
401
+ try:
402
+ return json.loads(path.read_text(encoding="utf-8"))
403
+ except Exception:
404
+ return None
405
+
336
406
  def _save_code(self, h: str, language: str, code: str) -> None:
337
407
  ext = {"python": "py", "cpp": "cpp", "c++": "cpp", "cuda": "cu"}.get(language, "txt")
338
408
  path = self._code_dir / f"{h}.{ext}"
@@ -364,5 +434,6 @@ class OptimizationMemory:
364
434
  language= meta.get("language", ""),
365
435
  severity= meta.get("severity", "High"),
366
436
  correctness_cases= int(meta.get("correctness_cases", 0)),
437
+ total_cases= int(meta.get("total_cases", 0)),
367
438
  profiler_summary= meta.get("profiler_summary", ""),
368
439
  )
@@ -312,6 +312,45 @@ class CodeSandbox:
312
312
  )
313
313
  return VerificationResult(speedup=speedup_result, correctness=correctness_result)
314
314
 
315
+ def verify_correctness_only(
316
+ self,
317
+ original_code: str,
318
+ optimized_code: str,
319
+ original_func_name: str,
320
+ optimized_func_name: str,
321
+ test_cases: List[Dict[str, Any]],
322
+ language: str = "python",
323
+ timeout_seconds: int = 60,
324
+ context: str = "",
325
+ ) -> CorrectnessVerification:
326
+ """
327
+ Re-run correctness sandbox only — no speedup check, no LLM.
328
+ Used by `coreinsight test <function_name>`.
329
+
330
+ C++ and CUDA are not supported: their correctness harness is a
331
+ main() block embedded by HarnessAgent at analysis time and cannot
332
+ be reconstructed from stored test cases alone.
333
+ """
334
+ if self.disabled:
335
+ return CorrectnessVerification(verified=False, details=SANDBOX_SKIPPED_MSG)
336
+ if not self.client:
337
+ return CorrectnessVerification(verified=False, details="Docker unavailable.")
338
+ if language in ("cpp", "c++", "cuda"):
339
+ return CorrectnessVerification(
340
+ verified=False,
341
+ details=(
342
+ f"Re-verification not supported for {language}: "
343
+ "correctness harness is embedded at analysis time. "
344
+ "See stored pass rate in `coreinsight memory`."
345
+ ),
346
+ )
347
+ return self._verify_correctness(
348
+ original_code, optimized_code,
349
+ original_func_name, optimized_func_name,
350
+ test_cases, language, timeout_seconds,
351
+ context=context,
352
+ )
353
+
315
354
  def _verify_speedup(self, csv_output: str) -> SpeedupVerification:
316
355
  result = SpeedupVerification(verified=False)
317
356
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coreinsight-cli
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
5
  Author: Varun Jani
6
6
  License: GPL-3.0-or-later
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coreinsight-cli"
7
- version = "0.3.0"
7
+ version = "0.3.1"
8
8
  description = "Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA"
9
9
  license = {text = "GPL-3.0-or-later"}
10
10
  authors = [
File without changes