coreinsight-cli 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {coreinsight_cli-0.3.1/coreinsight_cli.egg-info → coreinsight_cli-0.3.2}/PKG-INFO +1 -1
  2. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/main.py +129 -17
  3. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/memory.py +10 -0
  4. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/prompts.py +9 -7
  5. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/sandbox.py +30 -1
  6. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2/coreinsight_cli.egg-info}/PKG-INFO +1 -1
  7. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/pyproject.toml +1 -1
  8. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/LICENSE +0 -0
  9. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/README.md +0 -0
  10. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/__init__.py +0 -0
  11. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/analyzer.py +0 -0
  12. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/config.py +0 -0
  13. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/__init__.py +0 -0
  14. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/bad_loop.py +0 -0
  15. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/data_processor.py +0 -0
  16. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/slow.cpp +0 -0
  17. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/embeddings.py +0 -0
  18. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/hardware.py +0 -0
  19. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/indexer.py +0 -0
  20. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/parser.py +0 -0
  21. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/profiler.py +0 -0
  22. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/scanner.py +0 -0
  23. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/tui.py +0 -0
  24. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/SOURCES.txt +0 -0
  25. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
  26. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/entry_points.txt +0 -0
  27. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/requires.txt +0 -0
  28. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/top_level.txt +0 -0
  29. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coreinsight-cli
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
5
  Author: Varun Jani
6
6
  License: GPL-3.0-or-later
@@ -205,21 +205,78 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
205
205
  _log(func_name, "Fetching RAG context...")
206
206
  context = indexer.get_context_for_code(original_code) if indexer else ""
207
207
 
208
- # 0b. Memory lookup — skip LLM entirely if we've seen this pattern before
208
+ # 0b. Memory lookup — skip LLM if we've seen this pattern before,
209
+ # but validate the stored result before trusting it:
210
+ # Gate A: no optimized code stored → previous run was incomplete, re-run LLM
211
+ # Gate B: correctness < 50% last run → keep analysis, re-run correctness only
212
+ # Gate C: result is good → return as-is
209
213
  if memory:
210
214
  memory_hit = memory.lookup(original_code, language)
211
215
  if memory_hit:
212
216
  label = "exact match" if memory_hit.is_exact else f"similarity {memory_hit.similarity:.1%}"
213
- _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
214
- recalled_result = {
215
- "severity": memory_hit.severity,
216
- "issue": memory_hit.issue,
217
- "reasoning": memory_hit.reasoning,
218
- "optimized_code": memory_hit.optimized_code,
219
- "suggestion": "",
220
- "bottlenecks": [],
221
- }
222
- return func_name, recalled_result, None, None, None, None, memory_hit, False
217
+
218
+ # Gate A: stored result has no optimized code — not useful, fall through to LLM
219
+ if not memory_hit.optimized_code:
220
+ _log(func_name, f"Memory hit ({label}) — no optimized code stored, re-running LLM", style="yellow")
221
+ memory_hit = None # fall through; LLM path runs below as normal
222
+
223
+ # Gate B: correctness was poor last time — re-run the correctness check only
224
+ elif memory_hit.total_cases > 0 and memory_hit.correctness_cases / memory_hit.total_cases < 0.5:
225
+ _log(
226
+ func_name,
227
+ f"Memory hit ({label}) — correctness was "
228
+ f"{memory_hit.correctness_cases}/{memory_hit.total_cases} last run, re-checking",
229
+ style="yellow",
230
+ )
231
+ recalled_result = {
232
+ "severity": memory_hit.severity,
233
+ "issue": memory_hit.issue,
234
+ "reasoning": memory_hit.reasoning,
235
+ "optimized_code": memory_hit.optimized_code,
236
+ "suggestion": "",
237
+ "bottlenecks": [],
238
+ }
239
+ new_verification = None
240
+ if not getattr(sandbox, "disabled", False):
241
+ stored_cases = memory.lookup_test_cases(original_code)
242
+ if stored_cases:
243
+ _log(func_name, "Re-running correctness sandbox with stored test cases...", style="dim")
244
+ correctness = sandbox.verify_correctness_only(
245
+ original_code=original_code,
246
+ optimized_code=memory_hit.optimized_code,
247
+ original_func_name=func_name,
248
+ optimized_func_name=func_name,
249
+ test_cases=stored_cases,
250
+ language=language,
251
+ context=context,
252
+ )
253
+ _log(func_name, f"Re-verification: {correctness.passed_cases}/{correctness.total_cases} passed", style="dim")
254
+ try:
255
+ from coreinsight.sandbox import VerificationResult, SpeedupVerification
256
+ new_verification = VerificationResult(
257
+ speedup=SpeedupVerification(
258
+ verified=True,
259
+ computed_speedups=[memory_hit.avg_speedup] if memory_hit.avg_speedup else [],
260
+ details=f"Speedup recalled from memory: {memory_hit.avg_speedup:.2f}x",
261
+ ),
262
+ correctness=correctness,
263
+ )
264
+ except Exception:
265
+ pass # verification display is non-critical
266
+ return func_name, recalled_result, None, None, new_verification, None, memory_hit, False
267
+
268
+ # Gate C: stored result is complete and correctness is acceptable
269
+ else:
270
+ _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
271
+ recalled_result = {
272
+ "severity": memory_hit.severity,
273
+ "issue": memory_hit.issue,
274
+ "reasoning": memory_hit.reasoning,
275
+ "optimized_code": memory_hit.optimized_code,
276
+ "suggestion": "",
277
+ "bottlenecks": [],
278
+ }
279
+ return func_name, recalled_result, None, None, None, None, memory_hit, False
223
280
 
224
281
  # ── Route: single-agent vs multi-agent ──────────────────────────
225
282
  if agent_mode == "multi" and multi_agents:
@@ -240,8 +297,37 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
240
297
  if result is None:
241
298
  return func_name, None, None, f"❌ Analysis error: {logs}", None, None, None, False
242
299
 
300
+ # Retry gate: Low severity or missing optimized code often means the model
301
+ # defaulted to "looks fine" rather than truly auditing.
302
+ # Retry up to 2 times before accepting the conclusion.
303
+ _MAX_ANALYSIS_RETRIES = 2
304
+ _retry = 0
305
+ while (result.get("severity") == "Low" or not optimized_code) and _retry < _MAX_ANALYSIS_RETRIES:
306
+ _retry += 1
307
+ _log(func_name, f"Low/missing result — retrying analysis ({_retry}/{_MAX_ANALYSIS_RETRIES})...", style="yellow")
308
+ if agent_mode == "multi" and multi_agents:
309
+ result, optimized_code, success, logs, plot_data, is_valid_optimization = \
310
+ _run_multi_agent(
311
+ func_name, original_code, language, context,
312
+ hardware_target, sandbox, multi_agents, tier_limits,
313
+ stream_callback=stream_callback,
314
+ )
315
+ else:
316
+ result, optimized_code, success, logs, plot_data, is_valid_optimization = \
317
+ _run_single_agent(
318
+ func_name, original_code, language, context,
319
+ hardware_target, sandbox, agent, tier_limits,
320
+ stream_callback=stream_callback,
321
+ )
322
+ if result is None:
323
+ break
324
+
325
+ if result is None:
326
+ return func_name, None, None, f"❌ Analysis error after {_retry} retries: {logs}", None, None, None, False
327
+
243
328
  if result.get("severity") == "Low" or not optimized_code:
244
- return func_name, None, None, " No critical bottlenecks detected. Code is optimal.", None, None, None, False
329
+ confirmed = f" (confirmed after {_retry} retries)" if _retry > 0 else ""
330
+ return func_name, None, None, f"✅ No significant bottlenecks found{confirmed}.", None, None, None, False
245
331
 
246
332
  # 3. Verification + AI-free hardware profiling
247
333
  verification = None
@@ -288,11 +374,29 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
288
374
 
289
375
  except Exception as e:
290
376
  err_str = str(e)
291
- if "context" in err_str.lower() and "limit" in err_str.lower():
292
- _log(func_name, f"Context limit hit: {e}", style="bold yellow")
377
+ err_low = err_str.lower()
378
+ if "context" in err_low and "limit" in err_low:
379
+ _log(func_name, "Context limit hit", style="bold yellow")
380
+ return func_name, None, None, (
381
+ "⚠️ Context limit — try a model with a larger context window, "
382
+ "or split the function into smaller pieces."
383
+ ), None, None, None, False
384
+ if any(k in err_low for k in ("cannot connect", "connection refused", "docker")):
385
+ _log(func_name, "Docker unavailable", style="bold yellow")
386
+ return func_name, None, None, (
387
+ "⚠️ Docker is not running — start Docker Desktop and try again.\n"
388
+ " Skip the sandbox with: coreinsight analyze --no-docker <file>"
389
+ ), None, None, None, False
390
+ if "timeout" in err_low or "timed out" in err_low:
391
+ _log(func_name, "Sandbox timed out", style="bold yellow")
392
+ return func_name, None, None, (
393
+ "⚠️ Sandbox timed out — the benchmark likely contains an infinite loop.\n"
394
+ " The LLM analysis result above is still valid."
395
+ ), None, None, None, False
396
+ if "out of memory" in err_low or "oom" in err_low:
397
+ _log(func_name, "Sandbox OOM", style="bold yellow")
293
398
  return func_name, None, None, (
294
- f"⚠️ Context limit: {err_str}\n"
295
- f"Try a model with a larger context window, or split the function."
399
+ "⚠️ Sandbox ran out of memory. Try --no-docker or reduce the file size."
296
400
  ), None, None, None, False
297
401
  _log(func_name, f"Failed: {e}", style="bold red")
298
402
  return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
@@ -763,7 +867,15 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
763
867
 
764
868
  except Exception as exc:
765
869
  with print_lock:
766
- console.print(f"[bold red]❌ Critical failure in thread processing {func['name']}:[/bold red] {exc}")
870
+ exc_low = str(exc).lower()
871
+ if any(k in exc_low for k in ("docker", "cannot connect", "connection refused")):
872
+ console.print(f"[bold yellow]⚠️ {func['name']}: Docker unavailable — start Docker Desktop and retry.[/bold yellow]")
873
+ elif "timeout" in exc_low or "timed out" in exc_low:
874
+ console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox timed out.[/bold yellow]")
875
+ elif "out of memory" in exc_low or "oom" in exc_low:
876
+ console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox ran out of memory.[/bold yellow]")
877
+ else:
878
+ console.print(f"[bold red]❌ {func['name']}: Unexpected error — {exc}[/bold red]")
767
879
 
768
880
  console.print(Panel.fit(f"✅ [bold green]Analysis Complete![/bold green] Final report saved to:\n{report_path.absolute()}"))
769
881
 
@@ -332,6 +332,16 @@ class OptimizationMemory:
332
332
  except Exception as exc:
333
333
  return {"count": 0, "error": str(exc)}
334
334
 
335
+ def lookup_test_cases(self, original_code: str) -> Optional[list]:
336
+ """
337
+ Return stored test cases for `original_code`, or None if not found.
338
+ Used to re-run correctness without regenerating via LLM.
339
+ """
340
+ if not self._ensure_db():
341
+ return None
342
+ h = self.ast_hash(original_code)
343
+ return self._load_test_cases(h)
344
+
335
345
  def store_test_cases(self, original_code: str, test_cases: list) -> None:
336
346
  """
337
347
  Persist test cases for a function, keyed by AST hash.
@@ -90,7 +90,8 @@ GRADING RUBRIC AND INSTRUCTIONS (APPLY ONLY THE SPECIFIC RUBRIC FOR {language}):
90
90
  INSTRUCTIONS:
91
91
  1. Actively hunt for Medium, High, and Critical issues based ONLY on the specific {language} rubric above. Do not hallucinate GPU concepts for Python code unless PyTorch/CUDA is explicitly used.
92
92
  2. If you find an issue, you MUST explain the hardware-level or interpreter-level reasoning clearly (e.g., CPU cache misses, GIL contention, memory latency).
93
- 3. CODE GENERATION MANDATE: You MUST provide the completely rewritten, optimized function in the `optimized_code` field. The code must be raw, syntactically correct {language} code ready to be compiled/run. Do NOT leave this field empty. Do NOT wrap the code in markdown backticks (e.g., ```cpp) inside the JSON string.
93
+ 3. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. A false negative (missing a real bottleneck) is always worse than a false positive. Only assign Low severity if you can explicitly prove the algorithm is already optimal for the target hardware — state the time complexity, memory access pattern, and why no better approach exists. "No obvious issues" is NOT sufficient justification for Low.
94
+ 4. CODE GENERATION MANDATE: You MUST provide the completely rewritten, optimized function in the `optimized_code` field. The code must be raw, syntactically correct {language} code ready to be compiled/run. Do NOT leave this field empty. Do NOT wrap the code in markdown backticks (e.g., ```cpp) inside the JSON string.
94
95
  """
95
96
 
96
97
  # ---------------------------------------------------------------------------
@@ -143,10 +144,10 @@ GRADING RUBRIC (apply only the {language} section):
143
144
  - Low: Trivial stylistic issues only.
144
145
 
145
146
  INSTRUCTIONS:
146
- 1. Identify the single most impactful bottleneck — do not list everything, find the root cause.
147
- 2. Explain the hardware-level or interpreter-level reasoning precisely.
148
- 3. Set `optimized_code` to null code generation happens in a separate agent.
149
- 4. If the code is genuinely optimal, set severity to Low and explain why.
147
+ 1. Identify the single most impactful bottleneck — do not list everything, find the root cause. If no Critical or High issue exists, identify the most significant Medium issue. Do NOT default to Low out of uncertainty.
148
+ 2. Explain the hardware-level or interpreter-level reasoning precisely — name the specific mechanism (e.g., "O(N²) comparisons cause cache thrashing on arrays larger than L2 cache", "GIL held across network I/O blocks all threads").
149
+ 3. CRITICAL: Set `optimized_code` to null. Any non-null value in this field will corrupt the pipeline. Code generation is handled by a separate agent.
150
+ 4. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. Only assign Low if you can explicitly prove algorithmic optimality — state the time complexity, memory access pattern, and why no better approach exists for the target hardware. "No obvious issues" is NOT sufficient justification for Low.
150
151
 
151
152
  {format_instructions}
152
153
  """
@@ -254,8 +255,9 @@ REQUIREMENTS:
254
255
  1. Rewrite ONLY the function named {func_name} — preserve its signature exactly.
255
256
  2. Fix the identified bottleneck using the suggestion as your guide.
256
257
  3. The function must be self-contained and correct.
257
- 4. Raw {language} code only no explanation, no markdown fences, no JSON.
258
- 5. Do NOT rename the function.
258
+ 4. VERIFICATION: Before outputting, mentally confirm: does the rewrite directly eliminate the identified bottleneck? If the issue was O(N²), confirm the new complexity is O(N log N) or better. If the issue was a Python loop, confirm it is vectorized with NumPy/PyTorch. If the issue was a deep copy, confirm it is eliminated. Do not output a rewrite that only partially addresses the bottleneck.
259
+ 5. Raw {language} code only — no explanation, no markdown fences, no JSON.
260
+ 6. Do NOT rename the function.
259
261
  """
260
262
 
261
263
  # ── Per-tier addenda for multi-agent harness (same scaffolding pattern) ──────
@@ -150,6 +150,35 @@ class VerificationResult:
150
150
  return "\n".join(lines)
151
151
 
152
152
 
153
+ def _format_sandbox_error(exc: Exception, language: str = "") -> str:
154
+ """Map raw Docker / OS exceptions to user-friendly one-liners."""
155
+ msg = str(exc).lower()
156
+ if "timeout" in msg or "timed out" in msg or "read timeout" in msg:
157
+ return (
158
+ "⚠️ Sandbox timed out — the benchmark likely contains an infinite loop "
159
+ "or extremely slow path. Try --no-docker to skip the sandbox."
160
+ )
161
+ if "out of memory" in msg or "oom" in msg or ("memory" in msg and "kill" in msg):
162
+ return (
163
+ "⚠️ Sandbox ran out of memory (OOM). "
164
+ "Reduce N sizes in the harness or use --no-docker."
165
+ )
166
+ if "no such image" in msg or "pull access" in msg or "not found" in msg:
167
+ lang_label = f" ({language})" if language else ""
168
+ return (
169
+ f"⚠️ Sandbox Docker image not found{lang_label}. "
170
+ "It should have been built on first run — try `docker images` to check."
171
+ )
172
+ if "cannot connect" in msg or "connection refused" in msg or "docker" in msg:
173
+ return (
174
+ "⚠️ Docker is not running. "
175
+ "Start Docker Desktop (or the Docker daemon) and try again."
176
+ )
177
+ if "permission denied" in msg:
178
+ return "⚠️ Sandbox permission error — Docker may lack access to the temp directory."
179
+ return f"⚠️ Sandbox error: {exc}"
180
+
181
+
153
182
  class CodeSandbox:
154
183
  def __init__(self, disabled: bool = False):
155
184
  self.disabled = disabled
@@ -277,7 +306,7 @@ class CodeSandbox:
277
306
  return False, f"Missing CSV output (exit {exit_code}).\nFull output:\n{raw_logs}", None
278
307
 
279
308
  except Exception as e:
280
- return False, f"Sandbox error: {str(e)}", None
309
+ return False, _format_sandbox_error(e, language), None
281
310
 
282
311
  finally:
283
312
  if container:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coreinsight-cli
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
5
  Author: Varun Jani
6
6
  License: GPL-3.0-or-later
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coreinsight-cli"
7
- version = "0.3.1"
7
+ version = "0.3.2"
8
8
  description = "Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA"
9
9
  license = {text = "GPL-3.0-or-later"}
10
10
  authors = [
File without changes