coreinsight-cli 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coreinsight_cli-0.3.1/coreinsight_cli.egg-info → coreinsight_cli-0.3.2}/PKG-INFO +1 -1
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/main.py +129 -17
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/memory.py +10 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/prompts.py +9 -7
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/sandbox.py +30 -1
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2/coreinsight_cli.egg-info}/PKG-INFO +1 -1
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/pyproject.toml +1 -1
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/LICENSE +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/README.md +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/__init__.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/analyzer.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/config.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/__init__.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/bad_loop.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/data_processor.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/demo/slow.cpp +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/embeddings.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/hardware.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/indexer.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/parser.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/profiler.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/scanner.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight/tui.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/SOURCES.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/entry_points.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/requires.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/top_level.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/setup.cfg +0 -0
|
@@ -205,21 +205,78 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
|
|
|
205
205
|
_log(func_name, "Fetching RAG context...")
|
|
206
206
|
context = indexer.get_context_for_code(original_code) if indexer else ""
|
|
207
207
|
|
|
208
|
-
# 0b. Memory lookup — skip LLM
|
|
208
|
+
# 0b. Memory lookup — skip LLM if we've seen this pattern before,
|
|
209
|
+
# but validate the stored result before trusting it:
|
|
210
|
+
# Gate A: no optimized code stored → previous run was incomplete, re-run LLM
|
|
211
|
+
# Gate B: correctness < 50% last run → keep analysis, re-run correctness only
|
|
212
|
+
# Gate C: result is good → return as-is
|
|
209
213
|
if memory:
|
|
210
214
|
memory_hit = memory.lookup(original_code, language)
|
|
211
215
|
if memory_hit:
|
|
212
216
|
label = "exact match" if memory_hit.is_exact else f"similarity {memory_hit.similarity:.1%}"
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
"
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
217
|
+
|
|
218
|
+
# Gate A: stored result has no optimized code — not useful, fall through to LLM
|
|
219
|
+
if not memory_hit.optimized_code:
|
|
220
|
+
_log(func_name, f"Memory hit ({label}) — no optimized code stored, re-running LLM", style="yellow")
|
|
221
|
+
memory_hit = None # fall through; LLM path runs below as normal
|
|
222
|
+
|
|
223
|
+
# Gate B: correctness was poor last time — re-run the correctness check only
|
|
224
|
+
elif memory_hit.total_cases > 0 and memory_hit.correctness_cases / memory_hit.total_cases < 0.5:
|
|
225
|
+
_log(
|
|
226
|
+
func_name,
|
|
227
|
+
f"Memory hit ({label}) — correctness was "
|
|
228
|
+
f"{memory_hit.correctness_cases}/{memory_hit.total_cases} last run, re-checking",
|
|
229
|
+
style="yellow",
|
|
230
|
+
)
|
|
231
|
+
recalled_result = {
|
|
232
|
+
"severity": memory_hit.severity,
|
|
233
|
+
"issue": memory_hit.issue,
|
|
234
|
+
"reasoning": memory_hit.reasoning,
|
|
235
|
+
"optimized_code": memory_hit.optimized_code,
|
|
236
|
+
"suggestion": "",
|
|
237
|
+
"bottlenecks": [],
|
|
238
|
+
}
|
|
239
|
+
new_verification = None
|
|
240
|
+
if not getattr(sandbox, "disabled", False):
|
|
241
|
+
stored_cases = memory.lookup_test_cases(original_code)
|
|
242
|
+
if stored_cases:
|
|
243
|
+
_log(func_name, "Re-running correctness sandbox with stored test cases...", style="dim")
|
|
244
|
+
correctness = sandbox.verify_correctness_only(
|
|
245
|
+
original_code=original_code,
|
|
246
|
+
optimized_code=memory_hit.optimized_code,
|
|
247
|
+
original_func_name=func_name,
|
|
248
|
+
optimized_func_name=func_name,
|
|
249
|
+
test_cases=stored_cases,
|
|
250
|
+
language=language,
|
|
251
|
+
context=context,
|
|
252
|
+
)
|
|
253
|
+
_log(func_name, f"Re-verification: {correctness.passed_cases}/{correctness.total_cases} passed", style="dim")
|
|
254
|
+
try:
|
|
255
|
+
from coreinsight.sandbox import VerificationResult, SpeedupVerification
|
|
256
|
+
new_verification = VerificationResult(
|
|
257
|
+
speedup=SpeedupVerification(
|
|
258
|
+
verified=True,
|
|
259
|
+
computed_speedups=[memory_hit.avg_speedup] if memory_hit.avg_speedup else [],
|
|
260
|
+
details=f"Speedup recalled from memory: {memory_hit.avg_speedup:.2f}x",
|
|
261
|
+
),
|
|
262
|
+
correctness=correctness,
|
|
263
|
+
)
|
|
264
|
+
except Exception:
|
|
265
|
+
pass # verification display is non-critical
|
|
266
|
+
return func_name, recalled_result, None, None, new_verification, None, memory_hit, False
|
|
267
|
+
|
|
268
|
+
# Gate C: stored result is complete and correctness is acceptable
|
|
269
|
+
else:
|
|
270
|
+
_log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
|
|
271
|
+
recalled_result = {
|
|
272
|
+
"severity": memory_hit.severity,
|
|
273
|
+
"issue": memory_hit.issue,
|
|
274
|
+
"reasoning": memory_hit.reasoning,
|
|
275
|
+
"optimized_code": memory_hit.optimized_code,
|
|
276
|
+
"suggestion": "",
|
|
277
|
+
"bottlenecks": [],
|
|
278
|
+
}
|
|
279
|
+
return func_name, recalled_result, None, None, None, None, memory_hit, False
|
|
223
280
|
|
|
224
281
|
# ── Route: single-agent vs multi-agent ──────────────────────────
|
|
225
282
|
if agent_mode == "multi" and multi_agents:
|
|
@@ -240,8 +297,37 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
|
|
|
240
297
|
if result is None:
|
|
241
298
|
return func_name, None, None, f"❌ Analysis error: {logs}", None, None, None, False
|
|
242
299
|
|
|
300
|
+
# Retry gate: Low severity or missing optimized code often means the model
|
|
301
|
+
# defaulted to "looks fine" rather than truly auditing.
|
|
302
|
+
# Retry up to 2 times before accepting the conclusion.
|
|
303
|
+
_MAX_ANALYSIS_RETRIES = 2
|
|
304
|
+
_retry = 0
|
|
305
|
+
while (result.get("severity") == "Low" or not optimized_code) and _retry < _MAX_ANALYSIS_RETRIES:
|
|
306
|
+
_retry += 1
|
|
307
|
+
_log(func_name, f"Low/missing result — retrying analysis ({_retry}/{_MAX_ANALYSIS_RETRIES})...", style="yellow")
|
|
308
|
+
if agent_mode == "multi" and multi_agents:
|
|
309
|
+
result, optimized_code, success, logs, plot_data, is_valid_optimization = \
|
|
310
|
+
_run_multi_agent(
|
|
311
|
+
func_name, original_code, language, context,
|
|
312
|
+
hardware_target, sandbox, multi_agents, tier_limits,
|
|
313
|
+
stream_callback=stream_callback,
|
|
314
|
+
)
|
|
315
|
+
else:
|
|
316
|
+
result, optimized_code, success, logs, plot_data, is_valid_optimization = \
|
|
317
|
+
_run_single_agent(
|
|
318
|
+
func_name, original_code, language, context,
|
|
319
|
+
hardware_target, sandbox, agent, tier_limits,
|
|
320
|
+
stream_callback=stream_callback,
|
|
321
|
+
)
|
|
322
|
+
if result is None:
|
|
323
|
+
break
|
|
324
|
+
|
|
325
|
+
if result is None:
|
|
326
|
+
return func_name, None, None, f"❌ Analysis error after {_retry} retries: {logs}", None, None, None, False
|
|
327
|
+
|
|
243
328
|
if result.get("severity") == "Low" or not optimized_code:
|
|
244
|
-
|
|
329
|
+
confirmed = f" (confirmed after {_retry} retries)" if _retry > 0 else ""
|
|
330
|
+
return func_name, None, None, f"✅ No significant bottlenecks found{confirmed}.", None, None, None, False
|
|
245
331
|
|
|
246
332
|
# 3. Verification + AI-free hardware profiling
|
|
247
333
|
verification = None
|
|
@@ -288,11 +374,29 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
|
|
|
288
374
|
|
|
289
375
|
except Exception as e:
|
|
290
376
|
err_str = str(e)
|
|
291
|
-
|
|
292
|
-
|
|
377
|
+
err_low = err_str.lower()
|
|
378
|
+
if "context" in err_low and "limit" in err_low:
|
|
379
|
+
_log(func_name, "Context limit hit", style="bold yellow")
|
|
380
|
+
return func_name, None, None, (
|
|
381
|
+
"⚠️ Context limit — try a model with a larger context window, "
|
|
382
|
+
"or split the function into smaller pieces."
|
|
383
|
+
), None, None, None, False
|
|
384
|
+
if any(k in err_low for k in ("cannot connect", "connection refused", "docker")):
|
|
385
|
+
_log(func_name, "Docker unavailable", style="bold yellow")
|
|
386
|
+
return func_name, None, None, (
|
|
387
|
+
"⚠️ Docker is not running — start Docker Desktop and try again.\n"
|
|
388
|
+
" Skip the sandbox with: coreinsight analyze --no-docker <file>"
|
|
389
|
+
), None, None, None, False
|
|
390
|
+
if "timeout" in err_low or "timed out" in err_low:
|
|
391
|
+
_log(func_name, "Sandbox timed out", style="bold yellow")
|
|
392
|
+
return func_name, None, None, (
|
|
393
|
+
"⚠️ Sandbox timed out — the benchmark likely contains an infinite loop.\n"
|
|
394
|
+
" The LLM analysis result above is still valid."
|
|
395
|
+
), None, None, None, False
|
|
396
|
+
if "out of memory" in err_low or "oom" in err_low:
|
|
397
|
+
_log(func_name, "Sandbox OOM", style="bold yellow")
|
|
293
398
|
return func_name, None, None, (
|
|
294
|
-
|
|
295
|
-
f"Try a model with a larger context window, or split the function."
|
|
399
|
+
"⚠️ Sandbox ran out of memory. Try --no-docker or reduce the file size."
|
|
296
400
|
), None, None, None, False
|
|
297
401
|
_log(func_name, f"Failed: {e}", style="bold red")
|
|
298
402
|
return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
|
|
@@ -763,7 +867,15 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
|
|
|
763
867
|
|
|
764
868
|
except Exception as exc:
|
|
765
869
|
with print_lock:
|
|
766
|
-
|
|
870
|
+
exc_low = str(exc).lower()
|
|
871
|
+
if any(k in exc_low for k in ("docker", "cannot connect", "connection refused")):
|
|
872
|
+
console.print(f"[bold yellow]⚠️ {func['name']}: Docker unavailable — start Docker Desktop and retry.[/bold yellow]")
|
|
873
|
+
elif "timeout" in exc_low or "timed out" in exc_low:
|
|
874
|
+
console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox timed out.[/bold yellow]")
|
|
875
|
+
elif "out of memory" in exc_low or "oom" in exc_low:
|
|
876
|
+
console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox ran out of memory.[/bold yellow]")
|
|
877
|
+
else:
|
|
878
|
+
console.print(f"[bold red]❌ {func['name']}: Unexpected error — {exc}[/bold red]")
|
|
767
879
|
|
|
768
880
|
console.print(Panel.fit(f"✅ [bold green]Analysis Complete![/bold green] Final report saved to:\n{report_path.absolute()}"))
|
|
769
881
|
|
|
@@ -332,6 +332,16 @@ class OptimizationMemory:
|
|
|
332
332
|
except Exception as exc:
|
|
333
333
|
return {"count": 0, "error": str(exc)}
|
|
334
334
|
|
|
335
|
+
def lookup_test_cases(self, original_code: str) -> Optional[list]:
    """
    Fetch the test cases previously persisted for `original_code`.

    The lookup key is the value returned by `self.ast_hash(original_code)`.
    Lets a caller re-run a correctness check without asking the LLM to
    regenerate test cases.

    Returns None when `self._ensure_db()` reports a falsy value; otherwise
    returns whatever `self._load_test_cases` yields for the computed key.
    """
    if self._ensure_db():
        # Hash is computed only after the DB check passes, as in the
        # guard-first form.
        return self._load_test_cases(self.ast_hash(original_code))
    return None
|
|
344
|
+
|
|
335
345
|
def store_test_cases(self, original_code: str, test_cases: list) -> None:
|
|
336
346
|
"""
|
|
337
347
|
Persist test cases for a function, keyed by AST hash.
|
|
@@ -90,7 +90,8 @@ GRADING RUBRIC AND INSTRUCTIONS (APPLY ONLY THE SPECIFIC RUBRIC FOR {language}):
|
|
|
90
90
|
INSTRUCTIONS:
|
|
91
91
|
1. Actively hunt for Medium, High, and Critical issues based ONLY on the specific {language} rubric above. Do not hallucinate GPU concepts for Python code unless PyTorch/CUDA is explicitly used.
|
|
92
92
|
2. If you find an issue, you MUST explain the hardware-level or interpreter-level reasoning clearly (e.g., CPU cache misses, GIL contention, memory latency).
|
|
93
|
-
3.
|
|
93
|
+
3. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. A false negative (missing a real bottleneck) is always worse than a false positive. Only assign Low severity if you can explicitly prove the algorithm is already optimal for the target hardware — state the time complexity, memory access pattern, and why no better approach exists. "No obvious issues" is NOT sufficient justification for Low.
|
|
94
|
+
4. CODE GENERATION MANDATE: You MUST provide the completely rewritten, optimized function in the `optimized_code` field. The code must be raw, syntactically correct {language} code ready to be compiled/run. Do NOT leave this field empty. Do NOT wrap the code in markdown backticks (e.g., ```cpp) inside the JSON string.
|
|
94
95
|
"""
|
|
95
96
|
|
|
96
97
|
# ---------------------------------------------------------------------------
|
|
@@ -143,10 +144,10 @@ GRADING RUBRIC (apply only the {language} section):
|
|
|
143
144
|
- Low: Trivial stylistic issues only.
|
|
144
145
|
|
|
145
146
|
INSTRUCTIONS:
|
|
146
|
-
1. Identify the single most impactful bottleneck — do not list everything, find the root cause.
|
|
147
|
-
2. Explain the hardware-level or interpreter-level reasoning precisely.
|
|
148
|
-
3. Set `optimized_code` to null
|
|
149
|
-
4.
|
|
147
|
+
1. Identify the single most impactful bottleneck — do not list everything, find the root cause. If no Critical or High issue exists, identify the most significant Medium issue. Do NOT default to Low out of uncertainty.
|
|
148
|
+
2. Explain the hardware-level or interpreter-level reasoning precisely — name the specific mechanism (e.g., "O(N²) comparisons cause cache thrashing on arrays larger than L2 cache", "GIL held across network I/O blocks all threads").
|
|
149
|
+
3. CRITICAL: Set `optimized_code` to null. Any non-null value in this field will corrupt the pipeline. Code generation is handled by a separate agent.
|
|
150
|
+
4. SEVERITY BIAS: When uncertain between two severity levels, always choose the higher one. Only assign Low if you can explicitly prove algorithmic optimality — state the time complexity, memory access pattern, and why no better approach exists for the target hardware. "No obvious issues" is NOT sufficient justification for Low.
|
|
150
151
|
|
|
151
152
|
{format_instructions}
|
|
152
153
|
"""
|
|
@@ -254,8 +255,9 @@ REQUIREMENTS:
|
|
|
254
255
|
1. Rewrite ONLY the function named {func_name} — preserve its signature exactly.
|
|
255
256
|
2. Fix the identified bottleneck using the suggestion as your guide.
|
|
256
257
|
3. The function must be self-contained and correct.
|
|
257
|
-
4.
|
|
258
|
-
5.
|
|
258
|
+
4. VERIFICATION: Before outputting, mentally confirm: does the rewrite directly eliminate the identified bottleneck? If the issue was O(N²), confirm the new complexity is O(N log N) or better. If the issue was a Python loop, confirm it is vectorized with NumPy/PyTorch. If the issue was a deep copy, confirm it is eliminated. Do not output a rewrite that only partially addresses the bottleneck.
|
|
259
|
+
5. Raw {language} code only — no explanation, no markdown fences, no JSON.
|
|
260
|
+
6. Do NOT rename the function.
|
|
259
261
|
"""
|
|
260
262
|
|
|
261
263
|
# ── Per-tier addenda for multi-agent harness (same scaffolding pattern) ──────
|
|
@@ -150,6 +150,35 @@ class VerificationResult:
|
|
|
150
150
|
return "\n".join(lines)
|
|
151
151
|
|
|
152
152
|
|
|
153
|
+
def _format_sandbox_error(exc: Exception, language: str = "") -> str:
|
|
154
|
+
"""Map raw Docker / OS exceptions to user-friendly one-liners."""
|
|
155
|
+
msg = str(exc).lower()
|
|
156
|
+
if "timeout" in msg or "timed out" in msg or "read timeout" in msg:
|
|
157
|
+
return (
|
|
158
|
+
"⚠️ Sandbox timed out — the benchmark likely contains an infinite loop "
|
|
159
|
+
"or extremely slow path. Try --no-docker to skip the sandbox."
|
|
160
|
+
)
|
|
161
|
+
if "out of memory" in msg or "oom" in msg or ("memory" in msg and "kill" in msg):
|
|
162
|
+
return (
|
|
163
|
+
"⚠️ Sandbox ran out of memory (OOM). "
|
|
164
|
+
"Reduce N sizes in the harness or use --no-docker."
|
|
165
|
+
)
|
|
166
|
+
if "no such image" in msg or "pull access" in msg or "not found" in msg:
|
|
167
|
+
lang_label = f" ({language})" if language else ""
|
|
168
|
+
return (
|
|
169
|
+
f"⚠️ Sandbox Docker image not found{lang_label}. "
|
|
170
|
+
"It should have been built on first run — try `docker images` to check."
|
|
171
|
+
)
|
|
172
|
+
if "cannot connect" in msg or "connection refused" in msg or "docker" in msg:
|
|
173
|
+
return (
|
|
174
|
+
"⚠️ Docker is not running. "
|
|
175
|
+
"Start Docker Desktop (or the Docker daemon) and try again."
|
|
176
|
+
)
|
|
177
|
+
if "permission denied" in msg:
|
|
178
|
+
return "⚠️ Sandbox permission error — Docker may lack access to the temp directory."
|
|
179
|
+
return f"⚠️ Sandbox error: {exc}"
|
|
180
|
+
|
|
181
|
+
|
|
153
182
|
class CodeSandbox:
|
|
154
183
|
def __init__(self, disabled: bool = False):
|
|
155
184
|
self.disabled = disabled
|
|
@@ -277,7 +306,7 @@ class CodeSandbox:
|
|
|
277
306
|
return False, f"Missing CSV output (exit {exit_code}).\nFull output:\n{raw_logs}", None
|
|
278
307
|
|
|
279
308
|
except Exception as e:
|
|
280
|
-
return False,
|
|
309
|
+
return False, _format_sandbox_error(e, language), None
|
|
281
310
|
|
|
282
311
|
finally:
|
|
283
312
|
if container:
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "coreinsight-cli"
|
|
7
|
-
version = "0.3.1"
|
|
7
|
+
version = "0.3.2"
|
|
8
8
|
description = "Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA"
|
|
9
9
|
license = {text = "GPL-3.0-or-later"}
|
|
10
10
|
authors = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{coreinsight_cli-0.3.1 → coreinsight_cli-0.3.2}/coreinsight_cli.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|