coreinsight-cli 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {coreinsight_cli-0.3.1/coreinsight_cli.egg-info → coreinsight_cli-0.3.3}/PKG-INFO +37 -11
  2. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/README.md +16 -0
  3. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/analyzer.py +55 -193
  4. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/main.py +134 -20
  5. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/memory.py +13 -2
  6. coreinsight_cli-0.3.3/coreinsight/prompts/__init__.py +59 -0
  7. coreinsight_cli-0.3.3/coreinsight/prompts/_base.py +15 -0
  8. coreinsight_cli-0.3.3/coreinsight/prompts/bottleneck.py +131 -0
  9. coreinsight_cli-0.3.3/coreinsight/prompts/harness.py +291 -0
  10. coreinsight_cli-0.3.3/coreinsight/prompts/optimizer.py +97 -0
  11. coreinsight_cli-0.3.3/coreinsight/prompts/test_cases.py +44 -0
  12. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/sandbox.py +30 -1
  13. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3/coreinsight_cli.egg-info}/PKG-INFO +37 -11
  14. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/SOURCES.txt +6 -1
  15. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/requires.txt +25 -9
  16. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/pyproject.toml +31 -10
  17. coreinsight_cli-0.3.1/coreinsight/prompts.py +0 -299
  18. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/LICENSE +0 -0
  19. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/__init__.py +0 -0
  20. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/config.py +0 -0
  21. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/__init__.py +0 -0
  22. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/bad_loop.py +0 -0
  23. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/data_processor.py +0 -0
  24. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/slow.cpp +0 -0
  25. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/embeddings.py +0 -0
  26. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/hardware.py +0 -0
  27. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/indexer.py +0 -0
  28. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/parser.py +0 -0
  29. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/profiler.py +0 -0
  30. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/scanner.py +0 -0
  31. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/tui.py +0 -0
  32. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
  33. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/entry_points.txt +0 -0
  34. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/top_level.txt +0 -0
  35. {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coreinsight-cli
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
5
  Author: Varun Jani
6
6
  License: GPL-3.0-or-later
@@ -20,20 +20,30 @@ Requires-Python: >=3.9
20
20
  Description-Content-Type: text/markdown
21
21
  License-File: LICENSE
22
22
  Requires-Dist: rich>=13.0
23
+ Requires-Dist: textual>=0.60.0
24
+ Requires-Dist: psutil>=5.9
25
+ Requires-Dist: pydantic>=2.0
23
26
  Requires-Dist: docker>=6.0
24
- Requires-Dist: tree-sitter==0.21.3
25
- Requires-Dist: tree-sitter-languages
26
- Requires-Dist: langchain>=0.2.0
27
27
  Requires-Dist: langchain-core>=0.2.0
28
+ Requires-Dist: langchain>=0.2.0
28
29
  Requires-Dist: langchain-ollama>=0.1.0
29
- Requires-Dist: langchain-google-genai>=1.0.0
30
30
  Requires-Dist: langchain-openai>=0.1.0
31
- Requires-Dist: langchain-anthropic>=0.1.0
32
- Requires-Dist: pydantic>=2.0
33
- Requires-Dist: chromadb>=0.5.0
34
- Requires-Dist: sentence-transformers>=3.0.0
35
- Requires-Dist: textual>=0.60.0
36
- Requires-Dist: psutil>=5.9
31
+ Requires-Dist: tree-sitter==0.21.3
32
+ Requires-Dist: tree-sitter-languages
33
+ Provides-Extra: openai
34
+ Provides-Extra: google
35
+ Requires-Dist: langchain-google-genai>=1.0.0; extra == "google"
36
+ Provides-Extra: anthropic
37
+ Requires-Dist: langchain-anthropic>=0.1.0; extra == "anthropic"
38
+ Provides-Extra: memory
39
+ Requires-Dist: chromadb>=0.5.0; extra == "memory"
40
+ Requires-Dist: sentence-transformers>=3.0.0; extra == "memory"
41
+ Provides-Extra: cloud
42
+ Requires-Dist: langchain-openai>=0.1.0; extra == "cloud"
43
+ Requires-Dist: langchain-google-genai>=1.0.0; extra == "cloud"
44
+ Requires-Dist: langchain-anthropic>=0.1.0; extra == "cloud"
45
+ Provides-Extra: all
46
+ Requires-Dist: coreinsight-cli[cloud,memory]; extra == "all"
37
47
  Provides-Extra: compat
38
48
  Requires-Dist: pysqlite3-binary>=0.5.0; extra == "compat"
39
49
  Dynamic: license-file
@@ -49,7 +59,23 @@ CoreInsight finds hardware bottlenecks in your code, generates optimized replace
49
59
  ## Install
50
60
 
51
61
  ```bash
62
+ # OpenAI key - quick install
63
+ pip install coreinsight-cli[openai]
64
+
65
+ # Gemini key - quick install
66
+ pip install coreinsight-cli[google]
67
+
68
+ # Claude key - quick install
69
+ pip install coreinsight-cli[anthropic]
70
+
71
+ # Local Ollama install
52
72
  pip install coreinsight-cli
73
+
74
+ # Memory and additional usage install
75
+ pip install coreinsight-cli[openai,memory]
76
+
77
+ # Install everything
78
+ pip install coreinsight-cli[all]
53
79
  ```
54
80
 
55
81
  **Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)
@@ -9,7 +9,23 @@ CoreInsight finds hardware bottlenecks in your code, generates optimized replace
9
9
  ## Install
10
10
 
11
11
  ```bash
12
+ # OpenAI key - quick install
13
+ pip install coreinsight-cli[openai]
14
+
15
+ # Gemini key - quick install
16
+ pip install coreinsight-cli[google]
17
+
18
+ # Claude key - quick install
19
+ pip install coreinsight-cli[anthropic]
20
+
21
+ # Local Ollama install
12
22
  pip install coreinsight-cli
23
+
24
+ # Memory and additional usage install
25
+ pip install coreinsight-cli[openai,memory]
26
+
27
+ # Install everything
28
+ pip install coreinsight-cli[all]
13
29
  ```
14
30
 
15
31
  **Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)
@@ -8,11 +8,11 @@ from langchain_core.prompts import PromptTemplate
8
8
  from langchain_core.exceptions import OutputParserException
9
9
 
10
10
  from langchain_ollama import ChatOllama
11
- from langchain_google_genai import ChatGoogleGenerativeAI
12
- from langchain_openai import ChatOpenAI
13
- from langchain_anthropic import ChatAnthropic
14
11
 
15
- from coreinsight.prompts import SYSTEM_PROMPT, ANALYSIS_TEMPLATE, HARNESS_ADDENDUM
12
+ from coreinsight.prompts import (
13
+ SYSTEM_PROMPT, ANALYSIS_TEMPLATE, HARNESS_ADDENDUM,
14
+ _HARNESS_TEMPLATE, _FIX_TEMPLATE, _TEST_CASES_TEMPLATE,
15
+ )
16
16
 
17
17
  # Phrases that appear at the start of a truncated LLM response
18
18
  _TRUNCATION_HINTS = (
@@ -99,117 +99,6 @@ class AuditResult(BaseModel):
99
99
  optimized_code: Optional[str] = Field(description="The entirely rewritten optimized code, ready to drop in", default=None)
100
100
 
101
101
 
102
- _HARNESS_TEMPLATE = """
103
- You are a strict QA engineer writing a standalone asymptotic scaling benchmark script in {language}.
104
-
105
- ORIGINAL FUNCTION (Name: {func_name}):
106
- {original}
107
-
108
- OPTIMIZED FUNCTION:
109
- {optimized}
110
-
111
- GLOBAL DEPENDENCIES (Helper functions/structs required to run the code):
112
- {context}
113
-
114
- Write the complete executable script (e.g., `int main()` or `if __name__ == "__main__":`) that:
115
- 1. Includes necessary imports/headers.
116
- 2. Includes ALL required helper functions or structs from GLOBAL DEPENDENCIES so the script is fully standalone.
117
- 3. Defines BOTH the original and optimized functions exactly as provided above.
118
- 4. Tests multiple data sizes (e.g., N=10, 100, 1000, 5000).
119
- 5. Target Hardware: {hardware_target}. The largest N MUST cross cache boundaries but MUST NOT exceed 20% of available RAM to prevent OOM crashes.
120
- 6. Initializes realistic dummy data for each size N.
121
- 7. Times execution of original vs optimized using high-resolution timers.
122
-
123
- CRITICAL TIMING:
124
- - Python: use `time.perf_counter()`. C++: use `std::chrono::high_resolution_clock`.
125
- - Clamp: `orig_time = max(end - start, 1e-9)` to prevent zero-division.
126
- - Speedup: `speedup = orig_time / opt_time`.
127
-
128
- ISOLATION RULES (CRITICAL):
129
- - This runs in an empty Docker container. NO local files exist.
130
- - DO NOT use local imports. Define everything inline.
131
- - DO NOT rename the original function — call it exactly `{func_name}`.
132
-
133
- OUTPUT FORMAT (CRITICAL):
134
- Print ONLY this exact CSV to stdout, no other text:
135
- N,Original_Time,Optimized_Time,Speedup
136
- 10,0.002,0.001,2.00
137
-
138
- [PYTHON ONLY]: Also import matplotlib, plot results, and save as `benchmark_plot.png`.
139
-
140
- FORMATTING RULE: Wrap your ENTIRE script in a single markdown code block. No text before or after.
141
- """
142
-
143
- _FIX_TEMPLATE = """
144
- You are an expert {language} developer. Your previous benchmark script FAILED in an isolated sandbox.
145
-
146
- ORIGINAL FUNCTION (Name: {func_name}):
147
- {original}
148
-
149
- GLOBAL DEPENDENCIES:
150
- {context}
151
-
152
- YOUR FAILED SCRIPT:
153
- {bad_harness}
154
-
155
- EXECUTION ERROR LOGS:
156
- {error_logs}
157
-
158
- ISOLATION CONSTRAINTS (CRITICAL):
159
- - Empty Docker container. No local files. NO local imports.
160
- - Define `{func_name}` and all GLOBAL DEPENDENCIES inline.
161
-
162
- FIX INSTRUCTIONS:
163
- 1. Diagnose the failure from the error logs above.
164
- 2. Fix imports, NameErrors, type mismatches, infinite loops, or OOM issues.
165
- 3. Maintain the CSV stdout format exactly: N,Original_Time,Optimized_Time,Speedup
166
- 4. Use high-resolution timers and clamp with `max(t, 1e-9)`.
167
- 5. [PYTHON ONLY]: Save benchmark plot as `benchmark_plot.png`.
168
-
169
- FORMATTING RULE: Wrap your ENTIRE fixed script in a single markdown code block. No text before or after.
170
- """
171
-
172
- _TEST_CASES_TEMPLATE = """
173
- You are a QA engineer writing correctness test cases for a function.
174
-
175
- FUNCTION NAME: {func_name}
176
- LANGUAGE: {language}
177
-
178
- FUNCTION SIGNATURE AND BODY:
179
- {original}
180
-
181
- GLOBAL DEPENDENCIES (helper functions / structs this function relies on):
182
- {context}
183
-
184
- Your task: generate {num_cases} diverse test cases that call `{func_name}` with different
185
- arguments. The cases must cover:
186
- - Small inputs (N ~ 10)
187
- - Medium inputs (N ~ 100-500)
188
- - Edge cases: empty collections, single-element, all-zeros, negative values (where applicable)
189
- - Boundary conditions specific to this function's logic
190
-
191
- OUTPUT FORMAT — respond with ONLY a valid JSON array, nothing else. No markdown fences,
192
- no explanation. Each element must be a JSON object with exactly two keys:
193
- "args" : a JSON array of positional arguments (use only JSON-serialisable types:
194
- numbers, strings, booleans, arrays, objects — NO numpy, NO bytes)
195
- "kwargs": a JSON object of keyword arguments (may be empty {{}})
196
-
197
- Example (do NOT copy this — generate cases specific to {func_name}):
198
- [
199
- {{"args": [[1, 2, 3]], "kwargs": {{}}}},
200
- {{"args": [[]], "kwargs": {{}}}},
201
- {{"args": [[9, -1, 4, 0, 7]], "kwargs": {{"reverse": true}}}}
202
- ]
203
-
204
- CONSTRAINTS:
205
- - All values must be plain JSON types — no numpy arrays, no custom objects.
206
- - If the function operates on a matrix, represent it as a list-of-lists.
207
- - If the function takes a size integer N, generate concrete data of that size inline.
208
- - Do NOT include function calls or expressions — only literal values.
209
- - Produce exactly {num_cases} test cases.
210
- """
211
-
212
-
213
102
  class AnalyzerAgent:
214
103
  def __init__(self, provider="ollama", model_name="llama3.2", api_keys=None, model_tier="large"):
215
104
  self.model_tier = model_tier
@@ -217,70 +106,15 @@ class AnalyzerAgent:
217
106
  self.provider = provider
218
107
  api_keys = api_keys or {}
219
108
 
220
- if provider == "openai":
221
- if not api_keys.get("openai"):
222
- raise ValueError("OpenAI API Key required.")
223
- self.base_llm = ChatOpenAI(
224
- model=model_name,
225
- api_key=api_keys["openai"],
226
- temperature=0.1,
227
- model_kwargs={"response_format": {"type": "json_object"}},
228
- )
229
- self.json_llm = self.base_llm
230
-
109
+ # Reuse shared LLM factory — handles lazy imports and provider validation
110
+ from coreinsight.prompts import ModelTier
111
+ if provider == "ollama":
112
+ api_keys["_ctx"] = 4096 if model_tier == ModelTier.SMALL else 8192
113
+ api_keys["_predict"] = 2048 if model_tier == ModelTier.SMALL else 4096
231
114
  elif provider == "local_server":
232
- from coreinsight.prompts import ModelTier
233
- base_url = api_keys.get("local_url", "http://localhost:1234/v1")
234
- _max_tokens = 2048 if model_tier == ModelTier.SMALL else 4096
235
- self.base_llm = ChatOpenAI(
236
- model=model_name,
237
- api_key="not-needed",
238
- base_url=base_url,
239
- temperature=0.1,
240
- max_tokens=_max_tokens,
241
- model_kwargs={"response_format": {"type": "json_object"}},
242
- )
243
- self.json_llm = self.base_llm
244
-
245
- elif provider == "anthropic":
246
- if not api_keys.get("anthropic"):
247
- raise ValueError("Anthropic API Key required.")
248
- self.base_llm = ChatAnthropic(
249
- model=model_name,
250
- api_key=api_keys["anthropic"],
251
- temperature=0.1,
252
- )
253
- # Anthropic doesn't support response_format; JSON is enforced via prompt only
254
- self.json_llm = self.base_llm
255
-
256
- elif provider == "google":
257
- if not api_keys.get("google"):
258
- raise ValueError("Google Gemini API Key required.")
259
- self.base_llm = ChatGoogleGenerativeAI(
260
- model=model_name,
261
- google_api_key=api_keys["google"],
262
- temperature=0.1,
263
- convert_system_message_to_human=True,
264
- )
265
- self.json_llm = self.base_llm
266
-
267
- else: # Ollama default
268
- from coreinsight.prompts import ModelTier
269
- # Small models (7B) typically have 4096 native context.
270
- # Asking for more causes silent degradation or OOM on the host.
271
- # Medium/large local models can handle 8192 comfortably.
272
- _ctx = 4096 if model_tier == ModelTier.SMALL else 8192
273
- # num_predict: small models need room for JSON + code in one shot.
274
- # Capping at 2048 for small prevents runaway generation that hits
275
- # the limit mid-JSON and returns truncated garbage.
276
- _predict = 2048 if model_tier == ModelTier.SMALL else 4096
277
- self.base_llm = ChatOllama(
278
- model=model_name,
279
- temperature=0.1,
280
- num_predict=_predict,
281
- num_ctx=_ctx,
282
- )
283
- self.json_llm = self.base_llm.bind(format="json")
115
+ api_keys["_predict"] = 2048 if model_tier == ModelTier.SMALL else 4096
116
+
117
+ self.base_llm, self.json_llm = _build_llm(provider, model_name, api_keys)
284
118
 
285
119
  self.prompt = PromptTemplate(
286
120
  template=ANALYSIS_TEMPLATE + "\n\n{format_instructions}",
@@ -556,16 +390,17 @@ class AnalyzerAgent:
556
390
  # ---------------------------------------------------------------------------
557
391
 
558
392
  def _build_llm(provider: str, model_name: str, api_keys: dict):
559
- """
560
- Shared LLM factory for all multi-agent classes.
561
- Returns (base_llm, json_llm) — same pattern as AnalyzerAgent.__init__.
562
- Raises ValueError on missing credentials.
563
- """
564
393
  api_keys = api_keys or {}
565
394
 
566
395
  if provider == "openai":
567
396
  if not api_keys.get("openai"):
568
397
  raise ValueError("OpenAI API key required.")
398
+ try:
399
+ from langchain_openai import ChatOpenAI
400
+ except ImportError:
401
+ raise ImportError(
402
+ "OpenAI provider requires: pip install coreinsight-cli[openai]"
403
+ )
569
404
  llm = ChatOpenAI(
570
405
  model=model_name,
571
406
  api_key=api_keys["openai"],
@@ -575,8 +410,14 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
575
410
  return llm, llm
576
411
 
577
412
  if provider == "local_server":
413
+ try:
414
+ from langchain_openai import ChatOpenAI
415
+ except ImportError:
416
+ raise ImportError(
417
+ "local_server provider requires: pip install coreinsight-cli[openai]"
418
+ )
578
419
  base_url = api_keys.get("local_url", "http://localhost:1234/v1")
579
- _max_tokens = api_keys.pop("_predict", 4096) # reuse same key as Ollama path
420
+ _max_tokens = api_keys.pop("_predict", 4096)
580
421
  llm = ChatOpenAI(
581
422
  model=model_name,
582
423
  api_key="not-needed",
@@ -590,6 +431,12 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
590
431
  if provider == "anthropic":
591
432
  if not api_keys.get("anthropic"):
592
433
  raise ValueError("Anthropic API key required.")
434
+ try:
435
+ from langchain_anthropic import ChatAnthropic
436
+ except ImportError:
437
+ raise ImportError(
438
+ "Anthropic provider requires: pip install coreinsight-cli[anthropic]"
439
+ )
593
440
  llm = ChatAnthropic(
594
441
  model=model_name,
595
442
  api_key=api_keys["anthropic"],
@@ -600,6 +447,12 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
600
447
  if provider == "google":
601
448
  if not api_keys.get("google"):
602
449
  raise ValueError("Google Gemini API key required.")
450
+ try:
451
+ from langchain_google_genai import ChatGoogleGenerativeAI
452
+ except ImportError:
453
+ raise ImportError(
454
+ "Google provider requires: pip install coreinsight-cli[google]"
455
+ )
603
456
  llm = ChatGoogleGenerativeAI(
604
457
  model=model_name,
605
458
  google_api_key=api_keys["google"],
@@ -608,9 +461,7 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
608
461
  )
609
462
  return llm, llm
610
463
 
611
- # Ollama default — context and predict budget are passed in from the
612
- # calling agent which knows its own model_tier.
613
- # Default to medium-safe values; callers override via kwargs if needed.
464
+ # Ollama default
614
465
  _ctx = api_keys.pop("_ctx", 8192)
615
466
  _predict = api_keys.pop("_predict", 4096)
616
467
  base = ChatOllama(
@@ -650,13 +501,13 @@ class BottleneckAgent:
650
501
  api_keys: dict,
651
502
  model_tier: str,
652
503
  ) -> None:
653
- from coreinsight.prompts import BOTTLENECK_TEMPLATE, SYSTEM_PROMPT
504
+ from coreinsight.prompts import BOTTLENECK_TEMPLATES, SYSTEM_PROMPT
654
505
  self.model_tier = model_tier
655
506
  self.parser = JsonOutputParser(pydantic_object=AuditResult)
656
507
  self._base_llm, self._json_llm = _build_llm_tiered(provider, model_name, api_keys, model_tier)
657
508
 
658
509
  self._prompt = PromptTemplate(
659
- template=BOTTLENECK_TEMPLATE,
510
+ template=BOTTLENECK_TEMPLATES[model_tier],
660
511
  input_variables=[
661
512
  "language", "code_content", "context", "hardware_target",
662
513
  ],
@@ -736,10 +587,10 @@ class OptimizerAgent:
736
587
  api_keys: dict,
737
588
  model_tier: str,
738
589
  ) -> None:
739
- from coreinsight.prompts import OPTIMIZER_TEMPLATE
590
+ from coreinsight.prompts import OPTIMIZER_TEMPLATES
740
591
  self.model_tier = model_tier
741
592
  self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
742
- self._template = OPTIMIZER_TEMPLATE
593
+ self._template = OPTIMIZER_TEMPLATES[model_tier]
743
594
 
744
595
  def _extract_code(self, raw: str) -> str:
745
596
  """Reuse the same extraction logic as AnalyzerAgent."""
@@ -898,7 +749,13 @@ class HarnessAgent:
898
749
  except Exception as e:
899
750
  return False, f"Harness generation failed: {e}", None, 0
900
751
 
901
- success, logs, plot_data = sandbox.execute_benchmark(harness, language)
752
+ # Catch missing int main() before hitting the sandbox
753
+ if language in ("cpp", "c++") and "int main(" not in harness and "int main (" not in harness:
754
+ logs = "Missing CSV output (exit 1).\nFull output:\nundefined reference to `main'"
755
+ success = False
756
+ plot_data = None
757
+ else:
758
+ success, logs, plot_data = sandbox.execute_benchmark(harness, language)
902
759
  is_valid = self._check_speedup(success, logs)
903
760
  retries = 0
904
761
 
@@ -921,7 +778,12 @@ class HarnessAgent:
921
778
  logs += f"\nFix generation failed: {e}"
922
779
  break
923
780
 
924
- success, logs, plot_data = sandbox.execute_benchmark(harness, language)
781
+ if language in ("cpp", "c++") and "int main(" not in harness and "int main (" not in harness:
782
+ logs = "Missing CSV output (exit 1).\nFull output:\nundefined reference to `main'"
783
+ success = False
784
+ plot_data = None
785
+ else:
786
+ success, logs, plot_data = sandbox.execute_benchmark(harness, language)
925
787
  is_valid = self._check_speedup(success, logs)
926
788
  retries += 1
927
789
 
@@ -161,7 +161,6 @@ def _run_multi_agent(
161
161
  optimized_code = multi_agents["optimizer"].generate(
162
162
  func_name, original_code, result,
163
163
  language, context, hardware_target,
164
- stream_callback=stream_callback, # readable code, stream it
165
164
  )
166
165
  if not optimized_code or optimized_code == original_code:
167
166
  return result, None, False, "", None, False
@@ -205,21 +204,78 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
205
204
  _log(func_name, "Fetching RAG context...")
206
205
  context = indexer.get_context_for_code(original_code) if indexer else ""
207
206
 
208
- # 0b. Memory lookup — skip LLM entirely if we've seen this pattern before
207
+ # 0b. Memory lookup — skip LLM if we've seen this pattern before,
208
+ # but validate the stored result before trusting it:
209
+ # Gate A: no optimized code stored → previous run was incomplete, re-run LLM
210
+ # Gate B: correctness < 50% last run → keep analysis, re-run correctness only
211
+ # Gate C: result is good → return as-is
209
212
  if memory:
210
213
  memory_hit = memory.lookup(original_code, language)
211
214
  if memory_hit:
212
215
  label = "exact match" if memory_hit.is_exact else f"similarity {memory_hit.similarity:.1%}"
213
- _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
214
- recalled_result = {
215
- "severity": memory_hit.severity,
216
- "issue": memory_hit.issue,
217
- "reasoning": memory_hit.reasoning,
218
- "optimized_code": memory_hit.optimized_code,
219
- "suggestion": "",
220
- "bottlenecks": [],
221
- }
222
- return func_name, recalled_result, None, None, None, None, memory_hit, False
216
+
217
+ # Gate A: stored result has no optimized code — not useful, fall through to LLM
218
+ if not memory_hit.optimized_code:
219
+ _log(func_name, f"Memory hit ({label}) — no optimized code stored, re-running LLM", style="yellow")
220
+ memory_hit = None # fall through; LLM path runs below as normal
221
+
222
+ # Gate B: correctness was poor last time — re-run the correctness check only
223
+ elif memory_hit.total_cases > 0 and memory_hit.correctness_cases / memory_hit.total_cases < 0.5:
224
+ _log(
225
+ func_name,
226
+ f"Memory hit ({label}) — correctness was "
227
+ f"{memory_hit.correctness_cases}/{memory_hit.total_cases} last run, re-checking",
228
+ style="yellow",
229
+ )
230
+ recalled_result = {
231
+ "severity": memory_hit.severity,
232
+ "issue": memory_hit.issue,
233
+ "reasoning": memory_hit.reasoning,
234
+ "optimized_code": memory_hit.optimized_code,
235
+ "suggestion": "",
236
+ "bottlenecks": [],
237
+ }
238
+ new_verification = None
239
+ if not getattr(sandbox, "disabled", False):
240
+ stored_cases = memory.lookup_test_cases(original_code)
241
+ if stored_cases:
242
+ _log(func_name, "Re-running correctness sandbox with stored test cases...", style="dim")
243
+ correctness = sandbox.verify_correctness_only(
244
+ original_code=original_code,
245
+ optimized_code=memory_hit.optimized_code,
246
+ original_func_name=func_name,
247
+ optimized_func_name=func_name,
248
+ test_cases=stored_cases,
249
+ language=language,
250
+ context=context,
251
+ )
252
+ _log(func_name, f"Re-verification: {correctness.passed_cases}/{correctness.total_cases} passed", style="dim")
253
+ try:
254
+ from coreinsight.sandbox import VerificationResult, SpeedupVerification
255
+ new_verification = VerificationResult(
256
+ speedup=SpeedupVerification(
257
+ verified=True,
258
+ computed_speedups=[memory_hit.avg_speedup] if memory_hit.avg_speedup else [],
259
+ details=f"Speedup recalled from memory: {memory_hit.avg_speedup:.2f}x",
260
+ ),
261
+ correctness=correctness,
262
+ )
263
+ except Exception:
264
+ pass # verification display is non-critical
265
+ return func_name, recalled_result, None, None, new_verification, None, memory_hit, False
266
+
267
+ # Gate C: stored result is complete and correctness is acceptable
268
+ else:
269
+ _log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
270
+ recalled_result = {
271
+ "severity": memory_hit.severity,
272
+ "issue": memory_hit.issue,
273
+ "reasoning": memory_hit.reasoning,
274
+ "optimized_code": memory_hit.optimized_code,
275
+ "suggestion": "",
276
+ "bottlenecks": [],
277
+ }
278
+ return func_name, recalled_result, None, None, None, None, memory_hit, False
223
279
 
224
280
  # ── Route: single-agent vs multi-agent ──────────────────────────
225
281
  if agent_mode == "multi" and multi_agents:
@@ -240,8 +296,37 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
240
296
  if result is None:
241
297
  return func_name, None, None, f"❌ Analysis error: {logs}", None, None, None, False
242
298
 
299
+ # Retry gate: Low severity or missing optimized code often means the model
300
+ # defaulted to "looks fine" rather than truly auditing.
301
+ # Retry up to 2 times before accepting the conclusion.
302
+ _MAX_ANALYSIS_RETRIES = 2
303
+ _retry = 0
304
+ while (result.get("severity") == "Low" or not optimized_code) and _retry < _MAX_ANALYSIS_RETRIES:
305
+ _retry += 1
306
+ _log(func_name, f"Low/missing result — retrying analysis ({_retry}/{_MAX_ANALYSIS_RETRIES})...", style="yellow")
307
+ if agent_mode == "multi" and multi_agents:
308
+ result, optimized_code, success, logs, plot_data, is_valid_optimization = \
309
+ _run_multi_agent(
310
+ func_name, original_code, language, context,
311
+ hardware_target, sandbox, multi_agents, tier_limits,
312
+ stream_callback=stream_callback,
313
+ )
314
+ else:
315
+ result, optimized_code, success, logs, plot_data, is_valid_optimization = \
316
+ _run_single_agent(
317
+ func_name, original_code, language, context,
318
+ hardware_target, sandbox, agent, tier_limits,
319
+ stream_callback=stream_callback,
320
+ )
321
+ if result is None:
322
+ break
323
+
324
+ if result is None:
325
+ return func_name, None, None, f"❌ Analysis error after {_retry} retries: {logs}", None, None, None, False
326
+
243
327
  if result.get("severity") == "Low" or not optimized_code:
244
- return func_name, None, None, " No critical bottlenecks detected. Code is optimal.", None, None, None, False
328
+ confirmed = f" (confirmed after {_retry} retries)" if _retry > 0 else ""
329
+ return func_name, None, None, f"✅ No significant bottlenecks found{confirmed}.", None, None, None, False
245
330
 
246
331
  # 3. Verification + AI-free hardware profiling
247
332
  verification = None
@@ -288,11 +373,29 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
288
373
 
289
374
  except Exception as e:
290
375
  err_str = str(e)
291
- if "context" in err_str.lower() and "limit" in err_str.lower():
292
- _log(func_name, f"Context limit hit: {e}", style="bold yellow")
376
+ err_low = err_str.lower()
377
+ if "context" in err_low and "limit" in err_low:
378
+ _log(func_name, "Context limit hit", style="bold yellow")
379
+ return func_name, None, None, (
380
+ "⚠️ Context limit — try a model with a larger context window, "
381
+ "or split the function into smaller pieces."
382
+ ), None, None, None, False
383
+ if any(k in err_low for k in ("cannot connect", "connection refused", "docker")):
384
+ _log(func_name, "Docker unavailable", style="bold yellow")
385
+ return func_name, None, None, (
386
+ "⚠️ Docker is not running — start Docker Desktop and try again.\n"
387
+ " Skip the sandbox with: coreinsight analyze --no-docker <file>"
388
+ ), None, None, None, False
389
+ if "timeout" in err_low or "timed out" in err_low:
390
+ _log(func_name, "Sandbox timed out", style="bold yellow")
391
+ return func_name, None, None, (
392
+ "⚠️ Sandbox timed out — the benchmark likely contains an infinite loop.\n"
393
+ " The LLM analysis result above is still valid."
394
+ ), None, None, None, False
395
+ if "out of memory" in err_low or "oom" in err_low:
396
+ _log(func_name, "Sandbox OOM", style="bold yellow")
293
397
  return func_name, None, None, (
294
- f"⚠️ Context limit: {err_str}\n"
295
- f"Try a model with a larger context window, or split the function."
398
+ "⚠️ Sandbox ran out of memory. Try --no-docker or reduce the file size."
296
399
  ), None, None, None, False
297
400
  _log(func_name, f"Failed: {e}", style="bold red")
298
401
  return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
@@ -763,7 +866,16 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
763
866
 
764
867
  except Exception as exc:
765
868
  with print_lock:
766
- console.print(f"[bold red]❌ Critical failure in thread processing {func['name']}:[/bold red] {exc}")
869
+ exc_low = str(exc).lower()
870
+ if any(k in exc_low for k in ("docker", "cannot connect", "connection refused")):
871
+ console.print(f"[bold yellow]⚠️ {func['name']}: Docker unavailable — start Docker Desktop and retry.[/bold yellow]")
872
+ elif "timeout" in exc_low or "timed out" in exc_low:
873
+ console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox timed out.[/bold yellow]")
874
+ elif "out of memory" in exc_low or "oom" in exc_low:
875
+ console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox ran out of memory.[/bold yellow]")
876
+ else:
877
+ from rich.markup import escape
878
+ console.print(f"[bold red]❌ {func['name']}: Unexpected error — {escape(str(exc))}[/bold red]")
767
879
 
768
880
  console.print(Panel.fit(f"✅ [bold green]Analysis Complete![/bold green] Final report saved to:\n{report_path.absolute()}"))
769
881
 
@@ -917,7 +1029,8 @@ def _run_test_cmd(func_name: str, no_docker: bool = False):
917
1029
  num_cases=tier_limits["num_test_cases"],
918
1030
  )
919
1031
  except Exception as exc:
920
- console.print(f"[red]LLM error generating test cases: {exc}[/red]")
1032
+ from rich.markup import escape
1033
+ console.print(f"[red]LLM error generating test cases: {escape(str(exc))}[/red]")
921
1034
  return
922
1035
 
923
1036
  if not test_cases:
@@ -1034,7 +1147,8 @@ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv
1034
1147
  metadatas = all_records.get("metadatas", []) or []
1035
1148
  ids = all_records.get("ids", []) or []
1036
1149
  except Exception as exc:
1037
- console.print(f"[red]Failed to read memory store: {exc}[/red]")
1150
+ from rich.markup import escape
1151
+ console.print(f"[red]Failed to read memory store: {escape(str(exc))}[/red]")
1038
1152
  return
1039
1153
 
1040
1154
  # Build the detail table