coreinsight-cli 0.3.1__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coreinsight_cli-0.3.1/coreinsight_cli.egg-info → coreinsight_cli-0.3.3}/PKG-INFO +37 -11
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/README.md +16 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/analyzer.py +55 -193
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/main.py +134 -20
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/memory.py +13 -2
- coreinsight_cli-0.3.3/coreinsight/prompts/__init__.py +59 -0
- coreinsight_cli-0.3.3/coreinsight/prompts/_base.py +15 -0
- coreinsight_cli-0.3.3/coreinsight/prompts/bottleneck.py +131 -0
- coreinsight_cli-0.3.3/coreinsight/prompts/harness.py +291 -0
- coreinsight_cli-0.3.3/coreinsight/prompts/optimizer.py +97 -0
- coreinsight_cli-0.3.3/coreinsight/prompts/test_cases.py +44 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/sandbox.py +30 -1
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3/coreinsight_cli.egg-info}/PKG-INFO +37 -11
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/SOURCES.txt +6 -1
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/requires.txt +25 -9
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/pyproject.toml +31 -10
- coreinsight_cli-0.3.1/coreinsight/prompts.py +0 -299
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/LICENSE +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/__init__.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/config.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/__init__.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/bad_loop.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/data_processor.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/demo/slow.cpp +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/embeddings.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/hardware.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/indexer.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/parser.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/profiler.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/scanner.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight/tui.py +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/entry_points.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/coreinsight_cli.egg-info/top_level.txt +0 -0
- {coreinsight_cli-0.3.1 → coreinsight_cli-0.3.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coreinsight-cli
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
|
|
5
5
|
Author: Varun Jani
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -20,20 +20,30 @@ Requires-Python: >=3.9
|
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
License-File: LICENSE
|
|
22
22
|
Requires-Dist: rich>=13.0
|
|
23
|
+
Requires-Dist: textual>=0.60.0
|
|
24
|
+
Requires-Dist: psutil>=5.9
|
|
25
|
+
Requires-Dist: pydantic>=2.0
|
|
23
26
|
Requires-Dist: docker>=6.0
|
|
24
|
-
Requires-Dist: tree-sitter==0.21.3
|
|
25
|
-
Requires-Dist: tree-sitter-languages
|
|
26
|
-
Requires-Dist: langchain>=0.2.0
|
|
27
27
|
Requires-Dist: langchain-core>=0.2.0
|
|
28
|
+
Requires-Dist: langchain>=0.2.0
|
|
28
29
|
Requires-Dist: langchain-ollama>=0.1.0
|
|
29
|
-
Requires-Dist: langchain-google-genai>=1.0.0
|
|
30
30
|
Requires-Dist: langchain-openai>=0.1.0
|
|
31
|
-
Requires-Dist:
|
|
32
|
-
Requires-Dist:
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
Requires-Dist:
|
|
36
|
-
|
|
31
|
+
Requires-Dist: tree-sitter==0.21.3
|
|
32
|
+
Requires-Dist: tree-sitter-languages
|
|
33
|
+
Provides-Extra: openai
|
|
34
|
+
Provides-Extra: google
|
|
35
|
+
Requires-Dist: langchain-google-genai>=1.0.0; extra == "google"
|
|
36
|
+
Provides-Extra: anthropic
|
|
37
|
+
Requires-Dist: langchain-anthropic>=0.1.0; extra == "anthropic"
|
|
38
|
+
Provides-Extra: memory
|
|
39
|
+
Requires-Dist: chromadb>=0.5.0; extra == "memory"
|
|
40
|
+
Requires-Dist: sentence-transformers>=3.0.0; extra == "memory"
|
|
41
|
+
Provides-Extra: cloud
|
|
42
|
+
Requires-Dist: langchain-openai>=0.1.0; extra == "cloud"
|
|
43
|
+
Requires-Dist: langchain-google-genai>=1.0.0; extra == "cloud"
|
|
44
|
+
Requires-Dist: langchain-anthropic>=0.1.0; extra == "cloud"
|
|
45
|
+
Provides-Extra: all
|
|
46
|
+
Requires-Dist: coreinsight-cli[cloud,memory]; extra == "all"
|
|
37
47
|
Provides-Extra: compat
|
|
38
48
|
Requires-Dist: pysqlite3-binary>=0.5.0; extra == "compat"
|
|
39
49
|
Dynamic: license-file
|
|
@@ -49,7 +59,23 @@ CoreInsight finds hardware bottlenecks in your code, generates optimized replace
|
|
|
49
59
|
## Install
|
|
50
60
|
|
|
51
61
|
```bash
|
|
62
|
+
# OpenAI key - quick install
|
|
63
|
+
pip install coreinsight-cli[openai]
|
|
64
|
+
|
|
65
|
+
# Gemini key - quick install
|
|
66
|
+
pip install coreinsight-cli[google]
|
|
67
|
+
|
|
68
|
+
# Claude key - quick install
|
|
69
|
+
pip install coreinsight-cli[anthropic]
|
|
70
|
+
|
|
71
|
+
# Local Ollama install
|
|
52
72
|
pip install coreinsight-cli
|
|
73
|
+
|
|
74
|
+
# Memory and additional usage install
|
|
75
|
+
pip install coreinsight-cli[openai,memory]
|
|
76
|
+
|
|
77
|
+
# Install everything
|
|
78
|
+
pip install coreinsight-cli[all]
|
|
53
79
|
```
|
|
54
80
|
|
|
55
81
|
**Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)
|
|
@@ -9,7 +9,23 @@ CoreInsight finds hardware bottlenecks in your code, generates optimized replace
|
|
|
9
9
|
## Install
|
|
10
10
|
|
|
11
11
|
```bash
|
|
12
|
+
# OpenAI key - quick install
|
|
13
|
+
pip install coreinsight-cli[openai]
|
|
14
|
+
|
|
15
|
+
# Gemini key - quick install
|
|
16
|
+
pip install coreinsight-cli[google]
|
|
17
|
+
|
|
18
|
+
# Claude key - quick install
|
|
19
|
+
pip install coreinsight-cli[anthropic]
|
|
20
|
+
|
|
21
|
+
# Local Ollama install
|
|
12
22
|
pip install coreinsight-cli
|
|
23
|
+
|
|
24
|
+
# Memory and additional usage install
|
|
25
|
+
pip install coreinsight-cli[openai,memory]
|
|
26
|
+
|
|
27
|
+
# Install everything
|
|
28
|
+
pip install coreinsight-cli[all]
|
|
13
29
|
```
|
|
14
30
|
|
|
15
31
|
**Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)
|
|
@@ -8,11 +8,11 @@ from langchain_core.prompts import PromptTemplate
|
|
|
8
8
|
from langchain_core.exceptions import OutputParserException
|
|
9
9
|
|
|
10
10
|
from langchain_ollama import ChatOllama
|
|
11
|
-
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
12
|
-
from langchain_openai import ChatOpenAI
|
|
13
|
-
from langchain_anthropic import ChatAnthropic
|
|
14
11
|
|
|
15
|
-
from coreinsight.prompts import
|
|
12
|
+
from coreinsight.prompts import (
|
|
13
|
+
SYSTEM_PROMPT, ANALYSIS_TEMPLATE, HARNESS_ADDENDUM,
|
|
14
|
+
_HARNESS_TEMPLATE, _FIX_TEMPLATE, _TEST_CASES_TEMPLATE,
|
|
15
|
+
)
|
|
16
16
|
|
|
17
17
|
# Phrases that appear at the start of a truncated LLM response
|
|
18
18
|
_TRUNCATION_HINTS = (
|
|
@@ -99,117 +99,6 @@ class AuditResult(BaseModel):
|
|
|
99
99
|
optimized_code: Optional[str] = Field(description="The entirely rewritten optimized code, ready to drop in", default=None)
|
|
100
100
|
|
|
101
101
|
|
|
102
|
-
_HARNESS_TEMPLATE = """
|
|
103
|
-
You are a strict QA engineer writing a standalone asymptotic scaling benchmark script in {language}.
|
|
104
|
-
|
|
105
|
-
ORIGINAL FUNCTION (Name: {func_name}):
|
|
106
|
-
{original}
|
|
107
|
-
|
|
108
|
-
OPTIMIZED FUNCTION:
|
|
109
|
-
{optimized}
|
|
110
|
-
|
|
111
|
-
GLOBAL DEPENDENCIES (Helper functions/structs required to run the code):
|
|
112
|
-
{context}
|
|
113
|
-
|
|
114
|
-
Write the complete executable script (e.g., `int main()` or `if __name__ == "__main__":`) that:
|
|
115
|
-
1. Includes necessary imports/headers.
|
|
116
|
-
2. Includes ALL required helper functions or structs from GLOBAL DEPENDENCIES so the script is fully standalone.
|
|
117
|
-
3. Defines BOTH the original and optimized functions exactly as provided above.
|
|
118
|
-
4. Tests multiple data sizes (e.g., N=10, 100, 1000, 5000).
|
|
119
|
-
5. Target Hardware: {hardware_target}. The largest N MUST cross cache boundaries but MUST NOT exceed 20% of available RAM to prevent OOM crashes.
|
|
120
|
-
6. Initializes realistic dummy data for each size N.
|
|
121
|
-
7. Times execution of original vs optimized using high-resolution timers.
|
|
122
|
-
|
|
123
|
-
CRITICAL TIMING:
|
|
124
|
-
- Python: use `time.perf_counter()`. C++: use `std::chrono::high_resolution_clock`.
|
|
125
|
-
- Clamp: `orig_time = max(end - start, 1e-9)` to prevent zero-division.
|
|
126
|
-
- Speedup: `speedup = orig_time / opt_time`.
|
|
127
|
-
|
|
128
|
-
ISOLATION RULES (CRITICAL):
|
|
129
|
-
- This runs in an empty Docker container. NO local files exist.
|
|
130
|
-
- DO NOT use local imports. Define everything inline.
|
|
131
|
-
- DO NOT rename the original function — call it exactly `{func_name}`.
|
|
132
|
-
|
|
133
|
-
OUTPUT FORMAT (CRITICAL):
|
|
134
|
-
Print ONLY this exact CSV to stdout, no other text:
|
|
135
|
-
N,Original_Time,Optimized_Time,Speedup
|
|
136
|
-
10,0.002,0.001,2.00
|
|
137
|
-
|
|
138
|
-
[PYTHON ONLY]: Also import matplotlib, plot results, and save as `benchmark_plot.png`.
|
|
139
|
-
|
|
140
|
-
FORMATTING RULE: Wrap your ENTIRE script in a single markdown code block. No text before or after.
|
|
141
|
-
"""
|
|
142
|
-
|
|
143
|
-
_FIX_TEMPLATE = """
|
|
144
|
-
You are an expert {language} developer. Your previous benchmark script FAILED in an isolated sandbox.
|
|
145
|
-
|
|
146
|
-
ORIGINAL FUNCTION (Name: {func_name}):
|
|
147
|
-
{original}
|
|
148
|
-
|
|
149
|
-
GLOBAL DEPENDENCIES:
|
|
150
|
-
{context}
|
|
151
|
-
|
|
152
|
-
YOUR FAILED SCRIPT:
|
|
153
|
-
{bad_harness}
|
|
154
|
-
|
|
155
|
-
EXECUTION ERROR LOGS:
|
|
156
|
-
{error_logs}
|
|
157
|
-
|
|
158
|
-
ISOLATION CONSTRAINTS (CRITICAL):
|
|
159
|
-
- Empty Docker container. No local files. NO local imports.
|
|
160
|
-
- Define `{func_name}` and all GLOBAL DEPENDENCIES inline.
|
|
161
|
-
|
|
162
|
-
FIX INSTRUCTIONS:
|
|
163
|
-
1. Diagnose the failure from the error logs above.
|
|
164
|
-
2. Fix imports, NameErrors, type mismatches, infinite loops, or OOM issues.
|
|
165
|
-
3. Maintain the CSV stdout format exactly: N,Original_Time,Optimized_Time,Speedup
|
|
166
|
-
4. Use high-resolution timers and clamp with `max(t, 1e-9)`.
|
|
167
|
-
5. [PYTHON ONLY]: Save benchmark plot as `benchmark_plot.png`.
|
|
168
|
-
|
|
169
|
-
FORMATTING RULE: Wrap your ENTIRE fixed script in a single markdown code block. No text before or after.
|
|
170
|
-
"""
|
|
171
|
-
|
|
172
|
-
_TEST_CASES_TEMPLATE = """
|
|
173
|
-
You are a QA engineer writing correctness test cases for a function.
|
|
174
|
-
|
|
175
|
-
FUNCTION NAME: {func_name}
|
|
176
|
-
LANGUAGE: {language}
|
|
177
|
-
|
|
178
|
-
FUNCTION SIGNATURE AND BODY:
|
|
179
|
-
{original}
|
|
180
|
-
|
|
181
|
-
GLOBAL DEPENDENCIES (helper functions / structs this function relies on):
|
|
182
|
-
{context}
|
|
183
|
-
|
|
184
|
-
Your task: generate {num_cases} diverse test cases that call `{func_name}` with different
|
|
185
|
-
arguments. The cases must cover:
|
|
186
|
-
- Small inputs (N ~ 10)
|
|
187
|
-
- Medium inputs (N ~ 100-500)
|
|
188
|
-
- Edge cases: empty collections, single-element, all-zeros, negative values (where applicable)
|
|
189
|
-
- Boundary conditions specific to this function's logic
|
|
190
|
-
|
|
191
|
-
OUTPUT FORMAT — respond with ONLY a valid JSON array, nothing else. No markdown fences,
|
|
192
|
-
no explanation. Each element must be a JSON object with exactly two keys:
|
|
193
|
-
"args" : a JSON array of positional arguments (use only JSON-serialisable types:
|
|
194
|
-
numbers, strings, booleans, arrays, objects — NO numpy, NO bytes)
|
|
195
|
-
"kwargs": a JSON object of keyword arguments (may be empty {{}})
|
|
196
|
-
|
|
197
|
-
Example (do NOT copy this — generate cases specific to {func_name}):
|
|
198
|
-
[
|
|
199
|
-
{{"args": [[1, 2, 3]], "kwargs": {{}}}},
|
|
200
|
-
{{"args": [[]], "kwargs": {{}}}},
|
|
201
|
-
{{"args": [[9, -1, 4, 0, 7]], "kwargs": {{"reverse": true}}}}
|
|
202
|
-
]
|
|
203
|
-
|
|
204
|
-
CONSTRAINTS:
|
|
205
|
-
- All values must be plain JSON types — no numpy arrays, no custom objects.
|
|
206
|
-
- If the function operates on a matrix, represent it as a list-of-lists.
|
|
207
|
-
- If the function takes a size integer N, generate concrete data of that size inline.
|
|
208
|
-
- Do NOT include function calls or expressions — only literal values.
|
|
209
|
-
- Produce exactly {num_cases} test cases.
|
|
210
|
-
"""
|
|
211
|
-
|
|
212
|
-
|
|
213
102
|
class AnalyzerAgent:
|
|
214
103
|
def __init__(self, provider="ollama", model_name="llama3.2", api_keys=None, model_tier="large"):
|
|
215
104
|
self.model_tier = model_tier
|
|
@@ -217,70 +106,15 @@ class AnalyzerAgent:
|
|
|
217
106
|
self.provider = provider
|
|
218
107
|
api_keys = api_keys or {}
|
|
219
108
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
api_key=api_keys["openai"],
|
|
226
|
-
temperature=0.1,
|
|
227
|
-
model_kwargs={"response_format": {"type": "json_object"}},
|
|
228
|
-
)
|
|
229
|
-
self.json_llm = self.base_llm
|
|
230
|
-
|
|
109
|
+
# Reuse shared LLM factory — handles lazy imports and provider validation
|
|
110
|
+
from coreinsight.prompts import ModelTier
|
|
111
|
+
if provider == "ollama":
|
|
112
|
+
api_keys["_ctx"] = 4096 if model_tier == ModelTier.SMALL else 8192
|
|
113
|
+
api_keys["_predict"] = 2048 if model_tier == ModelTier.SMALL else 4096
|
|
231
114
|
elif provider == "local_server":
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
self.base_llm = ChatOpenAI(
|
|
236
|
-
model=model_name,
|
|
237
|
-
api_key="not-needed",
|
|
238
|
-
base_url=base_url,
|
|
239
|
-
temperature=0.1,
|
|
240
|
-
max_tokens=_max_tokens,
|
|
241
|
-
model_kwargs={"response_format": {"type": "json_object"}},
|
|
242
|
-
)
|
|
243
|
-
self.json_llm = self.base_llm
|
|
244
|
-
|
|
245
|
-
elif provider == "anthropic":
|
|
246
|
-
if not api_keys.get("anthropic"):
|
|
247
|
-
raise ValueError("Anthropic API Key required.")
|
|
248
|
-
self.base_llm = ChatAnthropic(
|
|
249
|
-
model=model_name,
|
|
250
|
-
api_key=api_keys["anthropic"],
|
|
251
|
-
temperature=0.1,
|
|
252
|
-
)
|
|
253
|
-
# Anthropic doesn't support response_format; JSON is enforced via prompt only
|
|
254
|
-
self.json_llm = self.base_llm
|
|
255
|
-
|
|
256
|
-
elif provider == "google":
|
|
257
|
-
if not api_keys.get("google"):
|
|
258
|
-
raise ValueError("Google Gemini API Key required.")
|
|
259
|
-
self.base_llm = ChatGoogleGenerativeAI(
|
|
260
|
-
model=model_name,
|
|
261
|
-
google_api_key=api_keys["google"],
|
|
262
|
-
temperature=0.1,
|
|
263
|
-
convert_system_message_to_human=True,
|
|
264
|
-
)
|
|
265
|
-
self.json_llm = self.base_llm
|
|
266
|
-
|
|
267
|
-
else: # Ollama default
|
|
268
|
-
from coreinsight.prompts import ModelTier
|
|
269
|
-
# Small models (7B) typically have 4096 native context.
|
|
270
|
-
# Asking for more causes silent degradation or OOM on the host.
|
|
271
|
-
# Medium/large local models can handle 8192 comfortably.
|
|
272
|
-
_ctx = 4096 if model_tier == ModelTier.SMALL else 8192
|
|
273
|
-
# num_predict: small models need room for JSON + code in one shot.
|
|
274
|
-
# Capping at 2048 for small prevents runaway generation that hits
|
|
275
|
-
# the limit mid-JSON and returns truncated garbage.
|
|
276
|
-
_predict = 2048 if model_tier == ModelTier.SMALL else 4096
|
|
277
|
-
self.base_llm = ChatOllama(
|
|
278
|
-
model=model_name,
|
|
279
|
-
temperature=0.1,
|
|
280
|
-
num_predict=_predict,
|
|
281
|
-
num_ctx=_ctx,
|
|
282
|
-
)
|
|
283
|
-
self.json_llm = self.base_llm.bind(format="json")
|
|
115
|
+
api_keys["_predict"] = 2048 if model_tier == ModelTier.SMALL else 4096
|
|
116
|
+
|
|
117
|
+
self.base_llm, self.json_llm = _build_llm(provider, model_name, api_keys)
|
|
284
118
|
|
|
285
119
|
self.prompt = PromptTemplate(
|
|
286
120
|
template=ANALYSIS_TEMPLATE + "\n\n{format_instructions}",
|
|
@@ -556,16 +390,17 @@ class AnalyzerAgent:
|
|
|
556
390
|
# ---------------------------------------------------------------------------
|
|
557
391
|
|
|
558
392
|
def _build_llm(provider: str, model_name: str, api_keys: dict):
|
|
559
|
-
"""
|
|
560
|
-
Shared LLM factory for all multi-agent classes.
|
|
561
|
-
Returns (base_llm, json_llm) — same pattern as AnalyzerAgent.__init__.
|
|
562
|
-
Raises ValueError on missing credentials.
|
|
563
|
-
"""
|
|
564
393
|
api_keys = api_keys or {}
|
|
565
394
|
|
|
566
395
|
if provider == "openai":
|
|
567
396
|
if not api_keys.get("openai"):
|
|
568
397
|
raise ValueError("OpenAI API key required.")
|
|
398
|
+
try:
|
|
399
|
+
from langchain_openai import ChatOpenAI
|
|
400
|
+
except ImportError:
|
|
401
|
+
raise ImportError(
|
|
402
|
+
"OpenAI provider requires: pip install coreinsight-cli[openai]"
|
|
403
|
+
)
|
|
569
404
|
llm = ChatOpenAI(
|
|
570
405
|
model=model_name,
|
|
571
406
|
api_key=api_keys["openai"],
|
|
@@ -575,8 +410,14 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
|
|
|
575
410
|
return llm, llm
|
|
576
411
|
|
|
577
412
|
if provider == "local_server":
|
|
413
|
+
try:
|
|
414
|
+
from langchain_openai import ChatOpenAI
|
|
415
|
+
except ImportError:
|
|
416
|
+
raise ImportError(
|
|
417
|
+
"local_server provider requires: pip install coreinsight-cli[openai]"
|
|
418
|
+
)
|
|
578
419
|
base_url = api_keys.get("local_url", "http://localhost:1234/v1")
|
|
579
|
-
_max_tokens = api_keys.pop("_predict", 4096)
|
|
420
|
+
_max_tokens = api_keys.pop("_predict", 4096)
|
|
580
421
|
llm = ChatOpenAI(
|
|
581
422
|
model=model_name,
|
|
582
423
|
api_key="not-needed",
|
|
@@ -590,6 +431,12 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
|
|
|
590
431
|
if provider == "anthropic":
|
|
591
432
|
if not api_keys.get("anthropic"):
|
|
592
433
|
raise ValueError("Anthropic API key required.")
|
|
434
|
+
try:
|
|
435
|
+
from langchain_anthropic import ChatAnthropic
|
|
436
|
+
except ImportError:
|
|
437
|
+
raise ImportError(
|
|
438
|
+
"Anthropic provider requires: pip install coreinsight-cli[anthropic]"
|
|
439
|
+
)
|
|
593
440
|
llm = ChatAnthropic(
|
|
594
441
|
model=model_name,
|
|
595
442
|
api_key=api_keys["anthropic"],
|
|
@@ -600,6 +447,12 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
|
|
|
600
447
|
if provider == "google":
|
|
601
448
|
if not api_keys.get("google"):
|
|
602
449
|
raise ValueError("Google Gemini API key required.")
|
|
450
|
+
try:
|
|
451
|
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
452
|
+
except ImportError:
|
|
453
|
+
raise ImportError(
|
|
454
|
+
"Google provider requires: pip install coreinsight-cli[google]"
|
|
455
|
+
)
|
|
603
456
|
llm = ChatGoogleGenerativeAI(
|
|
604
457
|
model=model_name,
|
|
605
458
|
google_api_key=api_keys["google"],
|
|
@@ -608,9 +461,7 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
|
|
|
608
461
|
)
|
|
609
462
|
return llm, llm
|
|
610
463
|
|
|
611
|
-
# Ollama default
|
|
612
|
-
# calling agent which knows its own model_tier.
|
|
613
|
-
# Default to medium-safe values; callers override via kwargs if needed.
|
|
464
|
+
# Ollama default
|
|
614
465
|
_ctx = api_keys.pop("_ctx", 8192)
|
|
615
466
|
_predict = api_keys.pop("_predict", 4096)
|
|
616
467
|
base = ChatOllama(
|
|
@@ -650,13 +501,13 @@ class BottleneckAgent:
|
|
|
650
501
|
api_keys: dict,
|
|
651
502
|
model_tier: str,
|
|
652
503
|
) -> None:
|
|
653
|
-
from coreinsight.prompts import
|
|
504
|
+
from coreinsight.prompts import BOTTLENECK_TEMPLATES, SYSTEM_PROMPT
|
|
654
505
|
self.model_tier = model_tier
|
|
655
506
|
self.parser = JsonOutputParser(pydantic_object=AuditResult)
|
|
656
507
|
self._base_llm, self._json_llm = _build_llm_tiered(provider, model_name, api_keys, model_tier)
|
|
657
508
|
|
|
658
509
|
self._prompt = PromptTemplate(
|
|
659
|
-
template=
|
|
510
|
+
template=BOTTLENECK_TEMPLATES[model_tier],
|
|
660
511
|
input_variables=[
|
|
661
512
|
"language", "code_content", "context", "hardware_target",
|
|
662
513
|
],
|
|
@@ -736,10 +587,10 @@ class OptimizerAgent:
|
|
|
736
587
|
api_keys: dict,
|
|
737
588
|
model_tier: str,
|
|
738
589
|
) -> None:
|
|
739
|
-
from coreinsight.prompts import
|
|
590
|
+
from coreinsight.prompts import OPTIMIZER_TEMPLATES
|
|
740
591
|
self.model_tier = model_tier
|
|
741
592
|
self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
|
|
742
|
-
self._template =
|
|
593
|
+
self._template = OPTIMIZER_TEMPLATES[model_tier]
|
|
743
594
|
|
|
744
595
|
def _extract_code(self, raw: str) -> str:
|
|
745
596
|
"""Reuse the same extraction logic as AnalyzerAgent."""
|
|
@@ -898,7 +749,13 @@ class HarnessAgent:
|
|
|
898
749
|
except Exception as e:
|
|
899
750
|
return False, f"Harness generation failed: {e}", None, 0
|
|
900
751
|
|
|
901
|
-
|
|
752
|
+
# Catch missing int main() before hitting the sandbox
|
|
753
|
+
if language in ("cpp", "c++") and "int main(" not in harness and "int main (" not in harness:
|
|
754
|
+
logs = "Missing CSV output (exit 1).\nFull output:\nundefined reference to `main'"
|
|
755
|
+
success = False
|
|
756
|
+
plot_data = None
|
|
757
|
+
else:
|
|
758
|
+
success, logs, plot_data = sandbox.execute_benchmark(harness, language)
|
|
902
759
|
is_valid = self._check_speedup(success, logs)
|
|
903
760
|
retries = 0
|
|
904
761
|
|
|
@@ -921,7 +778,12 @@ class HarnessAgent:
|
|
|
921
778
|
logs += f"\nFix generation failed: {e}"
|
|
922
779
|
break
|
|
923
780
|
|
|
924
|
-
|
|
781
|
+
if language in ("cpp", "c++") and "int main(" not in harness and "int main (" not in harness:
|
|
782
|
+
logs = "Missing CSV output (exit 1).\nFull output:\nundefined reference to `main'"
|
|
783
|
+
success = False
|
|
784
|
+
plot_data = None
|
|
785
|
+
else:
|
|
786
|
+
success, logs, plot_data = sandbox.execute_benchmark(harness, language)
|
|
925
787
|
is_valid = self._check_speedup(success, logs)
|
|
926
788
|
retries += 1
|
|
927
789
|
|
|
@@ -161,7 +161,6 @@ def _run_multi_agent(
|
|
|
161
161
|
optimized_code = multi_agents["optimizer"].generate(
|
|
162
162
|
func_name, original_code, result,
|
|
163
163
|
language, context, hardware_target,
|
|
164
|
-
stream_callback=stream_callback, # readable code, stream it
|
|
165
164
|
)
|
|
166
165
|
if not optimized_code or optimized_code == original_code:
|
|
167
166
|
return result, None, False, "", None, False
|
|
@@ -205,21 +204,78 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
|
|
|
205
204
|
_log(func_name, "Fetching RAG context...")
|
|
206
205
|
context = indexer.get_context_for_code(original_code) if indexer else ""
|
|
207
206
|
|
|
208
|
-
# 0b. Memory lookup — skip LLM
|
|
207
|
+
# 0b. Memory lookup — skip LLM if we've seen this pattern before,
|
|
208
|
+
# but validate the stored result before trusting it:
|
|
209
|
+
# Gate A: no optimized code stored → previous run was incomplete, re-run LLM
|
|
210
|
+
# Gate B: correctness < 50% last run → keep analysis, re-run correctness only
|
|
211
|
+
# Gate C: result is good → return as-is
|
|
209
212
|
if memory:
|
|
210
213
|
memory_hit = memory.lookup(original_code, language)
|
|
211
214
|
if memory_hit:
|
|
212
215
|
label = "exact match" if memory_hit.is_exact else f"similarity {memory_hit.similarity:.1%}"
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
"
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
216
|
+
|
|
217
|
+
# Gate A: stored result has no optimized code — not useful, fall through to LLM
|
|
218
|
+
if not memory_hit.optimized_code:
|
|
219
|
+
_log(func_name, f"Memory hit ({label}) — no optimized code stored, re-running LLM", style="yellow")
|
|
220
|
+
memory_hit = None # fall through; LLM path runs below as normal
|
|
221
|
+
|
|
222
|
+
# Gate B: correctness was poor last time — re-run the correctness check only
|
|
223
|
+
elif memory_hit.total_cases > 0 and memory_hit.correctness_cases / memory_hit.total_cases < 0.5:
|
|
224
|
+
_log(
|
|
225
|
+
func_name,
|
|
226
|
+
f"Memory hit ({label}) — correctness was "
|
|
227
|
+
f"{memory_hit.correctness_cases}/{memory_hit.total_cases} last run, re-checking",
|
|
228
|
+
style="yellow",
|
|
229
|
+
)
|
|
230
|
+
recalled_result = {
|
|
231
|
+
"severity": memory_hit.severity,
|
|
232
|
+
"issue": memory_hit.issue,
|
|
233
|
+
"reasoning": memory_hit.reasoning,
|
|
234
|
+
"optimized_code": memory_hit.optimized_code,
|
|
235
|
+
"suggestion": "",
|
|
236
|
+
"bottlenecks": [],
|
|
237
|
+
}
|
|
238
|
+
new_verification = None
|
|
239
|
+
if not getattr(sandbox, "disabled", False):
|
|
240
|
+
stored_cases = memory.lookup_test_cases(original_code)
|
|
241
|
+
if stored_cases:
|
|
242
|
+
_log(func_name, "Re-running correctness sandbox with stored test cases...", style="dim")
|
|
243
|
+
correctness = sandbox.verify_correctness_only(
|
|
244
|
+
original_code=original_code,
|
|
245
|
+
optimized_code=memory_hit.optimized_code,
|
|
246
|
+
original_func_name=func_name,
|
|
247
|
+
optimized_func_name=func_name,
|
|
248
|
+
test_cases=stored_cases,
|
|
249
|
+
language=language,
|
|
250
|
+
context=context,
|
|
251
|
+
)
|
|
252
|
+
_log(func_name, f"Re-verification: {correctness.passed_cases}/{correctness.total_cases} passed", style="dim")
|
|
253
|
+
try:
|
|
254
|
+
from coreinsight.sandbox import VerificationResult, SpeedupVerification
|
|
255
|
+
new_verification = VerificationResult(
|
|
256
|
+
speedup=SpeedupVerification(
|
|
257
|
+
verified=True,
|
|
258
|
+
computed_speedups=[memory_hit.avg_speedup] if memory_hit.avg_speedup else [],
|
|
259
|
+
details=f"Speedup recalled from memory: {memory_hit.avg_speedup:.2f}x",
|
|
260
|
+
),
|
|
261
|
+
correctness=correctness,
|
|
262
|
+
)
|
|
263
|
+
except Exception:
|
|
264
|
+
pass # verification display is non-critical
|
|
265
|
+
return func_name, recalled_result, None, None, new_verification, None, memory_hit, False
|
|
266
|
+
|
|
267
|
+
# Gate C: stored result is complete and correctness is acceptable
|
|
268
|
+
else:
|
|
269
|
+
_log(func_name, f"⚡ Recalled from memory ({label}) — skipping LLM", style="bold cyan")
|
|
270
|
+
recalled_result = {
|
|
271
|
+
"severity": memory_hit.severity,
|
|
272
|
+
"issue": memory_hit.issue,
|
|
273
|
+
"reasoning": memory_hit.reasoning,
|
|
274
|
+
"optimized_code": memory_hit.optimized_code,
|
|
275
|
+
"suggestion": "",
|
|
276
|
+
"bottlenecks": [],
|
|
277
|
+
}
|
|
278
|
+
return func_name, recalled_result, None, None, None, None, memory_hit, False
|
|
223
279
|
|
|
224
280
|
# ── Route: single-agent vs multi-agent ──────────────────────────
|
|
225
281
|
if agent_mode == "multi" and multi_agents:
|
|
@@ -240,8 +296,37 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
|
|
|
240
296
|
if result is None:
|
|
241
297
|
return func_name, None, None, f"❌ Analysis error: {logs}", None, None, None, False
|
|
242
298
|
|
|
299
|
+
# Retry gate: Low severity or missing optimized code often means the model
|
|
300
|
+
# defaulted to "looks fine" rather than truly auditing.
|
|
301
|
+
# Retry up to 2 times before accepting the conclusion.
|
|
302
|
+
_MAX_ANALYSIS_RETRIES = 2
|
|
303
|
+
_retry = 0
|
|
304
|
+
while (result.get("severity") == "Low" or not optimized_code) and _retry < _MAX_ANALYSIS_RETRIES:
|
|
305
|
+
_retry += 1
|
|
306
|
+
_log(func_name, f"Low/missing result — retrying analysis ({_retry}/{_MAX_ANALYSIS_RETRIES})...", style="yellow")
|
|
307
|
+
if agent_mode == "multi" and multi_agents:
|
|
308
|
+
result, optimized_code, success, logs, plot_data, is_valid_optimization = \
|
|
309
|
+
_run_multi_agent(
|
|
310
|
+
func_name, original_code, language, context,
|
|
311
|
+
hardware_target, sandbox, multi_agents, tier_limits,
|
|
312
|
+
stream_callback=stream_callback,
|
|
313
|
+
)
|
|
314
|
+
else:
|
|
315
|
+
result, optimized_code, success, logs, plot_data, is_valid_optimization = \
|
|
316
|
+
_run_single_agent(
|
|
317
|
+
func_name, original_code, language, context,
|
|
318
|
+
hardware_target, sandbox, agent, tier_limits,
|
|
319
|
+
stream_callback=stream_callback,
|
|
320
|
+
)
|
|
321
|
+
if result is None:
|
|
322
|
+
break
|
|
323
|
+
|
|
324
|
+
if result is None:
|
|
325
|
+
return func_name, None, None, f"❌ Analysis error after {_retry} retries: {logs}", None, None, None, False
|
|
326
|
+
|
|
243
327
|
if result.get("severity") == "Low" or not optimized_code:
|
|
244
|
-
|
|
328
|
+
confirmed = f" (confirmed after {_retry} retries)" if _retry > 0 else ""
|
|
329
|
+
return func_name, None, None, f"✅ No significant bottlenecks found{confirmed}.", None, None, None, False
|
|
245
330
|
|
|
246
331
|
# 3. Verification + AI-free hardware profiling
|
|
247
332
|
verification = None
|
|
@@ -288,11 +373,29 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
|
|
|
288
373
|
|
|
289
374
|
except Exception as e:
|
|
290
375
|
err_str = str(e)
|
|
291
|
-
|
|
292
|
-
|
|
376
|
+
err_low = err_str.lower()
|
|
377
|
+
if "context" in err_low and "limit" in err_low:
|
|
378
|
+
_log(func_name, "Context limit hit", style="bold yellow")
|
|
379
|
+
return func_name, None, None, (
|
|
380
|
+
"⚠️ Context limit — try a model with a larger context window, "
|
|
381
|
+
"or split the function into smaller pieces."
|
|
382
|
+
), None, None, None, False
|
|
383
|
+
if any(k in err_low for k in ("cannot connect", "connection refused", "docker")):
|
|
384
|
+
_log(func_name, "Docker unavailable", style="bold yellow")
|
|
385
|
+
return func_name, None, None, (
|
|
386
|
+
"⚠️ Docker is not running — start Docker Desktop and try again.\n"
|
|
387
|
+
" Skip the sandbox with: coreinsight analyze --no-docker <file>"
|
|
388
|
+
), None, None, None, False
|
|
389
|
+
if "timeout" in err_low or "timed out" in err_low:
|
|
390
|
+
_log(func_name, "Sandbox timed out", style="bold yellow")
|
|
391
|
+
return func_name, None, None, (
|
|
392
|
+
"⚠️ Sandbox timed out — the benchmark likely contains an infinite loop.\n"
|
|
393
|
+
" The LLM analysis result above is still valid."
|
|
394
|
+
), None, None, None, False
|
|
395
|
+
if "out of memory" in err_low or "oom" in err_low:
|
|
396
|
+
_log(func_name, "Sandbox OOM", style="bold yellow")
|
|
293
397
|
return func_name, None, None, (
|
|
294
|
-
|
|
295
|
-
f"Try a model with a larger context window, or split the function."
|
|
398
|
+
"⚠️ Sandbox ran out of memory. Try --no-docker or reduce the file size."
|
|
296
399
|
), None, None, None, False
|
|
297
400
|
_log(func_name, f"Failed: {e}", style="bold red")
|
|
298
401
|
return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
|
|
@@ -763,7 +866,16 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
|
|
|
763
866
|
|
|
764
867
|
except Exception as exc:
|
|
765
868
|
with print_lock:
|
|
766
|
-
|
|
869
|
+
exc_low = str(exc).lower()
|
|
870
|
+
if any(k in exc_low for k in ("docker", "cannot connect", "connection refused")):
|
|
871
|
+
console.print(f"[bold yellow]⚠️ {func['name']}: Docker unavailable — start Docker Desktop and retry.[/bold yellow]")
|
|
872
|
+
elif "timeout" in exc_low or "timed out" in exc_low:
|
|
873
|
+
console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox timed out.[/bold yellow]")
|
|
874
|
+
elif "out of memory" in exc_low or "oom" in exc_low:
|
|
875
|
+
console.print(f"[bold yellow]⚠️ {func['name']}: Sandbox ran out of memory.[/bold yellow]")
|
|
876
|
+
else:
|
|
877
|
+
from rich.markup import escape
|
|
878
|
+
console.print(f"[bold red]❌ {func['name']}: Unexpected error — {escape(str(exc))}[/bold red]")
|
|
767
879
|
|
|
768
880
|
console.print(Panel.fit(f"✅ [bold green]Analysis Complete![/bold green] Final report saved to:\n{report_path.absolute()}"))
|
|
769
881
|
|
|
@@ -917,7 +1029,8 @@ def _run_test_cmd(func_name: str, no_docker: bool = False):
|
|
|
917
1029
|
num_cases=tier_limits["num_test_cases"],
|
|
918
1030
|
)
|
|
919
1031
|
except Exception as exc:
|
|
920
|
-
|
|
1032
|
+
from rich.markup import escape
|
|
1033
|
+
console.print(f"[red]LLM error generating test cases: {escape(str(exc))}[/red]")
|
|
921
1034
|
return
|
|
922
1035
|
|
|
923
1036
|
if not test_cases:
|
|
@@ -1034,7 +1147,8 @@ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv
|
|
|
1034
1147
|
metadatas = all_records.get("metadatas", []) or []
|
|
1035
1148
|
ids = all_records.get("ids", []) or []
|
|
1036
1149
|
except Exception as exc:
|
|
1037
|
-
|
|
1150
|
+
from rich.markup import escape
|
|
1151
|
+
console.print(f"[red]Failed to read memory store: {escape(str(exc))}[/red]")
|
|
1038
1152
|
return
|
|
1039
1153
|
|
|
1040
1154
|
# Build the detail table
|