coreinsight-cli 0.2.9__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. coreinsight_cli-0.3.0/PKG-INFO +173 -0
  2. coreinsight_cli-0.3.0/README.md +133 -0
  3. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/analyzer.py +155 -35
  4. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/config.py +27 -13
  5. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/main.py +47 -9
  6. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/sandbox.py +32 -4
  7. coreinsight_cli-0.3.0/coreinsight_cli.egg-info/PKG-INFO +173 -0
  8. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/pyproject.toml +1 -1
  9. coreinsight_cli-0.2.9/PKG-INFO +0 -290
  10. coreinsight_cli-0.2.9/README.md +0 -250
  11. coreinsight_cli-0.2.9/coreinsight_cli.egg-info/PKG-INFO +0 -290
  12. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/LICENSE +0 -0
  13. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/__init__.py +0 -0
  14. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/demo/__init__.py +0 -0
  15. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/demo/bad_loop.py +0 -0
  16. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/demo/data_processor.py +0 -0
  17. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/demo/slow.cpp +0 -0
  18. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/embeddings.py +0 -0
  19. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/hardware.py +0 -0
  20. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/indexer.py +0 -0
  21. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/memory.py +0 -0
  22. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/parser.py +0 -0
  23. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/profiler.py +0 -0
  24. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/prompts.py +0 -0
  25. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/scanner.py +0 -0
  26. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight/tui.py +0 -0
  27. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight_cli.egg-info/SOURCES.txt +0 -0
  28. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
  29. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight_cli.egg-info/entry_points.txt +0 -0
  30. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight_cli.egg-info/requires.txt +0 -0
  31. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/coreinsight_cli.egg-info/top_level.txt +0 -0
  32. {coreinsight_cli-0.2.9 → coreinsight_cli-0.3.0}/setup.cfg +0 -0
@@ -0,0 +1,173 @@
1
+ Metadata-Version: 2.4
2
+ Name: coreinsight-cli
3
+ Version: 0.3.0
4
+ Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
+ Author: Varun Jani
6
+ License: GPL-3.0-or-later
7
+ Project-URL: Homepage, https://github.com/Prais3/coreinsight_cli
8
+ Project-URL: Bug Tracker, https://github.com/Prais3/coreinsight_cli/issues
9
+ Keywords: performance,profiling,optimization,llm,cuda,cpp,python,hpc,benchmarking
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Topic :: Software Development :: Debuggers
18
+ Classifier: Topic :: System :: Hardware
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: rich>=13.0
23
+ Requires-Dist: docker>=6.0
24
+ Requires-Dist: tree-sitter==0.21.3
25
+ Requires-Dist: tree-sitter-languages
26
+ Requires-Dist: langchain>=0.2.0
27
+ Requires-Dist: langchain-core>=0.2.0
28
+ Requires-Dist: langchain-ollama>=0.1.0
29
+ Requires-Dist: langchain-google-genai>=1.0.0
30
+ Requires-Dist: langchain-openai>=0.1.0
31
+ Requires-Dist: langchain-anthropic>=0.1.0
32
+ Requires-Dist: pydantic>=2.0
33
+ Requires-Dist: chromadb>=0.5.0
34
+ Requires-Dist: sentence-transformers>=3.0.0
35
+ Requires-Dist: textual>=0.60.0
36
+ Requires-Dist: psutil>=5.9
37
+ Provides-Extra: compat
38
+ Requires-Dist: pysqlite3-binary>=0.5.0; extra == "compat"
39
+ Dynamic: license-file
40
+
41
+ # CoreInsight
42
+
43
+ **AI-powered performance profiler for Python, C++, and CUDA.**
44
+
45
+ CoreInsight finds hardware bottlenecks in your code, generates optimized replacements, and verifies the speedup mathematically inside an isolated Docker sandbox — all running locally on your machine.
46
+
47
+ ---
48
+
49
+ ## Install
50
+
51
+ ```bash
52
+ pip install coreinsight-cli
53
+ ```
54
+
55
+ **Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)
56
+
57
+ ---
58
+
59
+ ## Quick start
60
+
61
+ ```bash
62
+ # Configure your AI provider (defaults to Ollama + llama3.2)
63
+ coreinsight configure
64
+
65
+ # Run the built-in demo
66
+ coreinsight demo
67
+
68
+ # Analyse your own file
69
+ coreinsight analyze path/to/your_file.py
70
+ ```
71
+
72
+ ---
73
+
74
+ ## What it does
75
+
76
+ CoreInsight runs a full optimization pipeline on every function it extracts:
77
+
78
+ 1. **Bottleneck analysis**
79
+ 2. **Code generation**
80
+ 3. **Sandbox verification**
81
+ 4. **Hardware profiling**
82
+
83
+ Every result is stored in a local vector database. On repeat analyses, matching patterns are recalled instantly — no LLM call, no sandbox spin-up.
84
+
85
+ ---
86
+
87
+ ## Commands
88
+
89
+ | Command | Description |
90
+ |:--------|:------------|
91
+ | `coreinsight analyze <file>` | Analyse a `.py`, `.cpp`, or `.cu` file |
92
+ | `coreinsight demo [--lang cpp]` | Run on a built-in example |
93
+ | `coreinsight configure` | Set up AI provider and API keys |
94
+ | `coreinsight configure --pro-key <key>` | Activate Pro tier |
95
+ | `coreinsight memory` | Inspect stored optimizations |
96
+ | `coreinsight memory --clear` | Wipe the memory store |
97
+ | `coreinsight memory --export out.csv` | Export memory to CSV or Markdown |
98
+ | `coreinsight index [--dir <path>]` | Index a repo for cross-file RAG context |
99
+ | `coreinsight scan [--dir <path>]` | Rank hotspots by complexity without LLM |
100
+ | `coreinsight view` | Launch the interactive TUI |
101
+
102
+ All commands accept `--no-docker` to skip sandboxing when Docker is unavailable.
103
+
104
+ ---
105
+
106
+ ## Supported languages
107
+
108
+ | Language | Analysis | Benchmarking | Correctness |
109
+ |:---------|:--------:|:------------:|:-----------:|
110
+ | Python | ✅ | ✅ | ✅ |
111
+ | C++ | ✅ | ✅ | ✅ |
112
+ | CUDA | ✅ | ✅ | — |
113
+
114
+ ---
115
+
116
+ ## AI providers
117
+
118
+ | Provider | Tier | Notes |
119
+ |:---------|:----:|:------|
120
+ | Ollama | Free | `ollama pull llama3.2` |
121
+ | LM Studio / vLLM | Free | Any OpenAI-compatible server |
122
+ | OpenAI | Pro | GPT 5.3 recommended |
123
+ | Anthropic | Pro | Claude 4.6 Sonnet recommended |
124
+ | Google Gemini | Pro | Gemini 2.5 Pro recommended |
125
+
126
+ Local providers run entirely on-device. No code leaves your machine unless you configure a cloud provider.
127
+
128
+ ---
129
+
130
+ ## Pro — free during beta
131
+
132
+ Pro unlocks cloud providers and AI-free hardware profiling.
133
+ Keys are being distributed manually during the beta.
134
+
135
+ **Request a key → [tally.so/r/xXZ9YE](https://tally.so/r/xXZ9YE)**
136
+
137
+ ```bash
138
+ coreinsight configure --pro-key <your-key>
139
+ ```
140
+
141
+ ---
142
+
143
+ ## Privacy
144
+
145
+ - **Local providers** — nothing leaves your machine
146
+ - **Cloud providers** — only the function code you analyse is sent to the provider API, under your own key
147
+ - The memory store lives at `~/.coreinsight/memory_db` on your filesystem
148
+
149
+ ---
150
+
151
+ ## Troubleshooting
152
+
153
+ **Docker not running**
154
+ ```
155
+ open Docker Desktop, or: sudo systemctl start docker
156
+ ```
157
+
158
+ **Ollama model not found**
159
+ ```bash
160
+ ollama pull llama3.2
161
+ ```
162
+
163
+ **ChromaDB / SQLite error**
164
+ ```bash
165
+ pip install pysqlite3-binary
166
+ ```
167
+
168
+ ---
169
+
170
+ ## Links
171
+
172
+ - PyPI: [pypi.org/project/coreinsight-cli](https://pypi.org/project/coreinsight-cli/)
173
+ - GitHub: [github.com/Prais3/coreinsight_cli](https://github.com/Prais3/coreinsight_cli)
@@ -0,0 +1,133 @@
1
+ # CoreInsight
2
+
3
+ **AI-powered performance profiler for Python, C++, and CUDA.**
4
+
5
+ CoreInsight finds hardware bottlenecks in your code, generates optimized replacements, and verifies the speedup mathematically inside an isolated Docker sandbox — all running locally on your machine.
6
+
7
+ ---
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install coreinsight-cli
13
+ ```
14
+
15
+ **Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)
16
+
17
+ ---
18
+
19
+ ## Quick start
20
+
21
+ ```bash
22
+ # Configure your AI provider (defaults to Ollama + llama3.2)
23
+ coreinsight configure
24
+
25
+ # Run the built-in demo
26
+ coreinsight demo
27
+
28
+ # Analyse your own file
29
+ coreinsight analyze path/to/your_file.py
30
+ ```
31
+
32
+ ---
33
+
34
+ ## What it does
35
+
36
+ CoreInsight runs a full optimization pipeline on every function it extracts:
37
+
38
+ 1. **Bottleneck analysis**
39
+ 2. **Code generation**
40
+ 3. **Sandbox verification**
41
+ 4. **Hardware profiling**
42
+
43
+ Every result is stored in a local vector database. On repeat analyses, matching patterns are recalled instantly — no LLM call, no sandbox spin-up.
44
+
45
+ ---
46
+
47
+ ## Commands
48
+
49
+ | Command | Description |
50
+ |:--------|:------------|
51
+ | `coreinsight analyze <file>` | Analyse a `.py`, `.cpp`, or `.cu` file |
52
+ | `coreinsight demo [--lang cpp]` | Run on a built-in example |
53
+ | `coreinsight configure` | Set up AI provider and API keys |
54
+ | `coreinsight configure --pro-key <key>` | Activate Pro tier |
55
+ | `coreinsight memory` | Inspect stored optimizations |
56
+ | `coreinsight memory --clear` | Wipe the memory store |
57
+ | `coreinsight memory --export out.csv` | Export memory to CSV or Markdown |
58
+ | `coreinsight index [--dir <path>]` | Index a repo for cross-file RAG context |
59
+ | `coreinsight scan [--dir <path>]` | Rank hotspots by complexity without LLM |
60
+ | `coreinsight view` | Launch the interactive TUI |
61
+
62
+ All commands accept `--no-docker` to skip sandboxing when Docker is unavailable.
63
+
64
+ ---
65
+
66
+ ## Supported languages
67
+
68
+ | Language | Analysis | Benchmarking | Correctness |
69
+ |:---------|:--------:|:------------:|:-----------:|
70
+ | Python | ✅ | ✅ | ✅ |
71
+ | C++ | ✅ | ✅ | ✅ |
72
+ | CUDA | ✅ | ✅ | — |
73
+
74
+ ---
75
+
76
+ ## AI providers
77
+
78
+ | Provider | Tier | Notes |
79
+ |:---------|:----:|:------|
80
+ | Ollama | Free | `ollama pull llama3.2` |
81
+ | LM Studio / vLLM | Free | Any OpenAI-compatible server |
82
+ | OpenAI | Pro | GPT 5.3 recommended |
83
+ | Anthropic | Pro | Claude 4.6 Sonnet recommended |
84
+ | Google Gemini | Pro | Gemini 2.5 Pro recommended |
85
+
86
+ Local providers run entirely on-device. No code leaves your machine unless you configure a cloud provider.
87
+
88
+ ---
89
+
90
+ ## Pro — free during beta
91
+
92
+ Pro unlocks cloud providers and AI-free hardware profiling.
93
+ Keys are being distributed manually during the beta.
94
+
95
+ **Request a key → [tally.so/r/xXZ9YE](https://tally.so/r/xXZ9YE)**
96
+
97
+ ```bash
98
+ coreinsight configure --pro-key <your-key>
99
+ ```
100
+
101
+ ---
102
+
103
+ ## Privacy
104
+
105
+ - **Local providers** — nothing leaves your machine
106
+ - **Cloud providers** — only the function code you analyse is sent to the provider API, under your own key
107
+ - The memory store lives at `~/.coreinsight/memory_db` on your filesystem
108
+
109
+ ---
110
+
111
+ ## Troubleshooting
112
+
113
+ **Docker not running**
114
+ ```
115
+ open Docker Desktop, or: sudo systemctl start docker
116
+ ```
117
+
118
+ **Ollama model not found**
119
+ ```bash
120
+ ollama pull llama3.2
121
+ ```
122
+
123
+ **ChromaDB / SQLite error**
124
+ ```bash
125
+ pip install pysqlite3-binary
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Links
131
+
132
+ - PyPI: [pypi.org/project/coreinsight-cli](https://pypi.org/project/coreinsight-cli/)
133
+ - GitHub: [github.com/Prais3/coreinsight_cli](https://github.com/Prais3/coreinsight_cli)
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  import logging
3
- from typing import Optional, List
3
+ from typing import Callable, Optional, List
4
4
  from pydantic import BaseModel, Field
5
5
 
6
6
  from langchain_core.output_parsers import JsonOutputParser
@@ -45,6 +45,43 @@ def _is_truncated(raw: str) -> bool:
45
45
 
46
46
  logger = logging.getLogger(__name__)
47
47
 
48
+ # -------------------------------------------------------------------------------------
49
+ # Prompt compression - keeps SMALL-tier models (≤7B) within their 4 096-token context budget.
50
+ # -------------------------------------------------------------------------------------
51
+ _SMALL_CONTEXT_CHAR_LIMIT = 1_200 # ~300 tokens — enough for signatures
52
+ _SMALL_CODE_CHAR_LIMIT = 2_000 # ~500 tokens — function body cap
53
+
54
+ def _compress_for_small_model(
55
+ context: str,
56
+ code: str,
57
+ model_tier: str,
58
+ ) -> tuple:
59
+ """
60
+ Aggressively trims RAG context and target code for SMALL-tier models so
61
+ the entire prompt + format instructions + response fit within 4 096 tokens.
62
+ Returns (compressed_context, compressed_code). No-op for MEDIUM / LARGE.
63
+ """
64
+ from coreinsight.prompts import ModelTier
65
+ if model_tier != ModelTier.SMALL:
66
+ return context, code
67
+
68
+ if context and len(context) > _SMALL_CONTEXT_CHAR_LIMIT:
69
+ context = (
70
+ context[:_SMALL_CONTEXT_CHAR_LIMIT]
71
+ + "\n\n# [context truncated — top dependencies shown only]"
72
+ )
73
+
74
+ if code and len(code) > _SMALL_CODE_CHAR_LIMIT:
75
+ lines = code.splitlines()
76
+ kept = lines[:60]
77
+ if len(lines) > 60:
78
+ kept.append(
79
+ f"# ... [{len(lines) - 60} lines truncated for small model]"
80
+ )
81
+ code = "\n".join(kept)
82
+
83
+ return context, code
84
+
48
85
 
49
86
  class Bottleneck(BaseModel):
50
87
  line: int = Field(description="The approximate line number of the issue in the original code")
@@ -255,30 +292,59 @@ class AnalyzerAgent:
255
292
  )
256
293
  self.chain = self.prompt | self.json_llm | self.parser
257
294
 
258
- def analyze(self, code: str, language: str, context: str = "", hardware_target: str = "Generic CPU"):
295
+ def analyze(
296
+ self,
297
+ code: str,
298
+ language: str,
299
+ context: str = "",
300
+ hardware_target: str = "Generic CPU",
301
+ stream_callback: Optional[Callable[[str], None]] = None,
302
+ ):
303
+ context, code = _compress_for_small_model(context, code, self.model_tier)
259
304
  try:
305
+ if stream_callback is not None:
306
+ # Stream raw tokens → accumulate → parse at end.
307
+ # Keeps the cursor alive on slow local models instead of hanging.
308
+ raw_chain = self.prompt | self.json_llm
309
+ accumulated = ""
310
+ for chunk in raw_chain.stream({
311
+ "language": language,
312
+ "code_content": code,
313
+ "context": context,
314
+ "hardware_target": hardware_target,
315
+ }):
316
+ token = chunk.content if hasattr(chunk, "content") else str(chunk)
317
+ if isinstance(token, list):
318
+ token = "".join(
319
+ t.get("text", "") if isinstance(t, dict) else str(t)
320
+ for t in token
321
+ )
322
+ if token:
323
+ accumulated += token
324
+ stream_callback(token)
325
+ return self.parser.parse(accumulated)
260
326
  return self.chain.invoke({
261
- "language": language,
262
- "code_content": code,
263
- "context": context,
327
+ "language": language,
328
+ "code_content": code,
329
+ "context": context,
264
330
  "hardware_target": hardware_target,
265
331
  })
266
332
  except OutputParserException:
267
333
  return {
268
- "severity": "Error",
269
- "issue": "AI Output Parsing Failed",
270
- "reasoning": "The model failed to return valid JSON.",
271
- "suggestion": "Try running the analysis again or use a larger parameter model.",
272
- "bottlenecks": [],
334
+ "severity": "Error",
335
+ "issue": "AI Output Parsing Failed",
336
+ "reasoning": "The model failed to return valid JSON.",
337
+ "suggestion": "Try running the analysis again or use a larger parameter model.",
338
+ "bottlenecks": [],
273
339
  "optimized_code": None,
274
340
  }
275
341
  except Exception as e:
276
342
  return {
277
- "severity": "Error",
278
- "issue": str(e),
279
- "reasoning": "System error during analysis pipeline.",
280
- "suggestion": "Check LLM API keys and connectivity.",
281
- "bottlenecks": [],
343
+ "severity": "Error",
344
+ "issue": str(e),
345
+ "reasoning": "System error during analysis pipeline.",
346
+ "suggestion": "Check LLM API keys and connectivity.",
347
+ "bottlenecks": [],
282
348
  "optimized_code": None,
283
349
  }
284
350
 
@@ -296,11 +362,37 @@ class AnalyzerAgent:
296
362
  lines.pop(0)
297
363
  return "\n".join(lines).strip()
298
364
 
299
- def _invoke_code_chain(self, template: str, variables: dict, language: str) -> str:
365
+ def _invoke_code_chain(
366
+ self,
367
+ template: str,
368
+ variables: dict,
369
+ language: str,
370
+ stream_callback: Optional[Callable[[str], None]] = None,
371
+ ) -> str:
300
372
  """Shared invocation + extraction logic for harness and fix chains."""
301
373
  chain = PromptTemplate.from_template(template) | self.base_llm
302
374
  try:
303
- result = chain.invoke(variables)
375
+ if stream_callback is not None:
376
+ accumulated = ""
377
+ for chunk in chain.stream(variables):
378
+ token = chunk.content if hasattr(chunk, "content") else str(chunk)
379
+ if isinstance(token, list):
380
+ token = "".join(
381
+ t.get("text", "") if isinstance(t, dict) else str(t)
382
+ for t in token
383
+ )
384
+ if token:
385
+ accumulated += token
386
+ stream_callback(token)
387
+ raw = accumulated
388
+ else:
389
+ result = chain.invoke(variables)
390
+ raw = result.content if hasattr(result, "content") else str(result)
391
+ if isinstance(raw, list):
392
+ raw = "\n".join(
393
+ item["text"] if isinstance(item, dict) and "text" in item else str(item)
394
+ for item in raw
395
+ )
304
396
  except Exception as e:
305
397
  err = str(e).lower()
306
398
  if any(h in err for h in _TRUNCATION_HINTS):
@@ -309,12 +401,6 @@ class AnalyzerAgent:
309
401
  f"or a model with a larger context window. Detail: {e}"
310
402
  ) from e
311
403
  raise
312
- raw = result.content if hasattr(result, "content") else str(result)
313
- if isinstance(raw, list):
314
- raw = "\n".join(
315
- item["text"] if isinstance(item, dict) and "text" in item else str(item)
316
- for item in raw
317
- )
318
404
  if _is_truncated(raw):
319
405
  logger.warning(
320
406
  f"LLM output appears truncated (len={len(raw)}). "
@@ -334,21 +420,26 @@ class AnalyzerAgent:
334
420
  language: str,
335
421
  context: str = "",
336
422
  hardware_target: str = "Generic CPU",
423
+ stream_callback: Optional[Callable[[str], None]] = None,
337
424
  ) -> str:
338
425
  try:
426
+ context, original_code = _compress_for_small_model(
427
+ context, original_code, self.model_tier
428
+ )
339
429
  tiered_template = _HARNESS_TEMPLATE + HARNESS_ADDENDUM.get(self.model_tier, "")
340
-
430
+
341
431
  return self._invoke_code_chain(
342
432
  tiered_template,
343
433
  {
344
- "language": language,
345
- "func_name": func_name,
346
- "original": original_code,
347
- "optimized": optimized_code,
348
- "context": context,
434
+ "language": language,
435
+ "func_name": func_name,
436
+ "original": original_code,
437
+ "optimized": optimized_code,
438
+ "context": context,
349
439
  "hardware_target": hardware_target,
350
440
  },
351
441
  language,
442
+ stream_callback=stream_callback,
352
443
  )
353
444
  except Exception as e:
354
445
  is_python = language.lower() == "python"
@@ -365,21 +456,26 @@ class AnalyzerAgent:
365
456
  error_logs: str,
366
457
  language: str,
367
458
  context: str = "",
459
+ stream_callback: Optional[Callable[[str], None]] = None,
368
460
  ) -> str:
369
461
  try:
462
+ context, original_code = _compress_for_small_model(
463
+ context, original_code, self.model_tier
464
+ )
370
465
  tiered_template = _FIX_TEMPLATE + HARNESS_ADDENDUM.get(self.model_tier, "")
371
-
466
+
372
467
  return self._invoke_code_chain(
373
468
  tiered_template,
374
469
  {
375
- "language": language,
376
- "func_name": func_name,
377
- "original": original_code,
378
- "bad_harness": bad_harness,
470
+ "language": language,
471
+ "func_name": func_name,
472
+ "original": original_code,
473
+ "bad_harness":bad_harness,
379
474
  "error_logs": error_logs,
380
- "context": context,
475
+ "context": context,
381
476
  },
382
477
  language,
478
+ stream_callback=stream_callback,
383
479
  )
384
480
  except Exception as e:
385
481
  is_python = language.lower() == "python"
@@ -577,8 +673,29 @@ class BottleneckAgent:
577
673
  language: str,
578
674
  context: str = "",
579
675
  hardware_target: str = "Generic CPU",
676
+ stream_callback: Optional[Callable[[str], None]] = None,
580
677
  ) -> dict:
678
+ context, code = _compress_for_small_model(context, code, self.model_tier)
581
679
  try:
680
+ if stream_callback is not None:
681
+ raw_chain = self._prompt | self._json_llm
682
+ accumulated = ""
683
+ for chunk in raw_chain.stream({
684
+ "language": language,
685
+ "code_content": code,
686
+ "context": context,
687
+ "hardware_target": hardware_target,
688
+ }):
689
+ token = chunk.content if hasattr(chunk, "content") else str(chunk)
690
+ if isinstance(token, list):
691
+ token = "".join(
692
+ t.get("text", "") if isinstance(t, dict) else str(t)
693
+ for t in token
694
+ )
695
+ if token:
696
+ accumulated += token
697
+ stream_callback(token)
698
+ return self.parser.parse(accumulated)
582
699
  return self._chain.invoke({
583
700
  "language": language,
584
701
  "code_content": code,
@@ -651,6 +768,9 @@ class OptimizerAgent:
651
768
  Returns original_code on any failure so the pipeline can continue.
652
769
  """
653
770
  try:
771
+ context, original_code = _compress_for_small_model(
772
+ context or "", original_code, self.model_tier
773
+ )
654
774
  chain = PromptTemplate.from_template(self._template) | self._base_llm
655
775
  result = chain.invoke({
656
776
  "language": language,
@@ -1,4 +1,6 @@
1
1
  import json
2
+ import hashlib
3
+ import urllib
2
4
  from pathlib import Path
3
5
  from rich.console import Console
4
6
  from rich.prompt import Prompt, Confirm
@@ -8,17 +10,17 @@ CONFIG_FILE = Path.home() / ".coreinsight" / "config.json"
8
10
 
9
11
  PRO_WAITLIST_URL = "https://tally.so/r/xXZ9YE"
10
12
 
11
- # Raw URL of GitHub Gist - beta testing for new pro users
12
- PRO_KEYS_GIST_URL = "https://gist.githubusercontent.com/Prais3/4a57cf927734c6678602ff2066fc080c/raw/b4347c6ffea869490afb9a828802ec882ecd0eca/valid_keys.json"
13
+ # Cloudflare Worker endpoint for Pro key validation (v0.3.0+)
14
+ PRO_KEY_VALIDATION_URL = "https://coreinsight.coreinsight-dev.workers.dev/"
13
15
 
14
16
  CLOUD_PROVIDERS = ["openai", "anthropic", "google"]
15
17
 
16
18
  FREE_TIER_LIMITS = {
17
- "max_functions": 3,
18
- "max_retries": 2,
19
- "num_test_cases": 8,
19
+ "max_functions": None, # unlimited
20
+ "max_retries": 3,
21
+ "num_test_cases": 5,
20
22
  "hardware_profiling": False,
21
- "max_files": 2,
23
+ "max_files": None,
22
24
  }
23
25
 
24
26
  PRO_TIER_LIMITS = {
@@ -106,11 +108,20 @@ def run_configure(pro_key: str = None, agent_mode: str = None):
106
108
  key_hash = hashlib.sha256(pro_key.encode()).hexdigest()
107
109
 
108
110
  try:
109
- req = urllib.request.Request(PRO_KEYS_GIST_URL)
110
- with urllib.request.urlopen(req, timeout=5) as response:
111
- valid_hashes = json.loads(response.read().decode())
112
-
113
- if key_hash in valid_hashes:
111
+ payload = json.dumps({"hash": key_hash}).encode()
112
+ req = urllib.request.Request(
113
+ PRO_KEY_VALIDATION_URL,
114
+ data=payload,
115
+ headers={
116
+ "Content-Type": "application/json",
117
+ "User-Agent": "coreinsight-cli/0.3.0",
118
+ },
119
+ method="POST",
120
+ )
121
+ with urllib.request.urlopen(req, timeout=8) as response:
122
+ result = json.loads(response.read().decode())
123
+
124
+ if result.get("valid"):
114
125
  config["pro"] = True
115
126
  save_config(config)
116
127
  console.print("[bold green]✅ Pro tier activated![/bold green]")
@@ -118,8 +129,11 @@ def run_configure(pro_key: str = None, agent_mode: str = None):
118
129
  config["pro"] = False
119
130
  save_config(config)
120
131
  console.print("[red]❌ Invalid or revoked Pro key.[/red]")
121
- except Exception as e:
122
- console.print("[red]⚠️ Could not verify key. Please check your internet connection or try again later.[/red]")
132
+ except Exception:
133
+ console.print(
134
+ "[red]⚠️ Could not verify key — check your internet connection "
135
+ "or try again later.[/red]"
136
+ )
123
137
  return
124
138
 
125
139
  if agent_mode is not None: