coreinsight-cli 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coreinsight_cli-0.2.7/coreinsight_cli.egg-info → coreinsight_cli-0.2.9}/PKG-INFO +1 -1
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/analyzer.py +109 -16
- coreinsight_cli-0.2.9/coreinsight/embeddings.py +103 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/indexer.py +2 -53
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/main.py +40 -8
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/memory.py +131 -59
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/profiler.py +265 -13
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/sandbox.py +18 -12
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/tui.py +277 -53
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9/coreinsight_cli.egg-info}/PKG-INFO +1 -1
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/SOURCES.txt +1 -2
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/pyproject.toml +1 -1
- coreinsight_cli-0.2.7/coreinsight/Dockerfile.cpp-sandbox +0 -2
- coreinsight_cli-0.2.7/coreinsight/Dockerfile.python-sandbox +0 -3
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/LICENSE +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/README.md +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/__init__.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/config.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/__init__.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/bad_loop.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/data_processor.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/slow.cpp +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/hardware.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/parser.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/prompts.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/scanner.py +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/entry_points.txt +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/requires.txt +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/top_level.txt +0 -0
- {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/setup.cfg +0 -0
|
@@ -14,6 +14,35 @@ from langchain_anthropic import ChatAnthropic
|
|
|
14
14
|
|
|
15
15
|
from coreinsight.prompts import SYSTEM_PROMPT, ANALYSIS_TEMPLATE, HARNESS_ADDENDUM
|
|
16
16
|
|
|
17
|
+
# Phrases that identify a context/token-limit failure. They are matched
# against exception messages by the invoke helpers and against the start
# of an LLM response by _is_truncated().
_TRUNCATION_HINTS = (
    "context length",
    "context_length_exceeded",
    "maximum context",
    "token limit",
    "finish_reason: length",
    "finish_reason\":\"length",
)

# Number of leading characters of a response that are scanned for
# _TRUNCATION_HINTS. An error message returned in place of real content
# shows up at the start; scanning the whole output would reject long,
# complete answers that merely mention e.g. "token limit" in a comment.
_HINT_SCAN_CHARS = 200

# Endings that suggest generation stopped mid-statement.
_TRUNCATED_SUFFIXES = ("...", "/*", "//", "\"", "'")


def _is_truncated(raw: str) -> bool:
    """
    Return True if the raw LLM output looks like it was cut off mid-generation.

    This is a heuristic. The checks, in order:

    1. Empty, ``None`` or fewer than 20 non-whitespace-trimmed characters.
    2. One of ``_TRUNCATION_HINTS`` occurs (case-insensitively) within the
       first ``_HINT_SCAN_CHARS`` characters of the stripped output.
    3. The stripped output starts with ``{`` but does not end with ``}``
       (a JSON object that was opened but never closed).
    4. The stripped output ends with one of ``_TRUNCATED_SUFFIXES``.

    Args:
        raw: Text returned by the model.

    Returns:
        True when any check above matches, otherwise False.
    """
    if not raw:
        return True

    stripped = raw.strip()
    if len(stripped) < 20:
        return True

    # Only the head is inspected: see the note on _HINT_SCAN_CHARS.
    head = stripped[:_HINT_SCAN_CHARS].lower()
    if any(hint in head for hint in _TRUNCATION_HINTS):
        return True

    # JSON truncation: opened but never closed
    if stripped.startswith("{") and not stripped.endswith("}"):
        return True

    # Code truncation: ends on an ellipsis, an open comment or a quote
    return stripped.endswith(_TRUNCATED_SUFFIXES)
|
|
45
|
+
|
|
17
46
|
logger = logging.getLogger(__name__)
|
|
18
47
|
|
|
19
48
|
|
|
@@ -163,12 +192,15 @@ class AnalyzerAgent:
|
|
|
163
192
|
self.json_llm = self.base_llm
|
|
164
193
|
|
|
165
194
|
elif provider == "local_server":
|
|
166
|
-
|
|
195
|
+
from coreinsight.prompts import ModelTier
|
|
196
|
+
base_url = api_keys.get("local_url", "http://localhost:1234/v1")
|
|
197
|
+
_max_tokens = 2048 if model_tier == ModelTier.SMALL else 4096
|
|
167
198
|
self.base_llm = ChatOpenAI(
|
|
168
199
|
model=model_name,
|
|
169
200
|
api_key="not-needed",
|
|
170
201
|
base_url=base_url,
|
|
171
202
|
temperature=0.1,
|
|
203
|
+
max_tokens=_max_tokens,
|
|
172
204
|
model_kwargs={"response_format": {"type": "json_object"}},
|
|
173
205
|
)
|
|
174
206
|
self.json_llm = self.base_llm
|
|
@@ -196,11 +228,20 @@ class AnalyzerAgent:
|
|
|
196
228
|
self.json_llm = self.base_llm
|
|
197
229
|
|
|
198
230
|
else: # Ollama default
|
|
231
|
+
from coreinsight.prompts import ModelTier
|
|
232
|
+
# Small models (7B) typically have 4096 native context.
|
|
233
|
+
# Asking for more causes silent degradation or OOM on the host.
|
|
234
|
+
# Medium/large local models can handle 8192 comfortably.
|
|
235
|
+
_ctx = 4096 if model_tier == ModelTier.SMALL else 8192
|
|
236
|
+
# num_predict: small models need room for JSON + code in one shot.
|
|
237
|
+
# Capping at 2048 for small prevents runaway generation that hits
|
|
238
|
+
# the limit mid-JSON and returns truncated garbage.
|
|
239
|
+
_predict = 2048 if model_tier == ModelTier.SMALL else 4096
|
|
199
240
|
self.base_llm = ChatOllama(
|
|
200
241
|
model=model_name,
|
|
201
242
|
temperature=0.1,
|
|
202
|
-
num_predict=
|
|
203
|
-
num_ctx=
|
|
243
|
+
num_predict=_predict,
|
|
244
|
+
num_ctx=_ctx,
|
|
204
245
|
)
|
|
205
246
|
self.json_llm = self.base_llm.bind(format="json")
|
|
206
247
|
|
|
@@ -258,14 +299,31 @@ class AnalyzerAgent:
|
|
|
258
299
|
def _invoke_code_chain(self, template: str, variables: dict, language: str) -> str:
    """
    Run a code-generation prompt and return the extracted executable code.

    Shared by the harness and fix chains. The prompt built from
    ``template`` is piped into ``self.base_llm`` and invoked with
    ``variables``; the response text is then handed to
    ``self._extract_executable_code``.

    Args:
        template: Prompt template string.
        variables: Values substituted into the template.
        language: Not read by this method's body.

    Raises:
        RuntimeError: If the invocation fails with a message containing one
            of ``_TRUNCATION_HINTS``, or if ``_is_truncated`` flags the
            response text. Any other invocation error propagates unchanged.
    """
    pipeline = PromptTemplate.from_template(template) | self.base_llm
    try:
        response = pipeline.invoke(variables)
    except Exception as exc:
        message = str(exc).lower()
        hit_limit = False
        for hint in _TRUNCATION_HINTS:
            if hint in message:
                hit_limit = True
                break
        if not hit_limit:
            raise
        raise RuntimeError(
            f"Model hit its context limit. Try a smaller file, fewer functions, "
            f"or a model with a larger context window. Detail: {exc}"
        ) from exc

    if hasattr(response, "content"):
        text = response.content
    else:
        text = str(response)

    # Some providers return a list of content blocks rather than a string;
    # flatten it into newline-separated text.
    if isinstance(text, list):
        pieces = []
        for part in text:
            if isinstance(part, dict) and "text" in part:
                pieces.append(part["text"])
            else:
                pieces.append(str(part))
        text = "\n".join(pieces)

    if not _is_truncated(text):
        return self._extract_executable_code(text)

    logger.warning(
        f"LLM output appears truncated (len={len(text)}). "
        f"Model likely hit its context/predict limit."
    )
    raise RuntimeError(
        "Model output was truncated — hit context or token limit. "
        "Try a model with a larger context window, or reduce the function size."
    )
|
|
270
328
|
|
|
271
329
|
def generate_harness(
|
|
@@ -421,12 +479,14 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
|
|
|
421
479
|
return llm, llm
|
|
422
480
|
|
|
423
481
|
if provider == "local_server":
|
|
424
|
-
base_url
|
|
482
|
+
base_url = api_keys.get("local_url", "http://localhost:1234/v1")
|
|
483
|
+
_max_tokens = api_keys.pop("_predict", 4096) # reuse same key as Ollama path
|
|
425
484
|
llm = ChatOpenAI(
|
|
426
485
|
model=model_name,
|
|
427
486
|
api_key="not-needed",
|
|
428
487
|
base_url=base_url,
|
|
429
488
|
temperature=0.1,
|
|
489
|
+
max_tokens=_max_tokens,
|
|
430
490
|
model_kwargs={"response_format": {"type": "json_object"}},
|
|
431
491
|
)
|
|
432
492
|
return llm, llm
|
|
@@ -452,16 +512,33 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
|
|
|
452
512
|
)
|
|
453
513
|
return llm, llm
|
|
454
514
|
|
|
455
|
-
# Ollama default
|
|
515
|
+
# Ollama default — context and predict budget are passed in from the
|
|
516
|
+
# calling agent which knows its own model_tier.
|
|
517
|
+
# Default to medium-safe values; callers override via kwargs if needed.
|
|
518
|
+
_ctx = api_keys.pop("_ctx", 8192)
|
|
519
|
+
_predict = api_keys.pop("_predict", 4096)
|
|
456
520
|
base = ChatOllama(
|
|
457
521
|
model=model_name,
|
|
458
522
|
temperature=0.1,
|
|
459
|
-
num_predict=
|
|
460
|
-
num_ctx=
|
|
523
|
+
num_predict=_predict,
|
|
524
|
+
num_ctx=_ctx,
|
|
461
525
|
)
|
|
462
526
|
return base, base.bind(format="json")
|
|
463
527
|
|
|
464
528
|
|
|
529
|
+
def _build_llm_tiered(provider: str, model_name: str, api_keys: dict, model_tier: str):
    """
    Call ``_build_llm`` with tier-dependent budgets for local providers.

    Works on a copy of ``api_keys`` so the caller's dict is not modified.
    For ``"ollama"`` the copy receives ``_ctx`` (4096 for the small tier,
    otherwise 8192) and ``_predict`` (2048 for the small tier, otherwise
    4096). For ``"local_server"`` only ``_predict`` is set, with the same
    values. Every other provider is passed through without extra keys.

    Args:
        provider: Provider identifier.
        model_name: Model to load.
        api_keys: Provider settings; may be ``None`` or empty.
        model_tier: Tier compared against ``ModelTier.SMALL``.

    Returns:
        Whatever ``_build_llm`` returns for the given provider.
    """
    from coreinsight.prompts import ModelTier

    overrides = dict(api_keys or {})
    if provider == "ollama" or provider == "local_server":
        small = model_tier == ModelTier.SMALL
        if provider == "ollama":
            overrides["_ctx"] = 4096 if small else 8192
        # For local_server this is the response-length budget; the context
        # window itself is configured server-side.
        overrides["_predict"] = 2048 if small else 4096
    return _build_llm(provider, model_name, overrides)
|
|
540
|
+
|
|
541
|
+
|
|
465
542
|
class BottleneckAgent:
|
|
466
543
|
"""
|
|
467
544
|
Agent 1 — analysis only.
|
|
@@ -480,7 +557,7 @@ class BottleneckAgent:
|
|
|
480
557
|
from coreinsight.prompts import BOTTLENECK_TEMPLATE, SYSTEM_PROMPT
|
|
481
558
|
self.model_tier = model_tier
|
|
482
559
|
self.parser = JsonOutputParser(pydantic_object=AuditResult)
|
|
483
|
-
self._base_llm, self._json_llm =
|
|
560
|
+
self._base_llm, self._json_llm = _build_llm_tiered(provider, model_name, api_keys, model_tier)
|
|
484
561
|
|
|
485
562
|
self._prompt = PromptTemplate(
|
|
486
563
|
template=BOTTLENECK_TEMPLATE,
|
|
@@ -544,7 +621,7 @@ class OptimizerAgent:
|
|
|
544
621
|
) -> None:
|
|
545
622
|
from coreinsight.prompts import OPTIMIZER_TEMPLATE
|
|
546
623
|
self.model_tier = model_tier
|
|
547
|
-
self._base_llm, _ =
|
|
624
|
+
self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
|
|
548
625
|
self._template = OPTIMIZER_TEMPLATE
|
|
549
626
|
|
|
550
627
|
def _extract_code(self, raw: str) -> str:
|
|
@@ -620,7 +697,7 @@ class HarnessAgent:
|
|
|
620
697
|
HARNESS_ADDENDUM_MULTI,
|
|
621
698
|
)
|
|
622
699
|
self.model_tier = model_tier
|
|
623
|
-
self._base_llm, _ =
|
|
700
|
+
self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
|
|
624
701
|
self._harness_tmpl = HARNESS_TEMPLATE_MULTI + HARNESS_ADDENDUM_MULTI.get(model_tier, "")
|
|
625
702
|
self._fix_tmpl = FIX_TEMPLATE_MULTI + HARNESS_ADDENDUM_MULTI.get(model_tier, "")
|
|
626
703
|
|
|
@@ -638,14 +715,28 @@ class HarnessAgent:
|
|
|
638
715
|
|
|
639
716
|
def _invoke(self, template: str, variables: dict) -> str:
    """
    Run a harness prompt against ``self._base_llm`` and return the code.

    Args:
        template: Prompt template string.
        variables: Values substituted into the template.

    Returns:
        The result of ``self._extract_code`` applied to the response text.

    Raises:
        RuntimeError: If the invocation fails with a message containing one
            of ``_TRUNCATION_HINTS``, or if ``_is_truncated`` flags the
            response text. Any other invocation error propagates unchanged.
    """
    pipeline = PromptTemplate.from_template(template) | self._base_llm
    try:
        response = pipeline.invoke(variables)
    except Exception as exc:
        lowered = str(exc).lower()
        if not any(hint in lowered for hint in _TRUNCATION_HINTS):
            raise
        raise RuntimeError(
            f"Model hit its context limit during harness generation. "
            f"Detail: {exc}"
        ) from exc

    if hasattr(response, "content"):
        text = response.content
    else:
        text = str(response)

    # A list of content blocks is flattened into newline-separated text.
    if isinstance(text, list):
        pieces = []
        for part in text:
            if isinstance(part, dict) and "text" in part:
                pieces.append(part["text"])
            else:
                pieces.append(str(part))
        text = "\n".join(pieces)

    if _is_truncated(text):
        raise RuntimeError(
            "Harness output was truncated — model hit its token limit. "
            "Switching to fix loop with truncation note."
        )
    return self._extract_code(text)
|
|
650
741
|
|
|
651
742
|
def _check_speedup(self, success: bool, logs: str) -> bool:
|
|
@@ -714,7 +805,9 @@ class HarnessAgent:
|
|
|
714
805
|
is_valid = self._check_speedup(success, logs)
|
|
715
806
|
retries += 1
|
|
716
807
|
|
|
717
|
-
if
|
|
808
|
+
if getattr(sandbox, 'disabled', False):
|
|
809
|
+
pass # skipped intentionally — don't annotate as failed
|
|
810
|
+
elif is_valid and retries > 0:
|
|
718
811
|
logs = f"(Succeeded after {retries} retries)\n" + logs
|
|
719
812
|
elif not is_valid:
|
|
720
813
|
logs = f"(Failed after {retries} retries)\n" + logs
|
|
@@ -738,7 +831,7 @@ class TestCaseAgent:
|
|
|
738
831
|
model_tier: str,
|
|
739
832
|
) -> None:
|
|
740
833
|
self.model_tier = model_tier
|
|
741
|
-
self._base_llm, _ =
|
|
834
|
+
self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
|
|
742
835
|
|
|
743
836
|
def generate(
|
|
744
837
|
self,
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
coreinsight/embeddings.py — Shared embedding utility
|
|
3
|
+
|
|
4
|
+
Single source of truth for embedding model loading used by both
|
|
5
|
+
memory.py (OptimizationMemory) and indexer.py (RepoIndexer).
|
|
6
|
+
|
|
7
|
+
Tries to load all-MiniLM-L6-v2 from local cache first.
|
|
8
|
+
Falls back to a deterministic hash-based embedder when offline
|
|
9
|
+
or when the model has not yet been downloaded.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import hashlib
|
|
14
|
+
import logging
|
|
15
|
+
import math
|
|
16
|
+
import os
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Tuple
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# All models cached here — never hits the network if already present
|
|
23
|
+
MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
|
|
24
|
+
MODEL_NAME = "all-MiniLM-L6-v2"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class _HashEmbeddingFunction:
    """
    Deterministic offline fallback embedder.

    Produces a 384-dim float vector from token overlap — no downloads,
    no GPU, no network. Each text is lower-cased and split on whitespace;
    every token increments the bucket selected by its SHA-256 hash modulo
    ``DIM``, and the resulting count vector is L2-normalised. Semantic
    quality is lower than MiniLM but RAG and memory lookup still work via
    keyword/structural matching.

    Run `coreinsight index` once while online to cache the real model.
    """
    DIM = 384

    def __call__(self, input: List[str]) -> List[List[float]]:
        """
        Embed each text in ``input``.

        Args:
            input: Texts to embed. A single ``str`` is treated as a batch
                of one document rather than being iterated per character.

        Returns:
            One ``DIM``-length vector per text, in input order. A text with
            no tokens yields the all-zero vector.
        """
        if isinstance(input, str):
            input = [input]

        results = []
        for text in input:
            vec = [0.0] * self.DIM
            for tok in text.lower().split():
                digest = hashlib.sha256(tok.encode()).digest()
                vec[int.from_bytes(digest, "big") % self.DIM] += 1.0
            # L2 normalise so cosine distance works correctly; the
            # `or 1.0` avoids dividing by zero for token-less text.
            mag = math.sqrt(sum(x * x for x in vec)) or 1.0
            results.append([x / mag for x in vec])
        return results
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def load_embedding_fn() -> Tuple[object, str]:
    """
    Load the sentence-transformer embedding function.

    Returns:
        (embedding_fn, label) where label is a human-readable string
        indicating which embedder is active — shown in CLI output.

    Strategy:
        1. Point the SentenceTransformers / HuggingFace cache environment
           variables at ~/.coreinsight/models, unless they are already set.
        2. Probe the model with a dummy call to force-load weights now
           rather than silently failing later during indexing or lookup.
        3. On any failure (cache directory cannot be created, import
           error, network error, offline) fall back to
           _HashEmbeddingFunction with a logged and printed warning.
    """
    # Pin cache dirs — set before chromadb.utils is imported below.
    os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(MODEL_CACHE_DIR))
    os.environ.setdefault("HF_HUB_CACHE", str(MODEL_CACHE_DIR))
    # Allow download when online; callers that want strict offline can
    # set HF_HUB_OFFLINE=1 in their environment before importing.
    os.environ.setdefault("HF_HUB_OFFLINE", "0")

    try:
        # Created inside the try so that an unwritable or full home
        # directory triggers the fallback instead of raising to the caller.
        MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)

        from chromadb.utils import embedding_functions as _ef

        fn = _ef.SentenceTransformerEmbeddingFunction(model_name=MODEL_NAME)

        # Force-load now so we catch errors here, not mid-analysis.
        fn(["probe"])

        label = f"{MODEL_NAME} (cached)"
        logger.debug(f"Embedding model loaded: {label}")
        return fn, label

    except Exception as exc:
        # Deliberately broad: any failure to obtain the real model should
        # degrade to the hash embedder rather than abort the command.
        logger.warning(
            f"SentenceTransformer unavailable ({exc}). "
            f"Using offline hash embedder — semantic quality reduced. "
            f"Run `coreinsight index` once while online to cache the model."
        )
        from rich.console import Console as _Console
        _Console().print(
            "[yellow]⚠ Embedding model unavailable (offline or not yet downloaded). "
            "Using keyword-based fallback — RAG and memory recall will work but with "
            "reduced semantic accuracy. "
            "Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
        )
        return _HashEmbeddingFunction(), "hash-based (offline fallback)"
|
|
@@ -9,62 +9,11 @@ import chromadb
|
|
|
9
9
|
from chromadb.utils import embedding_functions
|
|
10
10
|
|
|
11
11
|
from coreinsight.parser import CodeParser
|
|
12
|
+
from coreinsight.embeddings import load_embedding_fn
|
|
12
13
|
|
|
13
14
|
console = Console()
|
|
14
15
|
logger = logging.getLogger(__name__)
|
|
15
16
|
|
|
16
|
-
# Local model cache — never hits the network if model is already here
|
|
17
|
-
_MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class _HashEmbeddingFunction:
|
|
21
|
-
"""
|
|
22
|
-
Deterministic offline fallback embedder.
|
|
23
|
-
Produces a 384-dim float vector from token overlap — no downloads, no GPU.
|
|
24
|
-
Semantic quality is lower than MiniLM but RAG still works via keyword matching.
|
|
25
|
-
"""
|
|
26
|
-
DIM = 384
|
|
27
|
-
|
|
28
|
-
def __call__(self, input: list[str]) -> list[list[float]]:
|
|
29
|
-
results = []
|
|
30
|
-
for text in input:
|
|
31
|
-
tokens = text.lower().split()
|
|
32
|
-
vec = [0.0] * self.DIM
|
|
33
|
-
for tok in tokens:
|
|
34
|
-
h = int(hashlib.sha256(tok.encode()).hexdigest(), 16)
|
|
35
|
-
vec[h % self.DIM] += 1.0
|
|
36
|
-
# L2 normalise
|
|
37
|
-
mag = math.sqrt(sum(x * x for x in vec)) or 1.0
|
|
38
|
-
results.append([x / mag for x in vec])
|
|
39
|
-
return results
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _load_embedding_fn():
|
|
43
|
-
"""
|
|
44
|
-
Try to load SentenceTransformer from local cache.
|
|
45
|
-
Falls back to _HashEmbeddingFunction if offline or model not cached.
|
|
46
|
-
"""
|
|
47
|
-
_MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
48
|
-
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(_MODEL_CACHE_DIR))
|
|
49
|
-
os.environ.setdefault("HF_HUB_OFFLINE", "0") # allow download when online
|
|
50
|
-
|
|
51
|
-
try:
|
|
52
|
-
fn = embedding_functions.SentenceTransformerEmbeddingFunction(
|
|
53
|
-
model_name="all-MiniLM-L6-v2",
|
|
54
|
-
)
|
|
55
|
-
# Probe: actually load the model now so we catch network errors here
|
|
56
|
-
# rather than silently later during indexing.
|
|
57
|
-
fn(["probe"])
|
|
58
|
-
return fn, "all-MiniLM-L6-v2 (cached)"
|
|
59
|
-
except Exception as e:
|
|
60
|
-
logger.warning(f"SentenceTransformer unavailable ({e}). Using offline hash embedder — semantic quality reduced.")
|
|
61
|
-
console.print(
|
|
62
|
-
"[yellow]⚠ Embedding model unavailable (offline or not yet downloaded). "
|
|
63
|
-
"Using keyword-based fallback — RAG will work but with reduced semantic accuracy. "
|
|
64
|
-
"Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
|
|
65
|
-
)
|
|
66
|
-
return _HashEmbeddingFunction(), "hash-based (offline fallback)"
|
|
67
|
-
|
|
68
17
|
|
|
69
18
|
class RepoIndexer:
|
|
70
19
|
def __init__(self, repo_path: str):
|
|
@@ -82,7 +31,7 @@ class RepoIndexer:
|
|
|
82
31
|
return True
|
|
83
32
|
try:
|
|
84
33
|
self._chroma_client = chromadb.PersistentClient(path=str(self.db_path))
|
|
85
|
-
self._embedding_fn, self._embedding_label =
|
|
34
|
+
self._embedding_fn, self._embedding_label = load_embedding_fn()
|
|
86
35
|
self._collection = self._chroma_client.get_or_create_collection(
|
|
87
36
|
name="codebase_context",
|
|
88
37
|
embedding_function=self._embedding_fn,
|
|
@@ -272,8 +272,15 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
|
|
|
272
272
|
return func_name, result, (success, logs, plot_data), None, verification, profiler_result, None, is_valid_optimization
|
|
273
273
|
|
|
274
274
|
except Exception as e:
|
|
275
|
+
err_str = str(e)
|
|
276
|
+
if "context" in err_str.lower() and "limit" in err_str.lower():
|
|
277
|
+
_log(func_name, f"Context limit hit: {e}", style="bold yellow")
|
|
278
|
+
return func_name, None, None, (
|
|
279
|
+
f"⚠️ Context limit: {err_str}\n"
|
|
280
|
+
f"Try a model with a larger context window, or split the function."
|
|
281
|
+
), None, None, None, False
|
|
275
282
|
_log(func_name, f"Failed: {e}", style="bold red")
|
|
276
|
-
return func_name, None, None, f"❌ Analysis failed: {
|
|
283
|
+
return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
|
|
277
284
|
|
|
278
285
|
def parse_csv_logs(logs: str):
|
|
279
286
|
"""Safely extracts CSV data from the sandbox logs."""
|
|
@@ -734,7 +741,7 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None):
|
|
|
734
741
|
finally:
|
|
735
742
|
console = _prev_console
|
|
736
743
|
|
|
737
|
-
def run_demo(lang: str = "python", no_docker: bool = False):
|
|
744
|
+
def run_demo(lang: str = "python", no_docker: bool = False, tui_console=None):
|
|
738
745
|
import shutil
|
|
739
746
|
import importlib.resources
|
|
740
747
|
|
|
@@ -797,18 +804,34 @@ def run_demo(lang: str = "python", no_docker: bool = False):
|
|
|
797
804
|
# For Python: auto-index so RAG cross-file context is showcased
|
|
798
805
|
if lang == "python":
|
|
799
806
|
console.print("[dim]Auto-indexing demo files to showcase RAG cross-file context...[/dim]")
|
|
800
|
-
|
|
801
|
-
|
|
807
|
+
try:
|
|
808
|
+
from coreinsight.indexer import RepoIndexer as _RepoIndexer
|
|
809
|
+
_RepoIndexer(str(demo_dir)).index_repository()
|
|
810
|
+
except Exception as _idx_err:
|
|
811
|
+
# Non-fatal — SQLite write conflicts can occur when running
|
|
812
|
+
# through the TUI. RAG context will be empty for this run.
|
|
813
|
+
console.print(f"[dim yellow]Indexing skipped (will retry next run): {_idx_err}[/dim yellow]")
|
|
802
814
|
console.print()
|
|
803
815
|
|
|
804
|
-
run_analysis(str(demo_dir / entry_file), no_docker=no_docker)
|
|
816
|
+
run_analysis(str(demo_dir / entry_file), no_docker=no_docker, tui_console=tui_console)
|
|
805
817
|
|
|
806
|
-
def _run_memory_cmd(clear: bool):
|
|
818
|
+
def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv"):
|
|
807
819
|
from coreinsight.memory import OptimizationMemory, MEMORY_DIR
|
|
808
820
|
import shutil
|
|
809
821
|
|
|
810
822
|
mem = OptimizationMemory()
|
|
811
823
|
|
|
824
|
+
if export_path:
|
|
825
|
+
count = mem.export(export_path, fmt=export_fmt)
|
|
826
|
+
if count:
|
|
827
|
+
console.print(
|
|
828
|
+
f"[bold green]✅ Exported {count} optimization(s) to "
|
|
829
|
+
f"[cyan]{export_path}[/cyan][/bold green]"
|
|
830
|
+
)
|
|
831
|
+
else:
|
|
832
|
+
console.print("[yellow]Nothing to export — memory store is empty.[/yellow]")
|
|
833
|
+
return
|
|
834
|
+
|
|
812
835
|
if clear:
|
|
813
836
|
if MEMORY_DIR.exists():
|
|
814
837
|
shutil.rmtree(MEMORY_DIR, ignore_errors=True)
|
|
@@ -930,7 +953,12 @@ def main_cli():
|
|
|
930
953
|
index_parser.add_argument("--dir", default=".", help="Directory to index")
|
|
931
954
|
|
|
932
955
|
memory_parser = subparsers.add_parser("memory", help="Inspect or clear the local optimization memory")
|
|
933
|
-
memory_parser.add_argument("--clear",
|
|
956
|
+
memory_parser.add_argument("--clear", action="store_true", help="Wipe the memory store")
|
|
957
|
+
memory_parser.add_argument("--export", dest="export_path", default=None,
|
|
958
|
+
help="Export memory to file (e.g. --export optimizations.csv)")
|
|
959
|
+
memory_parser.add_argument("--format", dest="export_fmt", default="csv",
|
|
960
|
+
choices=["csv", "md"],
|
|
961
|
+
help="Export format: csv (default) or md")
|
|
934
962
|
|
|
935
963
|
view_parser = subparsers.add_parser("view", help="Launch the interactive TUI")
|
|
936
964
|
view_parser.add_argument("--dir", default=".", help="Starting directory (default: current)")
|
|
@@ -954,7 +982,11 @@ def main_cli():
|
|
|
954
982
|
elif args.command == "analyze":
|
|
955
983
|
run_analysis(args.file, no_docker=getattr(args, "no_docker", False))
|
|
956
984
|
elif args.command == "memory":
|
|
957
|
-
_run_memory_cmd(
|
|
985
|
+
_run_memory_cmd(
|
|
986
|
+
clear=getattr(args, "clear", False),
|
|
987
|
+
export_path=getattr(args, "export_path", None),
|
|
988
|
+
export_fmt=getattr(args, "export_fmt", "csv"),
|
|
989
|
+
)
|
|
958
990
|
elif args.command == "scan":
|
|
959
991
|
scanner = ProjectScanner(args.dir)
|
|
960
992
|
scanner.scan_project(max_results=args.top)
|