coreinsight-cli 0.2.7__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {coreinsight_cli-0.2.7/coreinsight_cli.egg-info → coreinsight_cli-0.2.9}/PKG-INFO +1 -1
  2. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/analyzer.py +109 -16
  3. coreinsight_cli-0.2.9/coreinsight/embeddings.py +103 -0
  4. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/indexer.py +2 -53
  5. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/main.py +40 -8
  6. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/memory.py +131 -59
  7. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/profiler.py +265 -13
  8. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/sandbox.py +18 -12
  9. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/tui.py +277 -53
  10. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9/coreinsight_cli.egg-info}/PKG-INFO +1 -1
  11. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/SOURCES.txt +1 -2
  12. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/pyproject.toml +1 -1
  13. coreinsight_cli-0.2.7/coreinsight/Dockerfile.cpp-sandbox +0 -2
  14. coreinsight_cli-0.2.7/coreinsight/Dockerfile.python-sandbox +0 -3
  15. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/LICENSE +0 -0
  16. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/README.md +0 -0
  17. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/__init__.py +0 -0
  18. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/config.py +0 -0
  19. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/__init__.py +0 -0
  20. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/bad_loop.py +0 -0
  21. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/data_processor.py +0 -0
  22. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/demo/slow.cpp +0 -0
  23. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/hardware.py +0 -0
  24. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/parser.py +0 -0
  25. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/prompts.py +0 -0
  26. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight/scanner.py +0 -0
  27. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
  28. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/entry_points.txt +0 -0
  29. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/requires.txt +0 -0
  30. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/coreinsight_cli.egg-info/top_level.txt +0 -0
  31. {coreinsight_cli-0.2.7 → coreinsight_cli-0.2.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coreinsight-cli
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
5
  Author: Varun Jani
6
6
  License: GPL-3.0-or-later
@@ -14,6 +14,35 @@ from langchain_anthropic import ChatAnthropic
14
14
 
15
15
  from coreinsight.prompts import SYSTEM_PROMPT, ANALYSIS_TEMPLATE, HARNESS_ADDENDUM
16
16
 
17
# Phrases that identify a provider-side "ran out of context / tokens" error.
# They are matched case-insensitively against exception text and against the
# opening of a raw LLM response (see _is_truncated).
_TRUNCATION_HINTS = (
    "context length",
    "context_length_exceeded",
    "maximum context",
    "token limit",
    "finish_reason: length",
    "finish_reason\":\"length",
)

# An explicit limit error is reported at the start of the response, so only
# this many leading characters are scanned for hints. Scanning the whole
# output would flag long, valid generations that merely mention e.g.
# "token limit" in a comment or string literal.
_TRUNCATION_HINT_WINDOW = 400

# Trailing fragments that suggest generation stopped mid-statement.
_TRUNCATED_TAILS = ("...", "/*", "//", "\"", "'")


def _is_truncated(raw: str) -> bool:
    """
    Return True if the raw LLM output looks like it was cut off mid-generation.

    Checks, in order:
      1. Empty / None / fewer than 20 non-whitespace-trimmed characters.
      2. One of _TRUNCATION_HINTS within the first _TRUNCATION_HINT_WINDOW
         characters (an explicit error message returned as content).
      3. Output that starts with "{" but does not end with "}" (unclosed JSON).
      4. Output that ends with one of _TRUNCATED_TAILS (ellipsis, comment
         opener, or a dangling quote character).

    Args:
        raw: The text returned by the model; None is treated as empty.

    Returns:
        True when any of the heuristics above matches, otherwise False.
    """
    stripped = raw.strip() if raw else ""
    if len(stripped) < 20:
        return True

    head = stripped[:_TRUNCATION_HINT_WINDOW].lower()
    if any(hint in head for hint in _TRUNCATION_HINTS):
        return True

    # JSON truncation: opened but never closed.
    if stripped.startswith("{") and not stripped.endswith("}"):
        return True

    # Code truncation: the text ends in the middle of a statement.
    return stripped.endswith(_TRUNCATED_TAILS)
45
+
17
46
  logger = logging.getLogger(__name__)
18
47
 
19
48
 
@@ -163,12 +192,15 @@ class AnalyzerAgent:
163
192
  self.json_llm = self.base_llm
164
193
 
165
194
  elif provider == "local_server":
166
- base_url = api_keys.get("local_url", "http://localhost:1234/v1")
195
+ from coreinsight.prompts import ModelTier
196
+ base_url = api_keys.get("local_url", "http://localhost:1234/v1")
197
+ _max_tokens = 2048 if model_tier == ModelTier.SMALL else 4096
167
198
  self.base_llm = ChatOpenAI(
168
199
  model=model_name,
169
200
  api_key="not-needed",
170
201
  base_url=base_url,
171
202
  temperature=0.1,
203
+ max_tokens=_max_tokens,
172
204
  model_kwargs={"response_format": {"type": "json_object"}},
173
205
  )
174
206
  self.json_llm = self.base_llm
@@ -196,11 +228,20 @@ class AnalyzerAgent:
196
228
  self.json_llm = self.base_llm
197
229
 
198
230
  else: # Ollama default
231
+ from coreinsight.prompts import ModelTier
232
+ # Small models (7B) typically have 4096 native context.
233
+ # Asking for more causes silent degradation or OOM on the host.
234
+ # Medium/large local models can handle 8192 comfortably.
235
+ _ctx = 4096 if model_tier == ModelTier.SMALL else 8192
236
+ # num_predict: small models need room for JSON + code in one shot.
237
+ # Capping at 2048 for small prevents runaway generation that hits
238
+ # the limit mid-JSON and returns truncated garbage.
239
+ _predict = 2048 if model_tier == ModelTier.SMALL else 4096
199
240
  self.base_llm = ChatOllama(
200
241
  model=model_name,
201
242
  temperature=0.1,
202
- num_predict=4096,
203
- num_ctx=8192,
243
+ num_predict=_predict,
244
+ num_ctx=_ctx,
204
245
  )
205
246
  self.json_llm = self.base_llm.bind(format="json")
206
247
 
@@ -258,14 +299,31 @@ class AnalyzerAgent:
258
299
  def _invoke_code_chain(self, template: str, variables: dict, language: str) -> str:
259
300
  """Shared invocation + extraction logic for harness and fix chains."""
260
301
  chain = PromptTemplate.from_template(template) | self.base_llm
261
- result = chain.invoke(variables)
302
+ try:
303
+ result = chain.invoke(variables)
304
+ except Exception as e:
305
+ err = str(e).lower()
306
+ if any(h in err for h in _TRUNCATION_HINTS):
307
+ raise RuntimeError(
308
+ f"Model hit its context limit. Try a smaller file, fewer functions, "
309
+ f"or a model with a larger context window. Detail: {e}"
310
+ ) from e
311
+ raise
262
312
  raw = result.content if hasattr(result, "content") else str(result)
263
- # Handle Anthropic returning a list of content blocks
264
313
  if isinstance(raw, list):
265
314
  raw = "\n".join(
266
315
  item["text"] if isinstance(item, dict) and "text" in item else str(item)
267
316
  for item in raw
268
317
  )
318
+ if _is_truncated(raw):
319
+ logger.warning(
320
+ f"LLM output appears truncated (len={len(raw)}). "
321
+ f"Model likely hit its context/predict limit."
322
+ )
323
+ raise RuntimeError(
324
+ "Model output was truncated — hit context or token limit. "
325
+ "Try a model with a larger context window, or reduce the function size."
326
+ )
269
327
  return self._extract_executable_code(raw)
270
328
 
271
329
  def generate_harness(
@@ -421,12 +479,14 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
421
479
  return llm, llm
422
480
 
423
481
  if provider == "local_server":
424
- base_url = api_keys.get("local_url", "http://localhost:1234/v1")
482
+ base_url = api_keys.get("local_url", "http://localhost:1234/v1")
483
+ _max_tokens = api_keys.pop("_predict", 4096) # reuse same key as Ollama path
425
484
  llm = ChatOpenAI(
426
485
  model=model_name,
427
486
  api_key="not-needed",
428
487
  base_url=base_url,
429
488
  temperature=0.1,
489
+ max_tokens=_max_tokens,
430
490
  model_kwargs={"response_format": {"type": "json_object"}},
431
491
  )
432
492
  return llm, llm
@@ -452,16 +512,33 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
452
512
  )
453
513
  return llm, llm
454
514
 
455
- # Ollama default
515
+ # Ollama default — context and predict budget are passed in from the
516
+ # calling agent which knows its own model_tier.
517
+ # Default to medium-safe values; callers override via the private "_ctx" / "_predict" entries in api_keys.
518
+ _ctx = api_keys.pop("_ctx", 8192)
519
+ _predict = api_keys.pop("_predict", 4096)
456
520
  base = ChatOllama(
457
521
  model=model_name,
458
522
  temperature=0.1,
459
- num_predict=4096,
460
- num_ctx=8192,
523
+ num_predict=_predict,
524
+ num_ctx=_ctx,
461
525
  )
462
526
  return base, base.bind(format="json")
463
527
 
464
528
 
529
def _build_llm_tiered(provider: str, model_name: str, api_keys: dict, model_tier: str):
    """
    Call _build_llm with tier-dependent generation limits for local providers.

    A copy of ``api_keys`` is extended with private entries before being
    forwarded, so the caller's dict is never modified:

      * provider "ollama":       "_ctx" (4096 small / 8192 otherwise) and
                                 "_predict" (2048 small / 4096 otherwise)
      * provider "local_server": "_predict" only (2048 small / 4096 otherwise)
      * any other provider:      no extra entries

    Args:
        provider: Provider identifier forwarded to _build_llm.
        model_name: Model name forwarded to _build_llm.
        api_keys: Provider settings; None is treated as an empty dict.
        model_tier: Tier value compared against ModelTier.SMALL.

    Returns:
        Whatever _build_llm returns for the resulting arguments.
    """
    from coreinsight.prompts import ModelTier

    small = model_tier == ModelTier.SMALL

    overrides = {}
    if provider == "ollama":
        overrides["_ctx"] = 4096 if small else 8192
        overrides["_predict"] = 2048 if small else 4096
    elif provider == "local_server":
        # Only the response length is set here; no "_ctx" entry is sent
        # for this provider.
        overrides["_predict"] = 2048 if small else 4096

    merged = dict(api_keys or {})
    merged.update(overrides)
    return _build_llm(provider, model_name, merged)
540
+
541
+
465
542
  class BottleneckAgent:
466
543
  """
467
544
  Agent 1 — analysis only.
@@ -480,7 +557,7 @@ class BottleneckAgent:
480
557
  from coreinsight.prompts import BOTTLENECK_TEMPLATE, SYSTEM_PROMPT
481
558
  self.model_tier = model_tier
482
559
  self.parser = JsonOutputParser(pydantic_object=AuditResult)
483
- self._base_llm, self._json_llm = _build_llm(provider, model_name, api_keys)
560
+ self._base_llm, self._json_llm = _build_llm_tiered(provider, model_name, api_keys, model_tier)
484
561
 
485
562
  self._prompt = PromptTemplate(
486
563
  template=BOTTLENECK_TEMPLATE,
@@ -544,7 +621,7 @@ class OptimizerAgent:
544
621
  ) -> None:
545
622
  from coreinsight.prompts import OPTIMIZER_TEMPLATE
546
623
  self.model_tier = model_tier
547
- self._base_llm, _ = _build_llm(provider, model_name, api_keys)
624
+ self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
548
625
  self._template = OPTIMIZER_TEMPLATE
549
626
 
550
627
  def _extract_code(self, raw: str) -> str:
@@ -620,7 +697,7 @@ class HarnessAgent:
620
697
  HARNESS_ADDENDUM_MULTI,
621
698
  )
622
699
  self.model_tier = model_tier
623
- self._base_llm, _ = _build_llm(provider, model_name, api_keys)
700
+ self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
624
701
  self._harness_tmpl = HARNESS_TEMPLATE_MULTI + HARNESS_ADDENDUM_MULTI.get(model_tier, "")
625
702
  self._fix_tmpl = FIX_TEMPLATE_MULTI + HARNESS_ADDENDUM_MULTI.get(model_tier, "")
626
703
 
@@ -638,14 +715,28 @@ class HarnessAgent:
638
715
 
639
716
  def _invoke(self, template: str, variables: dict) -> str:
640
717
  chain = PromptTemplate.from_template(template) | self._base_llm
641
- result = chain.invoke(variables)
642
- raw = result.content if hasattr(result, "content") else str(result)
718
+ try:
719
+ result = chain.invoke(variables)
720
+ except Exception as e:
721
+ err = str(e).lower()
722
+ if any(h in err for h in _TRUNCATION_HINTS):
723
+ raise RuntimeError(
724
+ f"Model hit its context limit during harness generation. "
725
+ f"Detail: {e}"
726
+ ) from e
727
+ raise
728
+ raw = result.content if hasattr(result, "content") else str(result)
643
729
  if isinstance(raw, list):
644
730
  raw = "\n".join(
645
731
  item["text"] if isinstance(item, dict) and "text" in item
646
732
  else str(item)
647
733
  for item in raw
648
734
  )
735
+ if _is_truncated(raw):
736
+ raise RuntimeError(
737
+ "Harness output was truncated — model hit its token limit. "
738
+ "Switching to fix loop with truncation note."
739
+ )
649
740
  return self._extract_code(raw)
650
741
 
651
742
  def _check_speedup(self, success: bool, logs: str) -> bool:
@@ -714,7 +805,9 @@ class HarnessAgent:
714
805
  is_valid = self._check_speedup(success, logs)
715
806
  retries += 1
716
807
 
717
- if is_valid and retries > 0:
808
+ if getattr(sandbox, 'disabled', False):
809
+ pass # skipped intentionally — don't annotate as failed
810
+ elif is_valid and retries > 0:
718
811
  logs = f"(Succeeded after {retries} retries)\n" + logs
719
812
  elif not is_valid:
720
813
  logs = f"(Failed after {retries} retries)\n" + logs
@@ -738,7 +831,7 @@ class TestCaseAgent:
738
831
  model_tier: str,
739
832
  ) -> None:
740
833
  self.model_tier = model_tier
741
- self._base_llm, _ = _build_llm(provider, model_name, api_keys)
834
+ self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
742
835
 
743
836
  def generate(
744
837
  self,
@@ -0,0 +1,103 @@
1
+ """
2
+ coreinsight/embeddings.py — Shared embedding utility
3
+
4
+ Single source of truth for embedding model loading used by both
5
+ memory.py (OptimizationMemory) and indexer.py (RepoIndexer).
6
+
7
+ Tries to load all-MiniLM-L6-v2 from local cache first.
8
+ Falls back to a deterministic hash-based embedder when offline
9
+ or when the model has not yet been downloaded.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import hashlib
14
+ import logging
15
+ import math
16
+ import os
17
+ from pathlib import Path
18
+ from typing import List, Tuple
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # All models cached here — never hits the network if already present
23
+ MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
24
+ MODEL_NAME = "all-MiniLM-L6-v2"
25
+
26
+
27
class _HashEmbeddingFunction:
    """
    Deterministic embedder that needs no model download.

    Each text is lower-cased and split on whitespace; every token is hashed
    with SHA-256 and counted into one of DIM buckets. The bucket counts are
    then scaled to unit L2 length (an all-zero vector is left as zeros).
    """
    DIM = 384

    def __call__(self, input: List[str]) -> List[List[float]]:
        """Return one DIM-length vector per text in ``input``, in order."""
        return [self._embed(document) for document in input]

    def _embed(self, document: str) -> List[float]:
        """Build the normalised bucket-count vector for a single text."""
        buckets = [0.0] * self.DIM
        for word in document.lower().split():
            digest = hashlib.sha256(word.encode()).digest()
            # Big-endian integer of the digest equals int(hexdigest, 16).
            buckets[int.from_bytes(digest, "big") % self.DIM] += 1.0
        # "or 1.0" avoids dividing by zero when the text has no tokens.
        norm = math.sqrt(sum(v * v for v in buckets)) or 1.0
        return [v / norm for v in buckets]
51
+
52
+
53
def load_embedding_fn() -> Tuple[object, str]:
    """
    Return an embedding callable together with a label describing it.

    Steps:
      1. Create MODEL_CACHE_DIR and point SENTENCE_TRANSFORMERS_HOME and
         HF_HUB_CACHE at it; HF_HUB_OFFLINE defaults to "0". All three use
         ``setdefault``, so values already present in the environment win.
      2. Build chromadb's SentenceTransformerEmbeddingFunction for MODEL_NAME
         and call it once with a dummy input so load errors surface here.
      3. If anything in step 2 raises, log a warning, print a notice through
         a rich Console, and fall back to _HashEmbeddingFunction.

    Returns:
        (embedding_fn, label): on success the label is
        "<MODEL_NAME> (cached)"; on fallback it is
        "hash-based (offline fallback)".
    """
    MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # These are set before the chromadb import below so the libraries it
    # pulls in see the pinned cache location.
    cache_root = str(MODEL_CACHE_DIR)
    env_defaults = (
        ("SENTENCE_TRANSFORMERS_HOME", cache_root),
        ("HF_HUB_CACHE", cache_root),
        # Downloads stay allowed unless the user exported HF_HUB_OFFLINE=1.
        ("HF_HUB_OFFLINE", "0"),
    )
    for env_name, env_value in env_defaults:
        os.environ.setdefault(env_name, env_value)

    try:
        from chromadb.utils import embedding_functions as st_embeddings

        embedder = st_embeddings.SentenceTransformerEmbeddingFunction(model_name=MODEL_NAME)

        # Dummy call forces the weights to load now instead of mid-analysis.
        embedder(["probe"])

        active_label = f"{MODEL_NAME} (cached)"
        logger.debug(f"Embedding model loaded: {active_label}")
        return embedder, active_label

    except Exception as load_error:
        logger.warning(
            f"SentenceTransformer unavailable ({load_error}). "
            f"Using offline hash embedder — semantic quality reduced. "
            f"Run `coreinsight index` once while online to cache the model."
        )
        from rich.console import Console as RichConsole
        RichConsole().print(
            "[yellow]⚠ Embedding model unavailable (offline or not yet downloaded). "
            "Using keyword-based fallback — RAG and memory recall will work but with "
            "reduced semantic accuracy. "
            "Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
        )
        return _HashEmbeddingFunction(), "hash-based (offline fallback)"
@@ -9,62 +9,11 @@ import chromadb
9
9
  from chromadb.utils import embedding_functions
10
10
 
11
11
  from coreinsight.parser import CodeParser
12
+ from coreinsight.embeddings import load_embedding_fn
12
13
 
13
14
  console = Console()
14
15
  logger = logging.getLogger(__name__)
15
16
 
16
- # Local model cache — never hits the network if model is already here
17
- _MODEL_CACHE_DIR = Path.home() / ".coreinsight" / "models"
18
-
19
-
20
- class _HashEmbeddingFunction:
21
- """
22
- Deterministic offline fallback embedder.
23
- Produces a 384-dim float vector from token overlap — no downloads, no GPU.
24
- Semantic quality is lower than MiniLM but RAG still works via keyword matching.
25
- """
26
- DIM = 384
27
-
28
- def __call__(self, input: list[str]) -> list[list[float]]:
29
- results = []
30
- for text in input:
31
- tokens = text.lower().split()
32
- vec = [0.0] * self.DIM
33
- for tok in tokens:
34
- h = int(hashlib.sha256(tok.encode()).hexdigest(), 16)
35
- vec[h % self.DIM] += 1.0
36
- # L2 normalise
37
- mag = math.sqrt(sum(x * x for x in vec)) or 1.0
38
- results.append([x / mag for x in vec])
39
- return results
40
-
41
-
42
- def _load_embedding_fn():
43
- """
44
- Try to load SentenceTransformer from local cache.
45
- Falls back to _HashEmbeddingFunction if offline or model not cached.
46
- """
47
- _MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
48
- os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(_MODEL_CACHE_DIR))
49
- os.environ.setdefault("HF_HUB_OFFLINE", "0") # allow download when online
50
-
51
- try:
52
- fn = embedding_functions.SentenceTransformerEmbeddingFunction(
53
- model_name="all-MiniLM-L6-v2",
54
- )
55
- # Probe: actually load the model now so we catch network errors here
56
- # rather than silently later during indexing.
57
- fn(["probe"])
58
- return fn, "all-MiniLM-L6-v2 (cached)"
59
- except Exception as e:
60
- logger.warning(f"SentenceTransformer unavailable ({e}). Using offline hash embedder — semantic quality reduced.")
61
- console.print(
62
- "[yellow]⚠ Embedding model unavailable (offline or not yet downloaded). "
63
- "Using keyword-based fallback — RAG will work but with reduced semantic accuracy. "
64
- "Run [cyan]coreinsight index[/cyan] once while online to cache the model.[/yellow]"
65
- )
66
- return _HashEmbeddingFunction(), "hash-based (offline fallback)"
67
-
68
17
 
69
18
  class RepoIndexer:
70
19
  def __init__(self, repo_path: str):
@@ -82,7 +31,7 @@ class RepoIndexer:
82
31
  return True
83
32
  try:
84
33
  self._chroma_client = chromadb.PersistentClient(path=str(self.db_path))
85
- self._embedding_fn, self._embedding_label = _load_embedding_fn()
34
+ self._embedding_fn, self._embedding_label = load_embedding_fn()
86
35
  self._collection = self._chroma_client.get_or_create_collection(
87
36
  name="codebase_context",
88
37
  embedding_function=self._embedding_fn,
@@ -272,8 +272,15 @@ def process_function(func: dict, language: str, agent: AnalyzerAgent, sandbox: C
272
272
  return func_name, result, (success, logs, plot_data), None, verification, profiler_result, None, is_valid_optimization
273
273
 
274
274
  except Exception as e:
275
+ err_str = str(e)
276
+ if "context" in err_str.lower() and "limit" in err_str.lower():
277
+ _log(func_name, f"Context limit hit: {e}", style="bold yellow")
278
+ return func_name, None, None, (
279
+ f"⚠️ Context limit: {err_str}\n"
280
+ f"Try a model with a larger context window, or split the function."
281
+ ), None, None, None, False
275
282
  _log(func_name, f"Failed: {e}", style="bold red")
276
- return func_name, None, None, f"❌ Analysis failed: {str(e)}", None, None, None, False
283
+ return func_name, None, None, f"❌ Analysis failed: {err_str}", None, None, None, False
277
284
 
278
285
  def parse_csv_logs(logs: str):
279
286
  """Safely extracts CSV data from the sandbox logs."""
@@ -734,7 +741,7 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None):
734
741
  finally:
735
742
  console = _prev_console
736
743
 
737
- def run_demo(lang: str = "python", no_docker: bool = False):
744
+ def run_demo(lang: str = "python", no_docker: bool = False, tui_console=None):
738
745
  import shutil
739
746
  import importlib.resources
740
747
 
@@ -797,18 +804,34 @@ def run_demo(lang: str = "python", no_docker: bool = False):
797
804
  # For Python: auto-index so RAG cross-file context is showcased
798
805
  if lang == "python":
799
806
  console.print("[dim]Auto-indexing demo files to showcase RAG cross-file context...[/dim]")
800
- from coreinsight.indexer import RepoIndexer as _RepoIndexer
801
- _RepoIndexer(str(demo_dir)).index_repository()
807
+ try:
808
+ from coreinsight.indexer import RepoIndexer as _RepoIndexer
809
+ _RepoIndexer(str(demo_dir)).index_repository()
810
+ except Exception as _idx_err:
811
+ # Non-fatal — SQLite write conflicts can occur when running
812
+ # through the TUI. RAG context will be empty for this run.
813
+ console.print(f"[dim yellow]Indexing skipped (will retry next run): {_idx_err}[/dim yellow]")
802
814
  console.print()
803
815
 
804
- run_analysis(str(demo_dir / entry_file), no_docker=no_docker)
816
+ run_analysis(str(demo_dir / entry_file), no_docker=no_docker, tui_console=tui_console)
805
817
 
806
- def _run_memory_cmd(clear: bool):
818
+ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv"):
807
819
  from coreinsight.memory import OptimizationMemory, MEMORY_DIR
808
820
  import shutil
809
821
 
810
822
  mem = OptimizationMemory()
811
823
 
824
+ if export_path:
825
+ count = mem.export(export_path, fmt=export_fmt)
826
+ if count:
827
+ console.print(
828
+ f"[bold green]✅ Exported {count} optimization(s) to "
829
+ f"[cyan]{export_path}[/cyan][/bold green]"
830
+ )
831
+ else:
832
+ console.print("[yellow]Nothing to export — memory store is empty.[/yellow]")
833
+ return
834
+
812
835
  if clear:
813
836
  if MEMORY_DIR.exists():
814
837
  shutil.rmtree(MEMORY_DIR, ignore_errors=True)
@@ -930,7 +953,12 @@ def main_cli():
930
953
  index_parser.add_argument("--dir", default=".", help="Directory to index")
931
954
 
932
955
  memory_parser = subparsers.add_parser("memory", help="Inspect or clear the local optimization memory")
933
- memory_parser.add_argument("--clear", action="store_true", help="Wipe the memory store")
956
+ memory_parser.add_argument("--clear", action="store_true", help="Wipe the memory store")
957
+ memory_parser.add_argument("--export", dest="export_path", default=None,
958
+ help="Export memory to file (e.g. --export optimizations.csv)")
959
+ memory_parser.add_argument("--format", dest="export_fmt", default="csv",
960
+ choices=["csv", "md"],
961
+ help="Export format: csv (default) or md")
934
962
 
935
963
  view_parser = subparsers.add_parser("view", help="Launch the interactive TUI")
936
964
  view_parser.add_argument("--dir", default=".", help="Starting directory (default: current)")
@@ -954,7 +982,11 @@ def main_cli():
954
982
  elif args.command == "analyze":
955
983
  run_analysis(args.file, no_docker=getattr(args, "no_docker", False))
956
984
  elif args.command == "memory":
957
- _run_memory_cmd(getattr(args, "clear", False))
985
+ _run_memory_cmd(
986
+ clear=getattr(args, "clear", False),
987
+ export_path=getattr(args, "export_path", None),
988
+ export_fmt=getattr(args, "export_fmt", "csv"),
989
+ )
958
990
  elif args.command == "scan":
959
991
  scanner = ProjectScanner(args.dir)
960
992
  scanner.scan_project(max_results=args.top)