coreinsight-cli 0.2.0__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/PKG-INFO +24 -3
  2. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/README.md +20 -1
  3. coreinsight_cli-0.2.6/coreinsight/__init__.py +6 -0
  4. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/analyzer.py +401 -0
  5. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/config.py +75 -3
  6. coreinsight_cli-0.2.6/coreinsight/indexer.py +171 -0
  7. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/main.py +215 -108
  8. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/memory.py +11 -2
  9. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/profiler.py +186 -8
  10. coreinsight_cli-0.2.6/coreinsight/prompts.py +299 -0
  11. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/sandbox.py +13 -1
  12. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight_cli.egg-info/PKG-INFO +24 -3
  13. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight_cli.egg-info/SOURCES.txt +1 -3
  14. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight_cli.egg-info/requires.txt +3 -0
  15. coreinsight_cli-0.2.6/coreinsight_cli.egg-info/top_level.txt +1 -0
  16. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/pyproject.toml +6 -3
  17. coreinsight_cli-0.2.0/coreinsight/demo/__init__.py +0 -0
  18. coreinsight_cli-0.2.0/coreinsight/indexer.py +0 -111
  19. coreinsight_cli-0.2.0/coreinsight/prompts.py +0 -97
  20. coreinsight_cli-0.2.0/coreinsight_cli.egg-info/top_level.txt +0 -2
  21. coreinsight_cli-0.2.0/coreinsight_demo/bad_loop.py +0 -38
  22. coreinsight_cli-0.2.0/coreinsight_demo/data_processor.py +0 -23
  23. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/LICENSE +0 -0
  24. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/Dockerfile.cpp-sandbox +0 -0
  25. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/Dockerfile.python-sandbox +0 -0
  26. {coreinsight_cli-0.2.0/coreinsight → coreinsight_cli-0.2.6/coreinsight/demo}/__init__.py +0 -0
  27. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/demo/bad_loop.py +0 -0
  28. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/demo/data_processor.py +0 -0
  29. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/demo/slow.cpp +0 -0
  30. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/hardware.py +0 -0
  31. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/parser.py +0 -0
  32. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight/scanner.py +0 -0
  33. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight_cli.egg-info/dependency_links.txt +0 -0
  34. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/coreinsight_cli.egg-info/entry_points.txt +0 -0
  35. {coreinsight_cli-0.2.0 → coreinsight_cli-0.2.6}/setup.cfg +0 -0
@@ -1,15 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coreinsight-cli
3
- Version: 0.2.0
3
+ Version: 0.2.6
4
4
  Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
5
5
  Author: Varun Jani
6
- License: MIT
6
+ License: GPL-3.0-or-later
7
7
  Project-URL: Homepage, https://github.com/Prais3/coreinsight_cli
8
8
  Project-URL: Bug Tracker, https://github.com/Prais3/coreinsight_cli/issues
9
9
  Keywords: performance,profiling,optimization,llm,cuda,cpp,python,hpc,benchmarking
10
10
  Classifier: Development Status :: 3 - Alpha
11
11
  Classifier: Intended Audience :: Developers
12
- Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Programming Language :: Python :: 3.9
15
15
  Classifier: Programming Language :: Python :: 3.10
@@ -33,6 +33,8 @@ Requires-Dist: pydantic>=2.0
33
33
  Requires-Dist: chromadb>=0.5.0
34
34
  Requires-Dist: sentence-transformers>=3.0.0
35
35
  Requires-Dist: psutil>=5.9
36
+ Provides-Extra: compat
37
+ Requires-Dist: pysqlite3-binary>=0.5.0; extra == "compat"
36
38
  Dynamic: license-file
37
39
 
38
40
  # CoreInsight CLI
@@ -169,6 +171,18 @@ coreinsight configure
169
171
  coreinsight configure --pro-key <your-key>
170
172
  ```
171
173
 
174
+ ### `coreinsight configure [--agent-mode <mode>]`
175
+ Choose between single-agent and multi-agent mode, or use `auto` to let CoreInsight select one automatically.
176
+ Pass `--agent-mode multi` for multi-agent usage.
177
+ ```bash
178
+ # Explicit override
179
+ coreinsight configure --agent-mode multi
180
+ coreinsight configure --agent-mode single
181
+
182
+ # Reset to auto-selection
183
+ coreinsight configure --agent-mode auto
184
+ ```
185
+
172
186
  ---
173
187
 
174
188
  ## Supported languages
@@ -266,3 +280,10 @@ CoreInsight is local-first by design:
266
280
  analyse is sent to the provider's API, under your own key
267
281
  - The optimization memory store lives at `~/.coreinsight/memory_db` on
268
282
  your local filesystem
283
+
284
+ ## Troubleshooting
285
+
286
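+ ChromaDB can fail on systems that ship an old SQLite3 version. To resolve this, install `pysqlite3-binary`, which CoreInsight uses in place of the system `sqlite3` when it is available: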
287
+ ```bash
288
+ pip install pysqlite3-binary # >=0.5.0
289
+ ```
@@ -132,6 +132,18 @@ coreinsight configure
132
132
  coreinsight configure --pro-key <your-key>
133
133
  ```
134
134
 
135
+ ### `coreinsight configure [--agent-mode <mode>]`
136
+ Choose between single-agent and multi-agent mode, or use `auto` to let CoreInsight select one automatically.
137
+ Pass `--agent-mode multi` for multi-agent usage.
138
+ ```bash
139
+ # Explicit override
140
+ coreinsight configure --agent-mode multi
141
+ coreinsight configure --agent-mode single
142
+
143
+ # Reset to auto-selection
144
+ coreinsight configure --agent-mode auto
145
+ ```
146
+
135
147
  ---
136
148
 
137
149
  ## Supported languages
@@ -228,4 +240,11 @@ CoreInsight is local-first by design:
228
240
  - **Cloud providers** — only the function code and context you choose to
229
241
  analyse is sent to the provider's API, under your own key
230
242
  - The optimization memory store lives at `~/.coreinsight/memory_db` on
231
- your local filesystem
243
+ your local filesystem
244
+
245
+ ## Troubleshooting
246
+
247
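+ ChromaDB can fail on systems that ship an old SQLite3 version. To resolve this, install `pysqlite3-binary`, which CoreInsight uses in place of the system `sqlite3` when it is available: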
248
+ ```bash
249
+ pip install pysqlite3-binary # >=0.5.0
250
+ ```
@@ -0,0 +1,6 @@
1
+ try:
2
+ __import__('pysqlite3')
3
+ import sys
4
+ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
5
+ except ImportError:
6
+ pass # pysqlite3 not installed — rely on system sqlite3
@@ -395,4 +395,405 @@ class AnalyzerAgent:
395
395
 
396
396
  except Exception as e:
397
397
  logger.warning(f"generate_test_cases failed for '{func_name}': {e}")
398
+ return []
399
+
400
+ # ---------------------------------------------------------------------------
401
+ # Multi-agent support (v0.2.5)
402
+ # ---------------------------------------------------------------------------
403
+
404
+ def _build_llm(provider: str, model_name: str, api_keys: dict):
405
+ """
406
+ Shared LLM factory for all multi-agent classes.
407
+ Returns (base_llm, json_llm) — same pattern as AnalyzerAgent.__init__.
408
+ Raises ValueError on missing credentials.
409
+ """
410
+ api_keys = api_keys or {}
411
+
412
+ if provider == "openai":
413
+ if not api_keys.get("openai"):
414
+ raise ValueError("OpenAI API key required.")
415
+ llm = ChatOpenAI(
416
+ model=model_name,
417
+ api_key=api_keys["openai"],
418
+ temperature=0.1,
419
+ model_kwargs={"response_format": {"type": "json_object"}},
420
+ )
421
+ return llm, llm
422
+
423
+ if provider == "local_server":
424
+ base_url = api_keys.get("local_url", "http://localhost:1234/v1")
425
+ llm = ChatOpenAI(
426
+ model=model_name,
427
+ api_key="not-needed",
428
+ base_url=base_url,
429
+ temperature=0.1,
430
+ model_kwargs={"response_format": {"type": "json_object"}},
431
+ )
432
+ return llm, llm
433
+
434
+ if provider == "anthropic":
435
+ if not api_keys.get("anthropic"):
436
+ raise ValueError("Anthropic API key required.")
437
+ llm = ChatAnthropic(
438
+ model=model_name,
439
+ api_key=api_keys["anthropic"],
440
+ temperature=0.1,
441
+ )
442
+ return llm, llm
443
+
444
+ if provider == "google":
445
+ if not api_keys.get("google"):
446
+ raise ValueError("Google Gemini API key required.")
447
+ llm = ChatGoogleGenerativeAI(
448
+ model=model_name,
449
+ google_api_key=api_keys["google"],
450
+ temperature=0.1,
451
+ convert_system_message_to_human=True,
452
+ )
453
+ return llm, llm
454
+
455
+ # Ollama default
456
+ base = ChatOllama(
457
+ model=model_name,
458
+ temperature=0.1,
459
+ num_predict=4096,
460
+ num_ctx=8192,
461
+ )
462
+ return base, base.bind(format="json")
463
+
464
+
465
+ class BottleneckAgent:
466
+ """
467
+ Agent 1 — analysis only.
468
+ Identifies the single most critical bottleneck and returns the same
469
+ dict structure as AnalyzerAgent.analyze() so process_function cannot
470
+ tell the difference. optimized_code is always None from this agent.
471
+ """
472
+
473
+ def __init__(
474
+ self,
475
+ provider: str,
476
+ model_name: str,
477
+ api_keys: dict,
478
+ model_tier: str,
479
+ ) -> None:
480
+ from coreinsight.prompts import BOTTLENECK_TEMPLATE, SYSTEM_PROMPT
481
+ self.model_tier = model_tier
482
+ self.parser = JsonOutputParser(pydantic_object=AuditResult)
483
+ self._base_llm, self._json_llm = _build_llm(provider, model_name, api_keys)
484
+
485
+ self._prompt = PromptTemplate(
486
+ template=BOTTLENECK_TEMPLATE,
487
+ input_variables=[
488
+ "language", "code_content", "context", "hardware_target",
489
+ ],
490
+ partial_variables={
491
+ "system_prompt": SYSTEM_PROMPT,
492
+ "format_instructions": self.parser.get_format_instructions(),
493
+ },
494
+ )
495
+ self._chain = self._prompt | self._json_llm | self.parser
496
+
497
+ def analyze(
498
+ self,
499
+ code: str,
500
+ language: str,
501
+ context: str = "",
502
+ hardware_target: str = "Generic CPU",
503
+ ) -> dict:
504
+ try:
505
+ return self._chain.invoke({
506
+ "language": language,
507
+ "code_content": code,
508
+ "context": context,
509
+ "hardware_target": hardware_target,
510
+ })
511
+ except OutputParserException:
512
+ return {
513
+ "severity": "Error",
514
+ "issue": "AI Output Parsing Failed",
515
+ "reasoning": "The model failed to return valid JSON.",
516
+ "suggestion": "Try running again or use a larger model.",
517
+ "bottlenecks": [],
518
+ "optimized_code": None,
519
+ }
520
+ except Exception as e:
521
+ return {
522
+ "severity": "Error",
523
+ "issue": str(e),
524
+ "reasoning": "System error during bottleneck analysis.",
525
+ "suggestion": "Check LLM API keys and connectivity.",
526
+ "bottlenecks": [],
527
+ "optimized_code": None,
528
+ }
529
+
530
+
531
+ class OptimizerAgent:
532
+ """
533
+ Agent 2 — code generation only.
534
+ Receives the bottleneck analysis result and writes the optimized function.
535
+ Returns raw code as a string (no JSON, no harness).
536
+ """
537
+
538
+ def __init__(
539
+ self,
540
+ provider: str,
541
+ model_name: str,
542
+ api_keys: dict,
543
+ model_tier: str,
544
+ ) -> None:
545
+ from coreinsight.prompts import OPTIMIZER_TEMPLATE
546
+ self.model_tier = model_tier
547
+ self._base_llm, _ = _build_llm(provider, model_name, api_keys)
548
+ self._template = OPTIMIZER_TEMPLATE
549
+
550
+ def _extract_code(self, raw: str) -> str:
551
+ """Reuse the same extraction logic as AnalyzerAgent."""
552
+ blocks = re.findall(r"```[a-zA-Z+#]*\s*\n(.*?)```", raw, re.DOTALL)
553
+ if blocks:
554
+ return max(blocks, key=len).strip()
555
+ lines = raw.strip().split("\n")
556
+ lines = [l for l in lines if not re.match(r"^```", l)]
557
+ while lines and lines[0].lower().startswith(
558
+ ("here is", "sure", "certainly", "output:")
559
+ ) and not lines[0].strip().startswith(("#", "//")):
560
+ lines.pop(0)
561
+ return "\n".join(lines).strip()
562
+
563
+ def generate(
564
+ self,
565
+ func_name: str,
566
+ original_code: str,
567
+ analysis: dict,
568
+ language: str,
569
+ context: str = "",
570
+ hardware_target: str = "Generic CPU",
571
+ ) -> str:
572
+ """
573
+ Returns the optimized function as a raw code string.
574
+ Returns original_code on any failure so the pipeline can continue.
575
+ """
576
+ try:
577
+ chain = PromptTemplate.from_template(self._template) | self._base_llm
578
+ result = chain.invoke({
579
+ "language": language,
580
+ "func_name": func_name,
581
+ "hardware_target": hardware_target,
582
+ "severity": analysis.get("severity", ""),
583
+ "issue": analysis.get("issue", ""),
584
+ "reasoning": analysis.get("reasoning", ""),
585
+ "suggestion": analysis.get("suggestion",""),
586
+ "original": original_code,
587
+ "context": context or "None",
588
+ })
589
+ raw = result.content if hasattr(result, "content") else str(result)
590
+ if isinstance(raw, list):
591
+ raw = "\n".join(
592
+ item["text"] if isinstance(item, dict) and "text" in item
593
+ else str(item)
594
+ for item in raw
595
+ )
596
+ code = self._extract_code(raw)
597
+ return code if code else original_code
598
+ except Exception as e:
599
+ logger.warning(f"OptimizerAgent.generate failed: {e}")
600
+ return original_code
601
+
602
+
603
+ class HarnessAgent:
604
+ """
605
+ Agent 3 — harness generation and fix loop.
606
+ Owns the entire retry loop so process_function stays clean.
607
+ run() returns (success, logs, plot_data, retry_count) after running in sandbox.
608
+ """
609
+
610
+ def __init__(
611
+ self,
612
+ provider: str,
613
+ model_name: str,
614
+ api_keys: dict,
615
+ model_tier: str,
616
+ ) -> None:
617
+ from coreinsight.prompts import (
618
+ HARNESS_TEMPLATE_MULTI,
619
+ FIX_TEMPLATE_MULTI,
620
+ HARNESS_ADDENDUM_MULTI,
621
+ )
622
+ self.model_tier = model_tier
623
+ self._base_llm, _ = _build_llm(provider, model_name, api_keys)
624
+ self._harness_tmpl = HARNESS_TEMPLATE_MULTI + HARNESS_ADDENDUM_MULTI.get(model_tier, "")
625
+ self._fix_tmpl = FIX_TEMPLATE_MULTI + HARNESS_ADDENDUM_MULTI.get(model_tier, "")
626
+
627
+ def _extract_code(self, raw: str) -> str:
628
+ blocks = re.findall(r"```[a-zA-Z+#]*\s*\n(.*?)```", raw, re.DOTALL)
629
+ if blocks:
630
+ return max(blocks, key=len).strip()
631
+ lines = raw.strip().split("\n")
632
+ lines = [l for l in lines if not re.match(r"^```", l)]
633
+ while lines and lines[0].lower().startswith(
634
+ ("here is", "sure", "certainly", "output:")
635
+ ) and not lines[0].strip().startswith(("#", "//")):
636
+ lines.pop(0)
637
+ return "\n".join(lines).strip()
638
+
639
+ def _invoke(self, template: str, variables: dict) -> str:
640
+ chain = PromptTemplate.from_template(template) | self._base_llm
641
+ result = chain.invoke(variables)
642
+ raw = result.content if hasattr(result, "content") else str(result)
643
+ if isinstance(raw, list):
644
+ raw = "\n".join(
645
+ item["text"] if isinstance(item, dict) and "text" in item
646
+ else str(item)
647
+ for item in raw
648
+ )
649
+ return self._extract_code(raw)
650
+
651
+ def _check_speedup(self, success: bool, logs: str) -> bool:
652
+ if not success:
653
+ return False
654
+ try:
655
+ for line in reversed(logs.strip().split("\n")):
656
+ parts = line.split(",")
657
+ if len(parts) == 4 and parts[0].strip().isdigit():
658
+ return float(parts[3]) >= 1.05
659
+ except Exception:
660
+ pass
661
+ return False
662
+
663
+ def run(
664
+ self,
665
+ func_name: str,
666
+ original_code: str,
667
+ optimized_code: str,
668
+ language: str,
669
+ context: str,
670
+ hardware_target: str,
671
+ sandbox, # CodeSandbox instance
672
+ max_retries: int = 2,
673
+ ) -> tuple:
674
+ """
675
+ Generates harness, runs in sandbox, retries on failure.
676
+ Returns (success, logs, plot_data, retry_count).
677
+ """
678
+ try:
679
+ harness = self._invoke(self._harness_tmpl, {
680
+ "language": language,
681
+ "func_name": func_name,
682
+ "original": original_code,
683
+ "optimized": optimized_code,
684
+ "context": context,
685
+ "hardware_target": hardware_target,
686
+ })
687
+ except Exception as e:
688
+ return False, f"Harness generation failed: {e}", None, 0
689
+
690
+ success, logs, plot_data = sandbox.execute_benchmark(harness, language)
691
+ is_valid = self._check_speedup(success, logs)
692
+ retries = 0
693
+
694
+ while not is_valid and retries < max_retries:
695
+ if success and "N,Original_Time" not in logs:
696
+ logs += "\nERROR: Script ran but did NOT print the CSV table. You MUST print the strict CSV format."
697
+ elif success:
698
+ logs += "\nERROR: Optimized code was SLOWER than original. Rewrite to be faster."
699
+
700
+ try:
701
+ harness = self._invoke(self._fix_tmpl, {
702
+ "language": language,
703
+ "func_name": func_name,
704
+ "original": original_code,
705
+ "bad_harness":harness,
706
+ "error_logs": logs,
707
+ "context": context,
708
+ })
709
+ except Exception as e:
710
+ logs += f"\nFix generation failed: {e}"
711
+ break
712
+
713
+ success, logs, plot_data = sandbox.execute_benchmark(harness, language)
714
+ is_valid = self._check_speedup(success, logs)
715
+ retries += 1
716
+
717
+ if is_valid and retries > 0:
718
+ logs = f"(Succeeded after {retries} retries)\n" + logs
719
+ elif not is_valid:
720
+ logs = f"(Failed after {retries} retries)\n" + logs
721
+ success = False
722
+
723
+ return success, logs, plot_data, retries
724
+
725
+
726
+ class TestCaseAgent:
727
+ """
728
+ Agent 4 — test case generation only.
729
+ Identical logic to AnalyzerAgent.generate_test_cases but as a
730
+ standalone class so it can be called from a separate thread.
731
+ """
732
+
733
+ def __init__(
734
+ self,
735
+ provider: str,
736
+ model_name: str,
737
+ api_keys: dict,
738
+ model_tier: str,
739
+ ) -> None:
740
+ self.model_tier = model_tier
741
+ self._base_llm, _ = _build_llm(provider, model_name, api_keys)
742
+
743
+ def generate(
744
+ self,
745
+ func_name: str,
746
+ original_code: str,
747
+ language: str,
748
+ context: str = "",
749
+ num_cases: int = 8,
750
+ ) -> list:
751
+ """
752
+ Same return contract as AnalyzerAgent.generate_test_cases:
753
+ list of {"args": [...], "kwargs": {...}} or [] on failure.
754
+ """
755
+ import json as _json
756
+
757
+ chain = PromptTemplate.from_template(_TEST_CASES_TEMPLATE) | self._base_llm
758
+ try:
759
+ result = chain.invoke({
760
+ "func_name": func_name,
761
+ "language": language,
762
+ "original": original_code,
763
+ "context": context or "None",
764
+ "num_cases": num_cases,
765
+ })
766
+ raw = result.content if hasattr(result, "content") else str(result)
767
+ if isinstance(raw, list):
768
+ raw = "\n".join(
769
+ item["text"] if isinstance(item, dict) and "text" in item
770
+ else str(item)
771
+ for item in raw
772
+ )
773
+
774
+ raw = re.sub(r"```[a-zA-Z]*\s*", "", raw).strip()
775
+ raw = re.sub(r"```", "", raw).strip()
776
+ raw = re.sub(r"\bNone\b", "null", raw)
777
+ raw = re.sub(r"\bTrue\b", "true", raw)
778
+ raw = re.sub(r"\bFalse\b", "false", raw)
779
+ raw = re.sub(r",\s*([\]}])", r"\1", raw)
780
+
781
+ match = re.search(r"\[.*\]", raw, re.DOTALL)
782
+ if match:
783
+ raw = match.group(0)
784
+
785
+ try:
786
+ cases = _json.loads(raw)
787
+ except _json.JSONDecodeError:
788
+ import ast
789
+ cases = ast.literal_eval(raw)
790
+
791
+ return [
792
+ c for c in cases
793
+ if isinstance(c, dict)
794
+ and isinstance(c.get("args"), list)
795
+ and isinstance(c.get("kwargs"), dict)
796
+ ]
797
+ except Exception as e:
798
+ logger.warning(f"TestCaseAgent.generate failed for '{func_name}': {e}")
398
799
  return []
@@ -51,6 +51,32 @@ def is_pro(config: dict) -> bool:
51
51
  def get_tier_limits(config: dict) -> dict:
52
52
  return PRO_TIER_LIMITS if is_pro(config) else FREE_TIER_LIMITS
53
53
 
54
+
55
+ def get_agent_mode(config: dict) -> str:
56
+ """
57
+ Returns "multi" or "single".
58
+
59
+ Priority:
60
+ 1. Explicit user override stored in config ("agent_mode" key)
61
+ 2. Auto-selection based on model tier:
62
+ - small / medium local models → "multi"
63
+ (focused prompts compensate for smaller context windows)
64
+ - large / cloud models → "single"
65
+ (large models handle full context fine; saves API cost)
66
+ """
67
+ explicit = config.get("agent_mode")
68
+ if explicit in ("single", "multi"):
69
+ return explicit
70
+
71
+ provider = config.get("provider", "ollama")
72
+ model_name = config.get("model_name", "llama3.2")
73
+ tier = get_model_tier(provider, model_name)
74
+
75
+ from coreinsight.prompts import ModelTier
76
+ if tier in (ModelTier.SMALL, ModelTier.MEDIUM):
77
+ return "multi"
78
+ return "single"
79
+
54
80
  def load_config():
55
81
  if not CONFIG_FILE.exists():
56
82
  return {"provider": "ollama", "model_name": "llama3.2", "api_keys": {}}
@@ -62,7 +88,7 @@ def save_config(config_data):
62
88
  with open(CONFIG_FILE, "w") as f:
63
89
  json.dump(config_data, f, indent=4)
64
90
 
65
- def run_configure(pro_key: str = None):
91
+ def run_configure(pro_key: str = None, agent_mode: str = None):
66
92
  """Interactive CLI to set up models and API keys."""
67
93
  console.print("[bold cyan]⚙️ CoreInsight Configuration[/bold cyan]")
68
94
 
@@ -93,6 +119,27 @@ def run_configure(pro_key: str = None):
93
119
  except Exception as e:
94
120
  console.print("[red]⚠️ Could not verify key. Please check your internet connection or try again later.[/red]")
95
121
  return
122
+
123
+ if agent_mode is not None:
124
+ if agent_mode in ("single", "multi"):
125
+ config["agent_mode"] = agent_mode
126
+ save_config(config)
127
+ console.print(
128
+ f"[bold green]✅ Agent mode set to [cyan]{agent_mode}[/cyan].[/bold green]\n"
129
+ f"[dim]Use [cyan]coreinsight configure --agent-mode auto[/cyan] "
130
+ f"to restore automatic selection.[/dim]"
131
+ )
132
+ elif agent_mode == "auto":
133
+ config.pop("agent_mode", None)
134
+ save_config(config)
135
+ console.print(
136
+ "[bold green]✅ Agent mode reset to automatic selection.[/bold green]"
137
+ )
138
+ else:
139
+ console.print(
140
+ "[red]Invalid agent mode. Choose from: single, multi, auto[/red]"
141
+ )
142
+ return
96
143
 
97
144
  provider = Prompt.ask(
98
145
  "Which AI provider do you want to use?",
@@ -121,8 +168,33 @@ def run_configure(pro_key: str = None):
121
168
  if provider == "ollama":
122
169
  config["model_name"] = Prompt.ask("Ollama model name", default=config.get("model_name", "llama3.2"))
123
170
  elif provider == "local_server":
124
- config["model_name"] = Prompt.ask("Local model name (optional)", default=config.get("model_name", "local-model"))
125
- config["api_keys"]["local_url"] = Prompt.ask("Local Server Base URL", default="http://localhost:1234/v1")
171
+ console.print(Panel(
172
+ "[bold]Local inference server setup[/bold]\n\n"
173
+ "CoreInsight talks to any OpenAI-compatible local server.\n"
174
+ "Choose the option that matches how you loaded your weights:\n\n"
175
+ "[bold cyan]Option A — GGUF weights (llama.cpp):[/bold cyan]\n"
176
+ " pip install llama-cpp-python\\[server]\n"
177
+ " python -m llama_cpp.server --model your_model.gguf --port 1234\n\n"
178
+ "[bold cyan]Option B — PyTorch / HuggingFace weights (vLLM):[/bold cyan]\n"
179
+ " pip install vllm\n"
180
+ " python -m vllm.entrypoints.openai.api_server \\\\\n"
181
+ " --model /path/to/weights --port 1234\n\n"
182
+ "[bold cyan]Option C — LM Studio (GUI, easiest):[/bold cyan]\n"
183
+ " 1. Load your model in LM Studio\n"
184
+ " 2. Click [bold]Start Server[/bold] (defaults to localhost:1234)\n"
185
+ " 3. Enter the URL below\n\n"
186
+ "[dim]All three expose an OpenAI-compatible API on the URL you provide.[/dim]",
187
+ title="⚙️ Local Inference Server",
188
+ border_style="cyan",
189
+ ))
190
+ config["model_name"] = Prompt.ask(
191
+ "Model name (shown in server logs, or 'local-model')",
192
+ default=config.get("model_name", "local-model"),
193
+ )
194
+ config["api_keys"]["local_url"] = Prompt.ask(
195
+ "Server base URL",
196
+ default=config.get("api_keys", {}).get("local_url", "http://localhost:1234/v1"),
197
+ )
126
198
  elif provider == "openai":
127
199
  config["model_name"] = Prompt.ask("OpenAI model name", default="gpt-4o")
128
200
  config["api_keys"]["openai"] = Prompt.ask("OpenAI API Key (hidden)", password=True)