PyPI - coreinsight-cli - Versions diffs - 0.3.2__tar.gz → 0.3.3__tar.gz - Mend

coreinsight-cli 0.3.2 (tar.gz) → 0.3.3 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

{coreinsight_cli-0.3.2/coreinsight_cli.egg-info → coreinsight_cli-0.3.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coreinsight-cli
-Version: 0.3.2
+Version: 0.3.3
 Summary: Local-first AI performance profiler that mathematically verifies optimizations for Python, C++, and CUDA
 Author: Varun Jani
 License: GPL-3.0-or-later
@@ -20,20 +20,30 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: rich>=13.0
+Requires-Dist: textual>=0.60.0
+Requires-Dist: psutil>=5.9
+Requires-Dist: pydantic>=2.0
 Requires-Dist: docker>=6.0
-Requires-Dist: tree-sitter==0.21.3
-Requires-Dist: tree-sitter-languages
-Requires-Dist: langchain>=0.2.0
 Requires-Dist: langchain-core>=0.2.0
+Requires-Dist: langchain>=0.2.0
 Requires-Dist: langchain-ollama>=0.1.0
-Requires-Dist: langchain-google-genai>=1.0.0
 Requires-Dist: langchain-openai>=0.1.0
-Requires-Dist: langchain-anthropic>=0.1.0
-Requires-Dist: pydantic>=2.0
-Requires-Dist: chromadb>=0.5.0
-Requires-Dist: sentence-transformers>=3.0.0
-Requires-Dist: textual>=0.60.0
-Requires-Dist: psutil>=5.9
+Requires-Dist: tree-sitter==0.21.3
+Requires-Dist: tree-sitter-languages
+Provides-Extra: openai
+Provides-Extra: google
+Requires-Dist: langchain-google-genai>=1.0.0; extra == "google"
+Provides-Extra: anthropic
+Requires-Dist: langchain-anthropic>=0.1.0; extra == "anthropic"
+Provides-Extra: memory
+Requires-Dist: chromadb>=0.5.0; extra == "memory"
+Requires-Dist: sentence-transformers>=3.0.0; extra == "memory"
+Provides-Extra: cloud
+Requires-Dist: langchain-openai>=0.1.0; extra == "cloud"
+Requires-Dist: langchain-google-genai>=1.0.0; extra == "cloud"
+Requires-Dist: langchain-anthropic>=0.1.0; extra == "cloud"
+Provides-Extra: all
+Requires-Dist: coreinsight-cli[cloud,memory]; extra == "all"
 Provides-Extra: compat
 Requires-Dist: pysqlite3-binary>=0.5.0; extra == "compat"
 Dynamic: license-file
@@ -49,7 +59,23 @@ CoreInsight finds hardware bottlenecks in your code, generates optimized replace
 ## Install
 ```bash
+# OpenAI key - quick install
+pip install coreinsight-cli[openai]
+# Gemini key - quick install
+pip install coreinsight-cli[google]
+# Claude key - quick install
+pip install coreinsight-cli[anthropic]
+# Local Ollama install
 pip install coreinsight-cli
+# Memory and additional usage install
+pip install coreinsight-cli[openai,memory]
+# Install everything
+pip install coreinsight-cli[all]
 ```
 **Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)

{coreinsight_cli-0.3.2 → coreinsight_cli-0.3.3}/README.md RENAMED Viewed

@@ -9,7 +9,23 @@ CoreInsight finds hardware bottlenecks in your code, generates optimized replace
 ## Install
 ```bash
+# OpenAI key - quick install
+pip install coreinsight-cli[openai]
+# Gemini key - quick install
+pip install coreinsight-cli[google]
+# Claude key - quick install
+pip install coreinsight-cli[anthropic]
+# Local Ollama install
 pip install coreinsight-cli
+# Memory and additional usage install
+pip install coreinsight-cli[openai,memory]
+# Install everything
+pip install coreinsight-cli[all]
 ```
 **Requirements:** Python 3.9+ · Docker Desktop · [Ollama](https://ollama.com/download) (for local inference)

{coreinsight_cli-0.3.2 → coreinsight_cli-0.3.3}/coreinsight/analyzer.py RENAMED Viewed

@@ -8,11 +8,11 @@ from langchain_core.prompts import PromptTemplate
 from langchain_core.exceptions import OutputParserException
 from langchain_ollama import ChatOllama
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_openai import ChatOpenAI
-from langchain_anthropic import ChatAnthropic
-from coreinsight.prompts import SYSTEM_PROMPT, ANALYSIS_TEMPLATE, HARNESS_ADDENDUM
+from coreinsight.prompts import (
+    SYSTEM_PROMPT, ANALYSIS_TEMPLATE, HARNESS_ADDENDUM,
+    _HARNESS_TEMPLATE, _FIX_TEMPLATE, _TEST_CASES_TEMPLATE,
+)
 # Phrases that appear at the start of a truncated LLM response
 _TRUNCATION_HINTS = (
@@ -99,117 +99,6 @@ class AuditResult(BaseModel):
     optimized_code: Optional[str] = Field(description="The entirely rewritten optimized code, ready to drop in", default=None)
-_HARNESS_TEMPLATE = """
-You are a strict QA engineer writing a standalone asymptotic scaling benchmark script in {language}.
-ORIGINAL FUNCTION (Name: {func_name}):
-{original}
-OPTIMIZED FUNCTION:
-{optimized}
-GLOBAL DEPENDENCIES (Helper functions/structs required to run the code):
-{context}
-Write the complete executable script (e.g., `int main()` or `if __name__ == "__main__":`) that:
-1. Includes necessary imports/headers.
-2. Includes ALL required helper functions or structs from GLOBAL DEPENDENCIES so the script is fully standalone.
-3. Defines BOTH the original and optimized functions exactly as provided above.
-4. Tests multiple data sizes (e.g., N=10, 100, 1000, 5000).
-5. Target Hardware: {hardware_target}. The largest N MUST cross cache boundaries but MUST NOT exceed 20% of available RAM to prevent OOM crashes.
-6. Initializes realistic dummy data for each size N.
-7. Times execution of original vs optimized using high-resolution timers.
-CRITICAL TIMING:
-- Python: use `time.perf_counter()`. C++: use `std::chrono::high_resolution_clock`.
-- Clamp: `orig_time = max(end - start, 1e-9)` to prevent zero-division.
-- Speedup: `speedup = orig_time / opt_time`.
-ISOLATION RULES (CRITICAL):
-- This runs in an empty Docker container. NO local files exist.
-- DO NOT use local imports. Define everything inline.
-- DO NOT rename the original function — call it exactly `{func_name}`.
-OUTPUT FORMAT (CRITICAL):
-Print ONLY this exact CSV to stdout, no other text:
-N,Original_Time,Optimized_Time,Speedup
-10,0.002,0.001,2.00
-[PYTHON ONLY]: Also import matplotlib, plot results, and save as `benchmark_plot.png`.
-FORMATTING RULE: Wrap your ENTIRE script in a single markdown code block. No text before or after.
-"""
-_FIX_TEMPLATE = """
-You are an expert {language} developer. Your previous benchmark script FAILED in an isolated sandbox.
-ORIGINAL FUNCTION (Name: {func_name}):
-{original}
-GLOBAL DEPENDENCIES:
-{context}
-YOUR FAILED SCRIPT:
-{bad_harness}
-EXECUTION ERROR LOGS:
-{error_logs}
-ISOLATION CONSTRAINTS (CRITICAL):
-- Empty Docker container. No local files. NO local imports.
-- Define `{func_name}` and all GLOBAL DEPENDENCIES inline.
-FIX INSTRUCTIONS:
-1. Diagnose the failure from the error logs above.
-2. Fix imports, NameErrors, type mismatches, infinite loops, or OOM issues.
-3. Maintain the CSV stdout format exactly: N,Original_Time,Optimized_Time,Speedup
-4. Use high-resolution timers and clamp with `max(t, 1e-9)`.
-5. [PYTHON ONLY]: Save benchmark plot as `benchmark_plot.png`.
-FORMATTING RULE: Wrap your ENTIRE fixed script in a single markdown code block. No text before or after.
-"""
-_TEST_CASES_TEMPLATE = """
-You are a QA engineer writing correctness test cases for a function.
-FUNCTION NAME: {func_name}
-LANGUAGE: {language}
-FUNCTION SIGNATURE AND BODY:
-{original}
-GLOBAL DEPENDENCIES (helper functions / structs this function relies on):
-{context}
-Your task: generate {num_cases} diverse test cases that call `{func_name}` with different
-arguments. The cases must cover:
-  - Small inputs (N ~ 10)
-  - Medium inputs (N ~ 100-500)
-  - Edge cases: empty collections, single-element, all-zeros, negative values (where applicable)
-  - Boundary conditions specific to this function's logic
-OUTPUT FORMAT — respond with ONLY a valid JSON array, nothing else. No markdown fences,
-no explanation. Each element must be a JSON object with exactly two keys:
-  "args"  : a JSON array of positional arguments (use only JSON-serialisable types:
-            numbers, strings, booleans, arrays, objects — NO numpy, NO bytes)
-  "kwargs": a JSON object of keyword arguments (may be empty {{}})
-Example (do NOT copy this — generate cases specific to {func_name}):
-[
-  {{"args": [[1, 2, 3]], "kwargs": {{}}}},
-  {{"args": [[]], "kwargs": {{}}}},
-  {{"args": [[9, -1, 4, 0, 7]], "kwargs": {{"reverse": true}}}}
-]
-CONSTRAINTS:
-- All values must be plain JSON types — no numpy arrays, no custom objects.
-- If the function operates on a matrix, represent it as a list-of-lists.
-- If the function takes a size integer N, generate concrete data of that size inline.
-- Do NOT include function calls or expressions — only literal values.
-- Produce exactly {num_cases} test cases.
-"""
 class AnalyzerAgent:
     def __init__(self, provider="ollama", model_name="llama3.2", api_keys=None, model_tier="large"):
         self.model_tier = model_tier
@@ -217,70 +106,15 @@ class AnalyzerAgent:
         self.provider = provider
         api_keys = api_keys or {}
-        if provider == "openai":
-            if not api_keys.get("openai"):
-                raise ValueError("OpenAI API Key required.")
-            self.base_llm = ChatOpenAI(
-                model=model_name,
-                api_key=api_keys["openai"],
-                temperature=0.1,
-                model_kwargs={"response_format": {"type": "json_object"}},
-            )
-            self.json_llm = self.base_llm
+        # Reuse shared LLM factory — handles lazy imports and provider validation
+        from coreinsight.prompts import ModelTier
+        if provider == "ollama":
+            api_keys["_ctx"]     = 4096 if model_tier == ModelTier.SMALL else 8192
+            api_keys["_predict"] = 2048 if model_tier == ModelTier.SMALL else 4096
         elif provider == "local_server":
-            from coreinsight.prompts import ModelTier
-            base_url   = api_keys.get("local_url", "http://localhost:1234/v1")
-            _max_tokens = 2048 if model_tier == ModelTier.SMALL else 4096
-            self.base_llm = ChatOpenAI(
-                model=model_name,
-                api_key="not-needed",
-                base_url=base_url,
-                temperature=0.1,
-                max_tokens=_max_tokens,
-                model_kwargs={"response_format": {"type": "json_object"}},
-            )
-            self.json_llm = self.base_llm
-        elif provider == "anthropic":
-            if not api_keys.get("anthropic"):
-                raise ValueError("Anthropic API Key required.")
-            self.base_llm = ChatAnthropic(
-                model=model_name,
-                api_key=api_keys["anthropic"],
-                temperature=0.1,
-            )
-            # Anthropic doesn't support response_format; JSON is enforced via prompt only
-            self.json_llm = self.base_llm
-        elif provider == "google":
-            if not api_keys.get("google"):
-                raise ValueError("Google Gemini API Key required.")
-            self.base_llm = ChatGoogleGenerativeAI(
-                model=model_name,
-                google_api_key=api_keys["google"],
-                temperature=0.1,
-                convert_system_message_to_human=True,
-            )
-            self.json_llm = self.base_llm
-        else:  # Ollama default
-            from coreinsight.prompts import ModelTier
-            # Small models (7B) typically have 4096 native context.
-            # Asking for more causes silent degradation or OOM on the host.
-            # Medium/large local models can handle 8192 comfortably.
-            _ctx = 4096 if model_tier == ModelTier.SMALL else 8192
-            # num_predict: small models need room for JSON + code in one shot.
-            # Capping at 2048 for small prevents runaway generation that hits
-            # the limit mid-JSON and returns truncated garbage.
-            _predict = 2048 if model_tier == ModelTier.SMALL else 4096
-            self.base_llm = ChatOllama(
-                model=model_name,
-                temperature=0.1,
-                num_predict=_predict,
-                num_ctx=_ctx,
-            )
-            self.json_llm = self.base_llm.bind(format="json")
+            api_keys["_predict"] = 2048 if model_tier == ModelTier.SMALL else 4096
+        self.base_llm, self.json_llm = _build_llm(provider, model_name, api_keys)
         self.prompt = PromptTemplate(
             template=ANALYSIS_TEMPLATE + "\n\n{format_instructions}",
@@ -556,16 +390,17 @@ class AnalyzerAgent:
 # ---------------------------------------------------------------------------
 def _build_llm(provider: str, model_name: str, api_keys: dict):
-    """
-    Shared LLM factory for all multi-agent classes.
-    Returns (base_llm, json_llm) — same pattern as AnalyzerAgent.__init__.
-    Raises ValueError on missing credentials.
-    """
     api_keys = api_keys or {}
     if provider == "openai":
         if not api_keys.get("openai"):
             raise ValueError("OpenAI API key required.")
+        try:
+            from langchain_openai import ChatOpenAI
+        except ImportError:
+            raise ImportError(
+                "OpenAI provider requires: pip install coreinsight-cli[openai]"
+            )
         llm = ChatOpenAI(
             model=model_name,
             api_key=api_keys["openai"],
@@ -575,8 +410,14 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
         return llm, llm
     if provider == "local_server":
+        try:
+            from langchain_openai import ChatOpenAI
+        except ImportError:
+            raise ImportError(
+                "local_server provider requires: pip install coreinsight-cli[openai]"
+            )
         base_url    = api_keys.get("local_url", "http://localhost:1234/v1")
-        _max_tokens = api_keys.pop("_predict", 4096)  # reuse same key as Ollama path
+        _max_tokens = api_keys.pop("_predict", 4096)
         llm = ChatOpenAI(
             model=model_name,
             api_key="not-needed",
@@ -590,6 +431,12 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
     if provider == "anthropic":
         if not api_keys.get("anthropic"):
             raise ValueError("Anthropic API key required.")
+        try:
+            from langchain_anthropic import ChatAnthropic
+        except ImportError:
+            raise ImportError(
+                "Anthropic provider requires: pip install coreinsight-cli[anthropic]"
+            )
         llm = ChatAnthropic(
             model=model_name,
             api_key=api_keys["anthropic"],
@@ -600,6 +447,12 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
     if provider == "google":
         if not api_keys.get("google"):
             raise ValueError("Google Gemini API key required.")
+        try:
+            from langchain_google_genai import ChatGoogleGenerativeAI
+        except ImportError:
+            raise ImportError(
+                "Google provider requires: pip install coreinsight-cli[google]"
+            )
         llm = ChatGoogleGenerativeAI(
             model=model_name,
             google_api_key=api_keys["google"],
@@ -608,9 +461,7 @@ def _build_llm(provider: str, model_name: str, api_keys: dict):
         )
         return llm, llm
-    # Ollama default — context and predict budget are passed in from the
-    # calling agent which knows its own model_tier.
-    # Default to medium-safe values; callers override via kwargs if needed.
+    # Ollama default
     _ctx     = api_keys.pop("_ctx",     8192)
     _predict = api_keys.pop("_predict", 4096)
     base = ChatOllama(
@@ -650,13 +501,13 @@ class BottleneckAgent:
         api_keys:   dict,
         model_tier: str,
     ) -> None:
-        from coreinsight.prompts import BOTTLENECK_TEMPLATE, SYSTEM_PROMPT
+        from coreinsight.prompts import BOTTLENECK_TEMPLATES, SYSTEM_PROMPT
         self.model_tier = model_tier
         self.parser     = JsonOutputParser(pydantic_object=AuditResult)
         self._base_llm, self._json_llm = _build_llm_tiered(provider, model_name, api_keys, model_tier)
         self._prompt = PromptTemplate(
-            template=BOTTLENECK_TEMPLATE,
+            template=BOTTLENECK_TEMPLATES[model_tier],
             input_variables=[
                 "language", "code_content", "context", "hardware_target",
             ],
@@ -736,10 +587,10 @@ class OptimizerAgent:
         api_keys:   dict,
         model_tier: str,
     ) -> None:
-        from coreinsight.prompts import OPTIMIZER_TEMPLATE
+        from coreinsight.prompts import OPTIMIZER_TEMPLATES
         self.model_tier = model_tier
         self._base_llm, _ = _build_llm_tiered(provider, model_name, api_keys, model_tier)
-        self._template = OPTIMIZER_TEMPLATE
+        self._template = OPTIMIZER_TEMPLATES[model_tier]
     def _extract_code(self, raw: str) -> str:
         """Reuse the same extraction logic as AnalyzerAgent."""
@@ -898,7 +749,13 @@ class HarnessAgent:
         except Exception as e:
             return False, f"Harness generation failed: {e}", None, 0
-        success, logs, plot_data = sandbox.execute_benchmark(harness, language)
+        # Catch missing int main() before hitting the sandbox
+        if language in ("cpp", "c++") and "int main(" not in harness and "int main (" not in harness:
+            logs     = "Missing CSV output (exit 1).\nFull output:\nundefined reference to `main'"
+            success  = False
+            plot_data = None
+        else:
+            success, logs, plot_data = sandbox.execute_benchmark(harness, language)
         is_valid  = self._check_speedup(success, logs)
         retries   = 0
@@ -921,7 +778,12 @@ class HarnessAgent:
                 logs += f"\nFix generation failed: {e}"
                 break
-            success, logs, plot_data = sandbox.execute_benchmark(harness, language)
+            if language in ("cpp", "c++") and "int main(" not in harness and "int main (" not in harness:
+                logs      = "Missing CSV output (exit 1).\nFull output:\nundefined reference to `main'"
+                success   = False
+                plot_data = None
+            else:
+                success, logs, plot_data = sandbox.execute_benchmark(harness, language)
             is_valid = self._check_speedup(success, logs)
             retries += 1

{coreinsight_cli-0.3.2 → coreinsight_cli-0.3.3}/coreinsight/main.py RENAMED Viewed

@@ -161,7 +161,6 @@ def _run_multi_agent(
     optimized_code = multi_agents["optimizer"].generate(
         func_name, original_code, result,
         language, context, hardware_target,
-        stream_callback=stream_callback,  # readable code, stream it
     )
     if not optimized_code or optimized_code == original_code:
         return result, None, False, "", None, False
@@ -875,7 +874,8 @@ def run_analysis(file_path: str, no_docker: bool = False, tui_console=None, stre
                         elif "out of memory" in exc_low or "oom" in exc_low:
                             console.print(f"[bold yellow]⚠️  {func['name']}: Sandbox ran out of memory.[/bold yellow]")
                         else:
-                            console.print(f"[bold red]❌ {func['name']}: Unexpected error — {exc}[/bold red]")
+                            from rich.markup import escape
+                            console.print(f"[bold red]❌ {func['name']}: Unexpected error — {escape(str(exc))}[/bold red]")
         console.print(Panel.fit(f"✅ [bold green]Analysis Complete![/bold green] Final report saved to:\n{report_path.absolute()}"))
@@ -1029,7 +1029,8 @@ def _run_test_cmd(func_name: str, no_docker: bool = False):
                 num_cases=tier_limits["num_test_cases"],
             )
         except Exception as exc:
-            console.print(f"[red]LLM error generating test cases: {exc}[/red]")
+            from rich.markup import escape
+            console.print(f"[red]LLM error generating test cases: {escape(str(exc))}[/red]")
             return
         if not test_cases:
@@ -1146,7 +1147,8 @@ def _run_memory_cmd(clear: bool, export_path: str = None, export_fmt: str = "csv
         metadatas   = all_records.get("metadatas", []) or []
         ids         = all_records.get("ids",       []) or []
     except Exception as exc:
-        console.print(f"[red]Failed to read memory store: {exc}[/red]")
+        from rich.markup import escape
+        console.print(f"[red]Failed to read memory store: {escape(str(exc))}[/red]")
         return
     # Build the detail table

{coreinsight_cli-0.3.2 → coreinsight_cli-0.3.3}/coreinsight/memory.py RENAMED Viewed

@@ -85,9 +85,10 @@ class OptimizationMemory:
                 import chromadb
             except Exception as sqlite_exc:
                 self._init_error = (
-                    f"ChromaDB unavailable (likely outdated SQLite): {sqlite_exc}. "
+                    f"ChromaDB unavailable: {sqlite_exc}. "
                     "Optimization memory disabled. "
-                    "Fix: pip install coreinsight-cli[compat]"
+                    "Fix: pip install coreinsight-cli[memory] "
+                    "(or [compat] if you have an outdated SQLite)"
                 )
                 return False

coreinsight_cli-0.3.3/coreinsight/prompts/__init__.py ADDED Viewed

@@ -0,0 +1,59 @@
+"""
+coreinsight/prompts/__init__.py
+Public API — all existing imports continue to work unchanged.
+"""
+from coreinsight.prompts._base import ModelTier, SYSTEM_PROMPT
+from coreinsight.prompts.bottleneck import (
+    BOTTLENECK_TEMPLATE,
+    BOTTLENECK_TEMPLATE_SMALL,
+    BOTTLENECK_TEMPLATE_MEDIUM,
+    BOTTLENECK_TEMPLATE_LARGE,
+    BOTTLENECK_TEMPLATES,
+)
+from coreinsight.prompts.optimizer import (
+    OPTIMIZER_TEMPLATE,
+    OPTIMIZER_TEMPLATE_SMALL,
+    OPTIMIZER_TEMPLATE_MEDIUM,
+    OPTIMIZER_TEMPLATE_LARGE,
+    OPTIMIZER_TEMPLATES,
+)
+from coreinsight.prompts.harness import (
+    ANALYSIS_TEMPLATE,
+    HARNESS_TEMPLATE,
+    FIX_TEMPLATE,
+    HARNESS_TEMPLATE_MULTI,
+    FIX_TEMPLATE_MULTI,
+    HARNESS_ADDENDUM,
+    HARNESS_ADDENDUM_MULTI,
+)
+from coreinsight.prompts.test_cases import TEST_CASES_TEMPLATE
+# Backward-compatible alias — analyzer.py uses _TEST_CASES_TEMPLATE
+_TEST_CASES_TEMPLATE = TEST_CASES_TEMPLATE
+__all__ = [
+    "ModelTier",
+    "SYSTEM_PROMPT",
+    "BOTTLENECK_TEMPLATE",
+    "BOTTLENECK_TEMPLATE_SMALL",
+    "BOTTLENECK_TEMPLATE_MEDIUM",
+    "BOTTLENECK_TEMPLATE_LARGE",
+    "BOTTLENECK_TEMPLATES",
+    "OPTIMIZER_TEMPLATE",
+    "OPTIMIZER_TEMPLATE_SMALL",
+    "OPTIMIZER_TEMPLATE_MEDIUM",
+    "OPTIMIZER_TEMPLATE_LARGE",
+    "OPTIMIZER_TEMPLATES",
+    "HARNESS_TEMPLATE_MULTI",
+    "FIX_TEMPLATE_MULTI",
+    "HARNESS_ADDENDUM",
+    "HARNESS_ADDENDUM_MULTI",
+    "TEST_CASES_TEMPLATE",
+    "_TEST_CASES_TEMPLATE",
+]
+# Backward-compatible private aliases used in analyzer.py
+_HARNESS_TEMPLATE = HARNESS_TEMPLATE
+_FIX_TEMPLATE     = FIX_TEMPLATE
+_TEST_CASES_TEMPLATE = TEST_CASES_TEMPLATE

coreinsight_cli-0.3.3/coreinsight/prompts/_base.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""
+coreinsight/prompts/_base.py
+Shared constants used across all prompt modules.
+"""
+class ModelTier:
+    SMALL  = "small"   # 7B and under: codellama:7b, llama3.2:3b
+    MEDIUM = "medium"  # 13B-34B: mistral, codellama:13b, local_server models
+    LARGE  = "large"   # 70B+, cloud: GPT-4, Claude, Gemini
+SYSTEM_PROMPT = """
+You are a Senior HPC Performance Engineer, an elite, strict HPC Performance Architect, an elite Algorithmic Expert, and a strict Code Reviewer.
+Your goal is to optimize Python, C++, and CUDA code for maximum throughput and low latency, and perfect hardware utilization.
+You know that the greatest hardware bottleneck is a mathematically inefficient algorithm. You ruthlessly identify O(N^2) nested loops, memory inefficiencies, performance bottlenecks and suboptimal data structures, upgrading them to O(N) or O(1) solutions using vectorization, hash maps, or low-level C-backed libraries.
+"""

coreinsight-cli 0.3.2__tar.gz → 0.3.3__tar.gz

coreinsight-cli 0.3.2 (tar.gz) → 0.3.3 (tar.gz)