npcpy-1.2.34-py3-none-any.whl → npcpy-1.2.36-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
npcpy/serve.py CHANGED
@@ -8,6 +8,10 @@ import sys
8
8
  import traceback
9
9
  import glob
10
10
  import re
11
+ import time
12
+ import asyncio
13
+ from typing import Optional, List, Dict, Callable, Any
14
+ from contextlib import AsyncExitStack
11
15
 
12
16
  import io
13
17
  from flask_cors import CORS
@@ -17,6 +21,8 @@ import json
17
21
  from pathlib import Path
18
22
  import yaml
19
23
  from dotenv import load_dotenv
24
+ from mcp import ClientSession, StdioServerParameters
25
+ from mcp.client.stdio import stdio_client
20
26
 
21
27
  from PIL import Image
22
28
  from PIL import ImageFile
@@ -43,13 +49,14 @@ from npcpy.memory.knowledge_graph import load_kg_from_db
43
49
  from npcpy.memory.search import execute_rag_command, execute_brainblast_command
44
50
  from npcpy.data.load import load_file_contents
45
51
  from npcpy.data.web import search_web
52
+
46
53
  from npcsh._state import get_relevant_memories, search_kg_facts
47
54
 
48
55
  import base64
49
56
  import shutil
50
57
  import uuid
51
58
 
52
- from npcpy.llm_funcs import gen_image
59
+ from npcpy.llm_funcs import gen_image, breathe
53
60
 
54
61
  from sqlalchemy import create_engine, text
55
62
  from sqlalchemy.orm import sessionmaker
@@ -60,14 +67,12 @@ from npcpy.memory.command_history import (
60
67
  save_conversation_message,
61
68
  generate_message_id,
62
69
  )
63
- from npcpy.npc_compiler import Jinx, NPC, Team
70
+ from npcpy.npc_compiler import Jinx, NPC, Team, load_jinxs_from_directory, build_jinx_tool_catalog
64
71
 
65
72
  from npcpy.llm_funcs import (
66
73
  get_llm_response, check_llm_command
67
74
  )
68
- from npcpy.npc_compiler import NPC
69
- import base64
70
-
75
+ from termcolor import cprint
71
76
  from npcpy.tools import auto_tools
72
77
 
73
78
  import json
@@ -84,6 +89,235 @@ cancellation_flags = {}
84
89
  cancellation_lock = threading.Lock()
85
90
 
86
91
 
92
+ # Minimal MCP client (inlined from npcsh corca to avoid corca import)
93
+ class MCPClientNPC:
94
+ def __init__(self, debug: bool = True):
95
+ self.debug = debug
96
+ self.session: Optional[ClientSession] = None
97
+ try:
98
+ self._loop = asyncio.get_event_loop()
99
+ if self._loop.is_closed():
100
+ self._loop = asyncio.new_event_loop()
101
+ asyncio.set_event_loop(self._loop)
102
+ except RuntimeError:
103
+ self._loop = asyncio.new_event_loop()
104
+ asyncio.set_event_loop(self._loop)
105
+ self._exit_stack = self._loop.run_until_complete(AsyncExitStack().__aenter__())
106
+ self.available_tools_llm: List[Dict[str, Any]] = []
107
+ self.tool_map: Dict[str, Callable] = {}
108
+ self.server_script_path: Optional[str] = None
109
+
110
+ def _log(self, message: str, color: str = "cyan") -> None:
111
+ if self.debug:
112
+ cprint(f"[MCP Client] {message}", color, file=sys.stderr)
113
+
114
+ async def _connect_async(self, server_script_path: str) -> None:
115
+ self._log(f"Attempting to connect to MCP server: {server_script_path}")
116
+ self.server_script_path = server_script_path
117
+ abs_path = os.path.abspath(server_script_path)
118
+ if not os.path.exists(abs_path):
119
+ raise FileNotFoundError(f"MCP server script not found: {abs_path}")
120
+
121
+ if abs_path.endswith('.py'):
122
+ cmd_parts = [sys.executable, abs_path]
123
+ elif os.access(abs_path, os.X_OK):
124
+ cmd_parts = [abs_path]
125
+ else:
126
+ raise ValueError(f"Unsupported MCP server script type or not executable: {abs_path}")
127
+
128
+ server_params = StdioServerParameters(
129
+ command=cmd_parts[0],
130
+ args=[abs_path],
131
+ env=os.environ.copy(),
132
+ cwd=os.path.dirname(abs_path) or "."
133
+ )
134
+ if self.session:
135
+ await self._exit_stack.aclose()
136
+
137
+ self._exit_stack = AsyncExitStack()
138
+
139
+ stdio_transport = await self._exit_stack.enter_async_context(stdio_client(server_params))
140
+ self.session = await self._exit_stack.enter_async_context(ClientSession(*stdio_transport))
141
+ await self.session.initialize()
142
+
143
+ response = await self.session.list_tools()
144
+ self.available_tools_llm = []
145
+ self.tool_map = {}
146
+
147
+ if response.tools:
148
+ for mcp_tool in response.tools:
149
+ tool_def = {
150
+ "type": "function",
151
+ "function": {
152
+ "name": mcp_tool.name,
153
+ "description": mcp_tool.description or f"MCP tool: {mcp_tool.name}",
154
+ "parameters": getattr(mcp_tool, "inputSchema", {"type": "object", "properties": {}})
155
+ }
156
+ }
157
+ self.available_tools_llm.append(tool_def)
158
+
159
+ def make_tool_func(tool_name_closure):
160
+ async def tool_func(**kwargs):
161
+ if not self.session:
162
+ return {"error": "No MCP session"}
163
+ self._log(f"About to call MCP tool {tool_name_closure}")
164
+ try:
165
+ cleaned_kwargs = {k: (None if v == 'None' else v) for k, v in kwargs.items()}
166
+ result = await asyncio.wait_for(
167
+ self.session.call_tool(tool_name_closure, cleaned_kwargs),
168
+ timeout=30.0
169
+ )
170
+ self._log(f"MCP tool {tool_name_closure} returned: {type(result)}")
171
+ return result
172
+ except asyncio.TimeoutError:
173
+ self._log(f"Tool {tool_name_closure} timed out after 30 seconds", "red")
174
+ return {"error": f"Tool {tool_name_closure} timed out"}
175
+ except Exception as e:
176
+ self._log(f"Tool {tool_name_closure} error: {e}", "red")
177
+ return {"error": str(e)}
178
+
179
+ def sync_wrapper(**kwargs):
180
+ self._log(f"Sync wrapper called for {tool_name_closure}")
181
+ return self._loop.run_until_complete(tool_func(**kwargs))
182
+
183
+ return sync_wrapper
184
+
185
+ self.tool_map[mcp_tool.name] = make_tool_func(mcp_tool.name)
186
+ tool_names = list(self.tool_map.keys())
187
+ self._log(f"Connection successful. Tools: {', '.join(tool_names) if tool_names else 'None'}")
188
+
189
+ def connect_sync(self, server_script_path: str) -> bool:
190
+ loop = self._loop
191
+ if loop.is_closed():
192
+ self._loop = asyncio.new_event_loop()
193
+ asyncio.set_event_loop(self._loop)
194
+ loop = self._loop
195
+ try:
196
+ loop.run_until_complete(self._connect_async(server_script_path))
197
+ return True
198
+ except Exception as e:
199
+ cprint(f"MCP connection failed: {e}", "red", file=sys.stderr)
200
+ return False
201
+
202
+ def disconnect_sync(self):
203
+ if self.session:
204
+ self._log("Disconnecting MCP session.")
205
+ loop = self._loop
206
+ if not loop.is_closed():
207
+ try:
208
+ async def close_session():
209
+ await self.session.close()
210
+ await self._exit_stack.aclose()
211
+ loop.run_until_complete(close_session())
212
+ except RuntimeError:
213
+ pass
214
+ except Exception as e:
215
+ print(f"Error during MCP client disconnect: {e}", file=sys.stderr)
216
+ self.session = None
217
+ self._exit_stack = None
218
+
219
+
220
+ def get_llm_response_with_handling(prompt, npc, messages, tools, stream, team, context=None):
221
+ """Unified LLM response with basic exception handling (inlined from corca to avoid that dependency)."""
222
+ try:
223
+ return get_llm_response(
224
+ prompt=prompt,
225
+ npc=npc,
226
+ messages=messages,
227
+ tools=tools,
228
+ auto_process_tool_calls=False,
229
+ stream=stream,
230
+ team=team,
231
+ context=context
232
+ )
233
+ except Exception:
234
+ # Fallback retry without context compression logic to keep it simple here.
235
+ return get_llm_response(
236
+ prompt=prompt,
237
+ npc=npc,
238
+ messages=messages,
239
+ tools=tools,
240
+ auto_process_tool_calls=False,
241
+ stream=stream,
242
+ team=team,
243
+ context=context
244
+ )
245
+ class MCPServerManager:
246
+ """
247
+ Simple in-process tracker for launching/stopping MCP servers.
248
+ Currently uses subprocess.Popen to start a Python stdio MCP server script.
249
+ """
250
+
251
+ def __init__(self):
252
+ self._procs = {}
253
+ self._lock = threading.Lock()
254
+
255
+ def start(self, server_path: str):
256
+ server_path = os.path.expanduser(server_path)
257
+ abs_path = os.path.abspath(server_path)
258
+ if not os.path.exists(abs_path):
259
+ raise FileNotFoundError(f"MCP server script not found at {abs_path}")
260
+
261
+ with self._lock:
262
+ existing = self._procs.get(abs_path)
263
+ if existing and existing.poll() is None:
264
+ return {"status": "running", "pid": existing.pid, "serverPath": abs_path}
265
+
266
+ cmd = [sys.executable, abs_path]
267
+ proc = subprocess.Popen(
268
+ cmd,
269
+ cwd=os.path.dirname(abs_path) or ".",
270
+ stdout=subprocess.PIPE,
271
+ stderr=subprocess.PIPE,
272
+ )
273
+ self._procs[abs_path] = proc
274
+ return {"status": "started", "pid": proc.pid, "serverPath": abs_path}
275
+
276
+ def stop(self, server_path: str):
277
+ server_path = os.path.expanduser(server_path)
278
+ abs_path = os.path.abspath(server_path)
279
+ with self._lock:
280
+ proc = self._procs.get(abs_path)
281
+ if not proc:
282
+ return {"status": "not_found", "serverPath": abs_path}
283
+ if proc.poll() is None:
284
+ proc.terminate()
285
+ try:
286
+ proc.wait(timeout=5)
287
+ except subprocess.TimeoutExpired:
288
+ proc.kill()
289
+ del self._procs[abs_path]
290
+ return {"status": "stopped", "serverPath": abs_path}
291
+
292
+ def status(self, server_path: str):
293
+ server_path = os.path.expanduser(server_path)
294
+ abs_path = os.path.abspath(server_path)
295
+ with self._lock:
296
+ proc = self._procs.get(abs_path)
297
+ if not proc:
298
+ return {"status": "not_started", "serverPath": abs_path}
299
+ running = proc.poll() is None
300
+ return {
301
+ "status": "running" if running else "exited",
302
+ "serverPath": abs_path,
303
+ "pid": proc.pid,
304
+ "returncode": None if running else proc.returncode,
305
+ }
306
+
307
+ def running(self):
308
+ with self._lock:
309
+ return {
310
+ path: {
311
+ "pid": proc.pid,
312
+ "status": "running" if proc.poll() is None else "exited",
313
+ "returncode": None if proc.poll() is None else proc.returncode,
314
+ }
315
+ for path, proc in self._procs.items()
316
+ }
317
+
318
+
319
+ mcp_server_manager = MCPServerManager()
320
+
87
321
  def get_project_npc_directory(current_path=None):
88
322
  """
89
323
  Get the project NPC directory based on the current path
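A rough usage sketch of the helpers added in this hunk (illustrative only; the tool name and call arguments are placeholders, not part of the package):

    client = MCPClientNPC(debug=True)
    if client.connect_sync(os.path.expanduser("~/.npcsh/npc_team/mcp_server.py")):
        # available_tools_llm holds OpenAI-style tool definitions for the LLM.
        print([t["function"]["name"] for t in client.available_tools_llm])
        # tool_map entries are sync wrappers around the async MCP calls.
        result = client.tool_map["example_tool"](query="hello")  # hypothetical tool
        client.disconnect_sync()

    # The module-level manager tracks stdio server subprocesses by absolute path.
    info = mcp_server_manager.start("~/.npcsh/npc_team/mcp_server.py")
    print(mcp_server_manager.status("~/.npcsh/npc_team/mcp_server.py"))
    mcp_server_manager.stop("~/.npcsh/npc_team/mcp_server.py")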
@@ -186,6 +420,34 @@ def get_db_session():
186
420
  Session = sessionmaker(bind=engine)
187
421
  return Session()
188
422
 
423
+
424
+ def resolve_mcp_server_path(current_path=None, explicit_path=None, force_global=False):
425
+ """
426
+ Resolve an MCP server path using npcsh.corca's helper when available.
427
+ Falls back to ~/.npcsh/npc_team/mcp_server.py.
428
+ """
429
+ if explicit_path:
430
+ abs_path = os.path.abspath(os.path.expanduser(explicit_path))
431
+ if os.path.exists(abs_path):
432
+ return abs_path
433
+ try:
434
+ from npcsh.corca import _resolve_and_copy_mcp_server_path
435
+ resolved = _resolve_and_copy_mcp_server_path(
436
+ explicit_path=explicit_path,
437
+ current_path=current_path,
438
+ team_ctx_mcp_servers=None,
439
+ interactive=False,
440
+ auto_copy_bypass=True,
441
+ force_global=force_global,
442
+ )
443
+ if resolved:
444
+ return os.path.abspath(resolved)
445
+ except Exception as e:
446
+ print(f"resolve_mcp_server_path: fallback path due to error: {e}")
447
+
448
+ fallback = os.path.expanduser("~/.npcsh/npc_team/mcp_server.py")
449
+ return fallback
450
+
189
451
  extension_map = {
190
452
  "PNG": "images",
191
453
  "JPG": "images",
@@ -441,8 +703,6 @@ def capture():
441
703
  return None
442
704
 
443
705
  return jsonify({"screenshot": screenshot})
444
-
445
-
446
706
  @app.route("/api/settings/global", methods=["GET", "OPTIONS"])
447
707
  def get_global_settings():
448
708
  if request.method == "OPTIONS":
@@ -451,22 +711,22 @@ def get_global_settings():
451
711
  try:
452
712
  npcshrc_path = os.path.expanduser("~/.npcshrc")
453
713
 
454
-
455
714
  global_settings = {
456
715
  "model": "llama3.2",
457
716
  "provider": "ollama",
458
717
  "embedding_model": "nomic-embed-text",
459
718
  "embedding_provider": "ollama",
460
719
  "search_provider": "perplexity",
461
- "NPC_STUDIO_LICENSE_KEY": "",
462
720
  "default_folder": os.path.expanduser("~/.npcsh/"),
721
+ "is_predictive_text_enabled": False, # Default value for the new setting
722
+ "predictive_text_model": "llama3.2", # Default predictive text model
723
+ "predictive_text_provider": "ollama", # Default predictive text provider
463
724
  }
464
725
  global_vars = {}
465
726
 
466
727
  if os.path.exists(npcshrc_path):
467
728
  with open(npcshrc_path, "r") as f:
468
729
  for line in f:
469
-
470
730
  line = line.split("#")[0].strip()
471
731
  if not line:
472
732
  continue
@@ -474,33 +734,35 @@ def get_global_settings():
474
734
  if "=" not in line:
475
735
  continue
476
736
 
477
-
478
737
  key, value = line.split("=", 1)
479
738
  key = key.strip()
480
739
  if key.startswith("export "):
481
740
  key = key[7:]
482
741
 
483
-
484
742
  value = value.strip()
485
743
  if value.startswith('"') and value.endswith('"'):
486
744
  value = value[1:-1]
487
745
  elif value.startswith("'") and value.endswith("'"):
488
746
  value = value[1:-1]
489
747
 
490
-
491
748
  key_mapping = {
492
749
  "NPCSH_MODEL": "model",
493
750
  "NPCSH_PROVIDER": "provider",
494
751
  "NPCSH_EMBEDDING_MODEL": "embedding_model",
495
752
  "NPCSH_EMBEDDING_PROVIDER": "embedding_provider",
496
753
  "NPCSH_SEARCH_PROVIDER": "search_provider",
497
- "NPC_STUDIO_LICENSE_KEY": "NPC_STUDIO_LICENSE_KEY",
498
754
  "NPCSH_STREAM_OUTPUT": "NPCSH_STREAM_OUTPUT",
499
755
  "NPC_STUDIO_DEFAULT_FOLDER": "default_folder",
756
+ "NPC_STUDIO_PREDICTIVE_TEXT_ENABLED": "is_predictive_text_enabled", # New mapping
757
+ "NPC_STUDIO_PREDICTIVE_TEXT_MODEL": "predictive_text_model", # New mapping
758
+ "NPC_STUDIO_PREDICTIVE_TEXT_PROVIDER": "predictive_text_provider", # New mapping
500
759
  }
501
760
 
502
761
  if key in key_mapping:
503
- global_settings[key_mapping[key]] = value
762
+ if key == "NPC_STUDIO_PREDICTIVE_TEXT_ENABLED":
763
+ global_settings[key_mapping[key]] = value.lower() == 'true'
764
+ else:
765
+ global_settings[key_mapping[key]] = value
504
766
  else:
505
767
  global_vars[key] = value
506
768
 
@@ -517,6 +779,7 @@ def get_global_settings():
517
779
  except Exception as e:
518
780
  print(f"Error in get_global_settings: {str(e)}")
519
781
  return jsonify({"error": str(e)}), 500
782
+
520
783
  def _get_jinx_files_recursively(directory):
521
784
  """Helper to recursively find all .jinx file paths."""
522
785
  jinx_paths = []
@@ -550,58 +813,7 @@ def get_available_jinxs():
550
813
  traceback.print_exc()
551
814
  return jsonify({'jinxs': [], 'error': str(e)}), 500
552
815
 
553
- @app.route('/api/jinxs/global', methods=['GET'])
554
- def get_global_jinxs():
555
- global_jinxs_dir = os.path.expanduser('~/.npcsh/npc_team/jinxs')
556
-
557
- # Directories to exclude entirely
558
- excluded_dirs = ['core', 'npc_studio']
559
-
560
- code_jinxs = []
561
- mode_jinxs = []
562
- util_jinxs = []
563
-
564
- if os.path.exists(global_jinxs_dir):
565
- for root, dirs, files in os.walk(global_jinxs_dir):
566
- # Filter out excluded directories
567
- dirs[:] = [d for d in dirs if d not in excluded_dirs]
568
-
569
- for filename in files:
570
- if filename.endswith('.jinx'):
571
- try:
572
- jinx_path = os.path.join(root, filename)
573
- with open(jinx_path, 'r') as f:
574
- jinx_data = yaml.safe_load(f)
575
-
576
- if jinx_data:
577
- jinx_name = jinx_data.get('jinx_name', filename[:-5])
578
-
579
- jinx_obj = {
580
- 'name': jinx_name,
581
- 'display_name': jinx_data.get('description', jinx_name),
582
- 'description': jinx_data.get('description', ''),
583
- 'inputs': jinx_data.get('inputs', []),
584
- 'path': jinx_path
585
- }
586
-
587
- # Categorize based on directory
588
- rel_path = os.path.relpath(root, global_jinxs_dir)
589
-
590
- if rel_path.startswith('code'):
591
- code_jinxs.append(jinx_obj)
592
- elif rel_path.startswith('modes'):
593
- mode_jinxs.append(jinx_obj)
594
- elif rel_path.startswith('utils'):
595
- util_jinxs.append(jinx_obj)
596
-
597
- except Exception as e:
598
- print(f"Error loading jinx {filename}: {e}")
599
-
600
- return jsonify({
601
- 'code': code_jinxs,
602
- 'modes': mode_jinxs,
603
- 'utils': util_jinxs
604
- })
816
+
605
817
  @app.route("/api/jinx/execute", methods=["POST"])
606
818
  def execute_jinx():
607
819
  """
@@ -823,8 +1035,6 @@ def execute_jinx():
823
1035
  return Response(final_output_string, mimetype="text/html")
824
1036
  else:
825
1037
  return Response(final_output_string, mimetype="text/plain")
826
-
827
-
828
1038
  @app.route("/api/settings/global", methods=["POST", "OPTIONS"])
829
1039
  def save_global_settings():
830
1040
  if request.method == "OPTIONS":
@@ -840,35 +1050,41 @@ def save_global_settings():
840
1050
  "embedding_model": "NPCSH_EMBEDDING_MODEL",
841
1051
  "embedding_provider": "NPCSH_EMBEDDING_PROVIDER",
842
1052
  "search_provider": "NPCSH_SEARCH_PROVIDER",
843
- "NPC_STUDIO_LICENSE_KEY": "NPC_STUDIO_LICENSE_KEY",
844
1053
  "NPCSH_STREAM_OUTPUT": "NPCSH_STREAM_OUTPUT",
845
1054
  "default_folder": "NPC_STUDIO_DEFAULT_FOLDER",
1055
+ "is_predictive_text_enabled": "NPC_STUDIO_PREDICTIVE_TEXT_ENABLED", # New mapping
1056
+ "predictive_text_model": "NPC_STUDIO_PREDICTIVE_TEXT_MODEL", # New mapping
1057
+ "predictive_text_provider": "NPC_STUDIO_PREDICTIVE_TEXT_PROVIDER", # New mapping
846
1058
  }
847
1059
 
848
1060
  os.makedirs(os.path.dirname(npcshrc_path), exist_ok=True)
849
1061
  print(data)
850
1062
  with open(npcshrc_path, "w") as f:
851
-
1063
+
852
1064
  for key, value in data.get("global_settings", {}).items():
853
- if key in key_mapping and value:
854
-
855
- if " " in str(value):
856
- value = f'"{value}"'
857
- f.write(f"export {key_mapping[key]}={value}\n")
1065
+ if key in key_mapping and value is not None: # Check for None explicitly
1066
+ # Handle boolean conversion for saving
1067
+ if key == "is_predictive_text_enabled":
1068
+ value_to_write = str(value).upper()
1069
+ elif " " in str(value):
1070
+ value_to_write = f'"{value}"'
1071
+ else:
1072
+ value_to_write = str(value)
1073
+ f.write(f"export {key_mapping[key]}={value_to_write}\n")
858
1074
 
859
-
860
1075
  for key, value in data.get("global_vars", {}).items():
861
- if key and value:
1076
+ if key and value is not None: # Check for None explicitly
862
1077
  if " " in str(value):
863
- value = f'"{value}"'
864
- f.write(f"export {key}={value}\n")
1078
+ value_to_write = f'"{value}"'
1079
+ else:
1080
+ value_to_write = str(value)
1081
+ f.write(f"export {key}={value_to_write}\n")
865
1082
 
866
1083
  return jsonify({"message": "Global settings saved successfully", "error": None})
867
1084
 
868
1085
  except Exception as e:
869
1086
  print(f"Error in save_global_settings: {str(e)}")
870
1087
  return jsonify({"error": str(e)}), 500
871
-
872
1088
  @app.route("/api/settings/project", methods=["GET", "OPTIONS"])
873
1089
  def get_project_settings():
874
1090
  if request.method == "OPTIONS":
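A hedged example of a request body the updated save endpoint accepts; the boolean is written back to ~/.npcshrc as NPC_STUDIO_PREDICTIVE_TEXT_ENABLED=TRUE (values below are made up):

    payload = {
        "global_settings": {
            "model": "llama3.2",
            "provider": "ollama",
            "is_predictive_text_enabled": True,
            "predictive_text_model": "llama3.2",
            "predictive_text_provider": "ollama",
        },
        # written as: export MY_EXTRA_VAR="some value" (quoted because it contains a space)
        "global_vars": {"MY_EXTRA_VAR": "some value"},
    }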
@@ -1050,8 +1266,542 @@ def save_jinx():
1050
1266
  return jsonify({"status": "success"})
1051
1267
  except Exception as e:
1052
1268
  return jsonify({"error": str(e)}), 500
1269
+ def serialize_jinx_inputs(inputs):
1270
+ result = []
1271
+ for inp in inputs:
1272
+ if isinstance(inp, str):
1273
+ result.append(inp)
1274
+ elif isinstance(inp, dict):
1275
+ key = list(inp.keys())[0]
1276
+ result.append(key)
1277
+ else:
1278
+ result.append(str(inp))
1279
+ return result
1280
+
1281
+ @app.route("/api/jinx/test", methods=["POST"])
1282
+ def test_jinx():
1283
+ data = request.json
1284
+ jinx_data = data.get("jinx")
1285
+ test_inputs = data.get("inputs", {})
1286
+ current_path = data.get("currentPath")
1287
+
1288
+ if current_path:
1289
+ load_project_env(current_path)
1290
+
1291
+ jinx = Jinx(jinx_data=jinx_data)
1292
+
1293
+ from jinja2 import Environment
1294
+ temp_env = Environment()
1295
+ jinx.render_first_pass(temp_env, {})
1296
+
1297
+ conversation_id = f"jinx_test_{uuid.uuid4().hex[:8]}"
1298
+ command_history = CommandHistory(app.config.get('DB_PATH'))
1299
+
1300
+ # 1. Save user's test command to conversation_history to get a message_id
1301
+ user_test_command = f"Testing jinx /{jinx.jinx_name} with inputs: {test_inputs}"
1302
+ user_message_id = generate_message_id()
1303
+ save_conversation_message(
1304
+ command_history,
1305
+ conversation_id,
1306
+ "user",
1307
+ user_test_command,
1308
+ wd=current_path,
1309
+ model=None, # Or appropriate model/provider for the test context
1310
+ provider=None,
1311
+ npc=None,
1312
+ message_id=user_message_id
1313
+ )
1314
+
1315
+ # Jinx execution status and output are now part of the assistant's response
1316
+ jinx_execution_status = "success"
1317
+ jinx_error_message = None
1318
+ output = "Jinx execution did not complete." # Default output
1319
+
1320
+ try:
1321
+ result = jinx.execute(
1322
+ input_values=test_inputs,
1323
+ npc=None,
1324
+ messages=[],
1325
+ extra_globals={},
1326
+ jinja_env=temp_env
1327
+ )
1328
+ output = result.get('output', str(result))
1329
+ if result.get('error'): # Assuming jinx.execute might return an 'error' key
1330
+ jinx_execution_status = "failed"
1331
+ jinx_error_message = str(result.get('error'))
1332
+ except Exception as e:
1333
+ jinx_execution_status = "failed"
1334
+ jinx_error_message = str(e)
1335
+ output = f"Jinx execution failed: {e}"
1336
+
1337
+ # The jinx_executions table is populated by a trigger from conversation_history.
1338
+ # The details of the execution (inputs, output, status) are now expected to be
1339
+ # derived by analyzing the user's command and the subsequent assistant's response.
1340
+ # No explicit update to jinx_executions is needed here.
1341
+
1342
+ # 2. Save assistant's response to conversation_history
1343
+ assistant_response_message_id = generate_message_id() # ID for the assistant's response
1344
+ save_conversation_message(
1345
+ command_history,
1346
+ conversation_id,
1347
+ "assistant",
1348
+ output, # The jinx output is the assistant's response for the test
1349
+ wd=current_path,
1350
+ model=None,
1351
+ provider=None,
1352
+ npc=None,
1353
+ message_id=assistant_response_message_id
1354
+ )
1355
+
1356
+ return jsonify({
1357
+ "output": output,
1358
+ "conversation_id": conversation_id,
1359
+ "execution_id": user_message_id, # Return the user's message_id as the execution_id
1360
+ "error": jinx_error_message
1361
+ })
1362
+ from npcpy.ft.diff import train_diffusion, DiffusionConfig
1363
+ import threading
1364
+
1365
+ from npcpy.memory.knowledge_graph import (
1366
+ load_kg_from_db,
1367
+ save_kg_to_db # ADD THIS LINE to import the correct function
1368
+ )
1369
+
1370
+ from collections import defaultdict # ADD THIS LINE for collecting links if not already present
1371
+
1372
+ finetune_jobs = {}
1373
+
1374
+ def extract_and_store_memories(
1375
+ conversation_text,
1376
+ conversation_id,
1377
+ command_history,
1378
+ npc_name,
1379
+ team_name,
1380
+ current_path,
1381
+ model,
1382
+ provider,
1383
+ npc_object=None
1384
+ ):
1385
+ from npcpy.llm_funcs import get_facts
1386
+ from npcpy.memory.command_history import format_memory_context
1387
+ # Your CommandHistory.get_memory_examples_for_context returns a dict with 'approved' and 'rejected'
1388
+ memory_examples_dict = command_history.get_memory_examples_for_context(
1389
+ npc=npc_name,
1390
+ team=team_name,
1391
+ directory_path=current_path
1392
+ )
1393
+
1394
+ memory_context = format_memory_context(memory_examples_dict)
1395
+
1396
+ facts = get_facts(
1397
+ conversation_text,
1398
+ model=npc_object.model if npc_object else model,
1399
+ provider=npc_object.provider if npc_object else provider,
1400
+ npc=npc_object,
1401
+ context=memory_context
1402
+ )
1403
+
1404
+ memories_for_approval = []
1405
+
1406
+ # Initialize structures to collect KG data for a single save_kg_to_db call
1407
+ kg_facts_to_save = []
1408
+ kg_concepts_to_save = []
1409
+ fact_to_concept_links_temp = defaultdict(list)
1410
+
1411
+
1412
+ if facts:
1413
+ for i, fact in enumerate(facts):
1414
+ # Store memory in memory_lifecycle table
1415
+ memory_id = command_history.add_memory_to_database(
1416
+ message_id=f"{conversation_id}_{datetime.datetime.now().strftime('%H%M%S')}_{i}",
1417
+ conversation_id=conversation_id,
1418
+ npc=npc_name or "default",
1419
+ team=team_name or "default",
1420
+ directory_path=current_path or "/",
1421
+ initial_memory=fact.get('statement', str(fact)),
1422
+ status="pending_approval",
1423
+ model=npc_object.model if npc_object else model,
1424
+ provider=npc_object.provider if npc_object else provider,
1425
+ final_memory=None # Explicitly None for pending memories
1426
+ )
1427
+
1428
+ memories_for_approval.append({
1429
+ "memory_id": memory_id,
1430
+ "content": fact.get('statement', str(fact)),
1431
+ "type": fact.get('type', 'unknown'),
1432
+ "context": fact.get('source_text', ''),
1433
+ "npc": npc_name or "default"
1434
+ })
1435
+
1436
+ # Collect facts and concepts for the Knowledge Graph
1437
+ #if fact.get('type') == 'concept':
1438
+ # kg_concepts_to_save.append({
1439
+ # "name": fact.get('statement'),
1440
+ # "generation": current_kg_generation,
1441
+ # "origin": "organic" # Assuming 'organic' for extracted facts
1442
+ # })
1443
+ #else: # It's a fact (or unknown type, treat as fact for KG)
1444
+ # kg_facts_to_save.append({
1445
+ # "statement": fact.get('statement'),
1446
+ # "source_text": fact.get('source_text', conversation_text), # Use source_text if available, else conversation_text
1447
+ # "type": fact.get('type', 'fact'), # Default to 'fact' if type is unknown
1448
+ # "generation": current_kg_generation,
1449
+ # "origin": "organic"
1450
+ # })
1451
+ # if fact.get('concepts'): # If this fact has related concepts
1452
+ # for concept_name in fact.get('concepts'):
1453
+ # fact_to_concept_links_temp[fact.get('statement')].append(concept_name)
1454
+
1455
+ # After processing all facts, save them to the KG database in one go
1456
+ if kg_facts_to_save or kg_concepts_to_save:
1457
+ temp_kg_data = {
1458
+ "facts": kg_facts_to_save,
1459
+ "concepts": kg_concepts_to_save,
1460
+ "generation": current_kg_generation,
1461
+ "fact_to_concept_links": fact_to_concept_links_temp,
1462
+ "concept_links": [], # Assuming no concept-to-concept links from direct extraction
1463
+ "fact_to_fact_links": [] # Assuming no fact-to-fact links from direct extraction
1464
+ }
1465
+
1466
+ # Get the SQLAlchemy engine using your existing helper function
1467
+ db_engine = get_db_connection(app.config.get('DB_PATH'))
1468
+
1469
+ # Call the existing save_kg_to_db function
1470
+ save_kg_to_db(
1471
+ engine=db_engine,
1472
+ kg_data=temp_kg_data,
1473
+ team_name=team_name or "default",
1474
+ npc_name=npc_name or "default",
1475
+ directory_path=current_path or "/"
1476
+ )
1477
+
1478
+ return memories_for_approval
1479
+ @app.route('/api/finetuned_models', methods=['GET'])
1480
+ def get_finetuned_models():
1481
+ current_path = request.args.get("currentPath")
1482
+
1483
+ # Define a list of potential root directories where fine-tuned models might be saved.
1484
+ # We'll be very generous here, including both 'models' and 'images' directories
1485
+ # at both global and project levels, as the user's logs indicate saving to 'images'.
1486
+ potential_root_paths = [
1487
+ os.path.expanduser('~/.npcsh/models'), # Standard global models directory
1488
+ os.path.expanduser('~/.npcsh/images'), # Global images directory (where user's model was saved)
1489
+ ]
1490
+ if current_path:
1491
+ # Add project-specific model directories if a current_path is provided
1492
+ project_models_path = os.path.join(current_path, 'models')
1493
+ project_images_path = os.path.join(current_path, 'images') # Also check project images directory
1494
+ potential_root_paths.extend([project_models_path, project_images_path])
1495
+
1496
+ finetuned_models = []
1497
+
1498
+ print(f"🌋 Searching for fine-tuned models in potential root paths: {set(potential_root_paths)}") # Use set for unique paths
1499
+
1500
+ for root_path in set(potential_root_paths): # Iterate through unique potential root paths
1501
+ if not os.path.exists(root_path) or not os.path.isdir(root_path):
1502
+ print(f"🌋 Skipping non-existent or non-directory root path: {root_path}")
1503
+ continue
1504
+
1505
+ print(f"🌋 Scanning root path: {root_path}")
1506
+ for model_dir_name in os.listdir(root_path):
1507
+ full_model_path = os.path.join(root_path, model_dir_name)
1508
+
1509
+ if not os.path.isdir(full_model_path):
1510
+ print(f"🌋 Skipping {full_model_path}: Not a directory.")
1511
+ continue
1512
+
1513
+ # NEW STRATEGY: Check for user's specific output files
1514
+ # Look for 'model_final.pt' or the 'checkpoints' directory
1515
+ has_model_final_pt = os.path.exists(os.path.join(full_model_path, 'model_final.pt'))
1516
+ has_checkpoints_dir = os.path.isdir(os.path.join(full_model_path, 'checkpoints'))
1517
+
1518
+ if has_model_final_pt or has_checkpoints_dir:
1519
+ print(f"🌋 Identified fine-tuned model: {model_dir_name} at {full_model_path} (found model_final.pt or checkpoints dir)")
1520
+ finetuned_models.append({
1521
+ "value": full_model_path, # This is the path to the directory containing the .pt files
1522
+ "provider": "diffusers", # Provider is still "diffusers"
1523
+ "display_name": f"{model_dir_name} | Fine-tuned Diffuser"
1524
+ })
1525
+ continue # Move to the next model_dir_name found in this root_path
1526
+
1527
+ print(f"🌋 Skipping {full_model_path}: No model_final.pt or checkpoints directory found at root.")
1528
+
1529
+ print(f"🌋 Finished scanning. Found {len(finetuned_models)} fine-tuned models.")
1530
+ return jsonify({"models": finetuned_models, "error": None})
1531
+
1532
+ @app.route('/api/finetune_diffusers', methods=['POST'])
1533
+ def finetune_diffusers():
1534
+ data = request.json
1535
+ images = data.get('images', [])
1536
+ captions = data.get('captions', [])
1537
+ output_name = data.get('outputName', 'my_diffusion_model')
1538
+ num_epochs = data.get('epochs', 100)
1539
+ batch_size = data.get('batchSize', 4)
1540
+ learning_rate = data.get('learningRate', 1e-4)
1541
+ output_path = data.get('outputPath', '~/.npcsh/models')
1542
+
1543
+ print(f"🌋 Finetune Diffusers Request Received!")
1544
+ print(f" Images: {len(images)} files")
1545
+ print(f" Output Name: {output_name}")
1546
+ print(f" Epochs: {num_epochs}, Batch Size: {batch_size}, Learning Rate: {learning_rate}")
1547
+
1548
+ if not images:
1549
+ print("🌋 Error: No images provided for finetuning.")
1550
+ return jsonify({'error': 'No images provided'}), 400
1551
+
1552
+ if not captions or len(captions) != len(images):
1553
+ print("🌋 Warning: Captions not provided or mismatching image count. Using empty captions.")
1554
+ captions = [''] * len(images)
1555
+
1556
+ expanded_images = [os.path.expanduser(p) for p in images]
1557
+ output_dir = os.path.expanduser(
1558
+ os.path.join(output_path, output_name)
1559
+ )
1560
+
1561
+ job_id = f"ft_{int(time.time())}"
1562
+ finetune_jobs[job_id] = {
1563
+ 'status': 'running',
1564
+ 'output_dir': output_dir,
1565
+ 'epochs': num_epochs,
1566
+ 'current_epoch': 0,
1567
+ 'start_time': datetime.datetime.now().isoformat()
1568
+ }
1569
+ print(f"🌋 Finetuning job {job_id} initialized. Output directory: {output_dir}")
1570
+
1571
+ def run_training_async():
1572
+ print(f"🌋 Finetuning job {job_id}: Starting asynchronous training thread...")
1573
+ try:
1574
+ config = DiffusionConfig(
1575
+ num_epochs=num_epochs,
1576
+ batch_size=batch_size,
1577
+ learning_rate=learning_rate,
1578
+ output_model_path=output_dir
1579
+ )
1580
+
1581
+ print(f"🌋 Finetuning job {job_id}: Calling train_diffusion with config: {config}")
1582
+ # Assuming train_diffusion might print its own progress or allow callbacks
1583
+ # For more granular logging, you'd need to modify train_diffusion itself
1584
+ model_path = train_diffusion(
1585
+ expanded_images,
1586
+ captions,
1587
+ config=config
1588
+ )
1589
+
1590
+ finetune_jobs[job_id]['status'] = 'complete'
1591
+ finetune_jobs[job_id]['model_path'] = model_path
1592
+ finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
1593
+ print(f"🌋 Finetuning job {job_id}: Training complete! Model saved to: {model_path}")
1594
+ except Exception as e:
1595
+ finetune_jobs[job_id]['status'] = 'error'
1596
+ finetune_jobs[job_id]['error_msg'] = str(e)
1597
+ finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
1598
+ print(f"🌋 Finetuning job {job_id}: ERROR during training: {e}")
1599
+ traceback.print_exc()
1600
+ print(f"🌋 Finetuning job {job_id}: Asynchronous training thread finished.")
1601
+
1602
+ # Start the training in a separate thread
1603
+ thread = threading.Thread(target=run_training_async)
1604
+ thread.daemon = True # Allow the main program to exit even if this thread is still running
1605
+ thread.start()
1606
+
1607
+ print(f"🌋 Finetuning job {job_id} successfully launched in background. Returning initial status.")
1608
+ return jsonify({
1609
+ 'status': 'started',
1610
+ 'jobId': job_id,
1611
+ 'message': f"Finetuning job '{job_id}' started. Check /api/finetune_status/{job_id} for updates."
1612
+ })
1613
+
1614
+
1615
+ @app.route('/api/finetune_status/<job_id>', methods=['GET'])
1616
+ def finetune_status(job_id):
1617
+ if job_id not in finetune_jobs:
1618
+ return jsonify({'error': 'Job not found'}), 404
1619
+
1620
+ job = finetune_jobs[job_id]
1621
+
1622
+ if job['status'] == 'complete':
1623
+ return jsonify({
1624
+ 'complete': True,
1625
+ 'outputPath': job.get('model_path', job['output_dir'])
1626
+ })
1627
+ elif job['status'] == 'error':
1628
+ return jsonify({'error': job.get('error_msg', 'Unknown error')})
1629
+
1630
+ return jsonify({
1631
+ 'step': job.get('current_epoch', 0),
1632
+ 'total': job['epochs'],
1633
+ 'status': 'running'
1634
+ })
1635
+
1636
+ @app.route("/api/ml/train", methods=["POST"])
1637
+ def train_ml_model():
1638
+ import pickle
1639
+ import numpy as np
1640
+ from sklearn.linear_model import LinearRegression, LogisticRegression
1641
+ from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
1642
+ from sklearn.tree import DecisionTreeRegressor
1643
+ from sklearn.cluster import KMeans
1644
+ from sklearn.model_selection import train_test_split
1645
+ from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
1646
+
1647
+ data = request.json
1648
+ model_name = data.get("name")
1649
+ model_type = data.get("type")
1650
+ target = data.get("target")
1651
+ features = data.get("features")
1652
+ training_data = data.get("data")
1653
+ hyperparams = data.get("hyperparameters", {})
1654
+
1655
+ df = pd.DataFrame(training_data)
1656
+ X = df[features].values
1657
+
1658
+ metrics = {}
1659
+ model = None
1660
+
1661
+ if model_type == "linear_regression":
1662
+ y = df[target].values
1663
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
1664
+ model = LinearRegression()
1665
+ model.fit(X_train, y_train)
1666
+ y_pred = model.predict(X_test)
1667
+ metrics = {
1668
+ "r2_score": r2_score(y_test, y_pred),
1669
+ "rmse": np.sqrt(mean_squared_error(y_test, y_pred))
1670
+ }
1671
+
1672
+ elif model_type == "logistic_regression":
1673
+ y = df[target].values
1674
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
1675
+ model = LogisticRegression(max_iter=1000)
1676
+ model.fit(X_train, y_train)
1677
+ y_pred = model.predict(X_test)
1678
+ metrics = {"accuracy": accuracy_score(y_test, y_pred)}
1679
+
1680
+ elif model_type == "random_forest":
1681
+ y = df[target].values
1682
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
1683
+ model = RandomForestRegressor(n_estimators=100)
1684
+ model.fit(X_train, y_train)
1685
+ y_pred = model.predict(X_test)
1686
+ metrics = {
1687
+ "r2_score": r2_score(y_test, y_pred),
1688
+ "rmse": np.sqrt(mean_squared_error(y_test, y_pred))
1689
+ }
1690
+
1691
+ elif model_type == "clustering":
1692
+ n_clusters = hyperparams.get("n_clusters", 3)
1693
+ model = KMeans(n_clusters=n_clusters)
1694
+ labels = model.fit_predict(X)
1695
+ metrics = {"inertia": model.inertia_, "n_clusters": n_clusters}
1696
+
1697
+ elif model_type == "gradient_boost":
1698
+ y = df[target].values
1699
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
1700
+ model = GradientBoostingRegressor()
1701
+ model.fit(X_train, y_train)
1702
+ y_pred = model.predict(X_test)
1703
+ metrics = {
1704
+ "r2_score": r2_score(y_test, y_pred),
1705
+ "rmse": np.sqrt(mean_squared_error(y_test, y_pred))
1706
+ }
1707
+
1708
+ model_id = f"{model_name}_{int(time.time())}"
1709
+ model_path = os.path.expanduser(f"~/.npcsh/models/{model_id}.pkl")
1710
+ os.makedirs(os.path.dirname(model_path), exist_ok=True)
1711
+
1712
+ with open(model_path, 'wb') as f:
1713
+ pickle.dump({
1714
+ "model": model,
1715
+ "features": features,
1716
+ "target": target,
1717
+ "type": model_type
1718
+ }, f)
1719
+
1720
+ return jsonify({
1721
+ "model_id": model_id,
1722
+ "metrics": metrics,
1723
+ "error": None
1724
+ })
1053
1725
 
1054
1726
 
1727
+ @app.route("/api/ml/predict", methods=["POST"])
1728
+ def ml_predict():
1729
+ import pickle
1730
+
1731
+ data = request.json
1732
+ model_name = data.get("model_name")
1733
+ input_data = data.get("input_data")
1734
+
1735
+ model_dir = os.path.expanduser("~/.npcsh/models/")
1736
+ model_files = [f for f in os.listdir(model_dir) if f.startswith(model_name)]
1737
+
1738
+ if not model_files:
1739
+ return jsonify({"error": f"Model {model_name} not found"})
1740
+
1741
+ model_path = os.path.join(model_dir, model_files[0])
1742
+
1743
+ with open(model_path, 'rb') as f:
1744
+ model_data = pickle.load(f)
1745
+
1746
+ model = model_data["model"]
1747
+ prediction = model.predict([input_data])
1748
+
1749
+ return jsonify({
1750
+ "prediction": prediction.tolist(),
1751
+ "error": None
1752
+ })
1753
+ @app.route("/api/jinx/executions/label", methods=["POST"])
1754
+ def label_jinx_execution():
1755
+ data = request.json
1756
+ execution_id = data.get("executionId")
1757
+ label = data.get("label")
1758
+
1759
+ command_history = CommandHistory(app.config.get('DB_PATH'))
1760
+ command_history.label_jinx_execution(execution_id, label)
1761
+
1762
+ return jsonify({"success": True, "error": None})
1763
+
1764
+
1765
+ @app.route("/api/npc/executions", methods=["GET"])
1766
+ def get_npc_executions():
1767
+ npc_name = request.args.get("npcName")
1768
+
1769
+
1770
+ command_history = CommandHistory(app.config.get('DB_PATH'))
1771
+ executions = command_history.get_npc_executions(npc_name)
1772
+
1773
+ return jsonify({"executions": executions, "error": None})
1774
+
1775
+
1776
+ @app.route("/api/npc/executions/label", methods=["POST"])
1777
+ def label_npc_execution():
1778
+ data = request.json
1779
+ execution_id = data.get("executionId")
1780
+ label = data.get("label")
1781
+
1782
+ command_history = CommandHistory(app.config.get('DB_PATH'))
1783
+ command_history.label_npc_execution(execution_id, label)
1784
+
1785
+ return jsonify({"success": True, "error": None})
1786
+
1787
+
1788
+ @app.route("/api/training/dataset", methods=["POST"])
1789
+ def build_training_dataset():
1790
+ data = request.json
1791
+ filters = data.get("filters", {})
1792
+
1793
+ command_history = CommandHistory(app.config.get('DB_PATH'))
1794
+ dataset = command_history.get_training_dataset(
1795
+ include_jinxs=filters.get("jinxs", True),
1796
+ include_npcs=filters.get("npcs", True),
1797
+ npc_names=filters.get("npc_names")
1798
+ )
1799
+
1800
+ return jsonify({
1801
+ "dataset": dataset,
1802
+ "count": len(dataset),
1803
+ "error": None
1804
+ })
1055
1805
  @app.route("/api/save_npc", methods=["POST"])
1056
1806
  def save_npc():
1057
1807
  try:
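As an illustration, the new /api/ml/train endpoint added in this block expects a JSON body along these lines (all values are made up; a real dataset would have many more rows):

    payload = {
        "name": "house_prices",
        "type": "linear_regression",
        "target": "price",
        "features": ["sqft", "bedrooms"],
        "data": [
            {"sqft": 1000, "bedrooms": 2, "price": 250000},
            {"sqft": 1500, "bedrooms": 3, "price": 320000},
            # ... more rows in practice
        ],
        "hyperparameters": {},
    }
    # The fitted model is pickled to ~/.npcsh/models/<name>_<timestamp>.pkl, and
    # /api/ml/predict loads the first file whose name starts with the given model_name.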
@@ -1092,137 +1842,147 @@ use_global_jinxs: {str(npc_data.get('use_global_jinxs', True)).lower()}
1092
1842
  print(f"Error saving NPC: {str(e)}")
1093
1843
  return jsonify({"error": str(e)}), 500
1094
1844
 
1095
- @app.route("/api/npc_team_global")
1096
- def get_npc_team_global():
1097
- try:
1098
- db_conn = get_db_connection()
1099
- global_npc_directory = os.path.expanduser("~/.npcsh/npc_team")
1100
-
1101
- npc_data = []
1845
+ @app.route("/api/jinxs/global")
1846
+ def get_jinxs_global():
1847
+ global_jinx_directory = os.path.expanduser("~/.npcsh/npc_team/jinxs")
1848
+ jinx_data = []
1102
1849
 
1103
- # Ensure the directory exists before listing
1104
- if not os.path.exists(global_npc_directory):
1105
- print(f"Global NPC directory not found: {global_npc_directory}", file=sys.stderr)
1106
- return jsonify({"npcs": [], "error": f"Global NPC directory not found: {global_npc_directory}"})
1850
+ if not os.path.exists(global_jinx_directory):
1851
+ return jsonify({"jinxs": [], "error": None})
1107
1852
 
1108
- for file in os.listdir(global_npc_directory):
1109
- if file.endswith(".npc"):
1110
- npc_path = os.path.join(global_npc_directory, file)
1111
- try:
1112
- npc = NPC(file=npc_path, db_conn=db_conn)
1113
-
1114
- # Ensure jinxs are initialized after NPC creation if not already
1115
- # This is crucial for populating npc.jinxs_dict
1116
- if not npc.jinxs_dict and hasattr(npc, 'initialize_jinxs'):
1117
- npc.initialize_jinxs()
1118
-
1119
- serialized_npc = {
1120
- "name": npc.name,
1121
- "primary_directive": npc.primary_directive,
1122
- "model": npc.model,
1123
- "provider": npc.provider,
1124
- "api_url": npc.api_url,
1125
- "use_global_jinxs": npc.use_global_jinxs,
1126
- # CRITICAL FIX: Iterate over npc.jinxs_dict.values() which contains Jinx objects
1127
- "jinxs": [
1128
- {
1129
- "jinx_name": jinx.jinx_name,
1130
- "inputs": jinx.inputs,
1131
- "steps": [
1132
- {
1133
- "name": step.get("name", f"step_{i}"),
1134
- "engine": step.get("engine", "natural"),
1135
- "code": step.get("code", "")
1136
- }
1137
- for i, step in enumerate(jinx.steps)
1138
- ]
1139
- }
1140
- for jinx in npc.jinxs_dict.values() # Use jinxs_dict here
1141
- ] if hasattr(npc, 'jinxs_dict') else [], # Defensive check
1142
- }
1143
- npc_data.append(serialized_npc)
1144
- except Exception as e:
1145
- print(f"Error loading or serializing NPC {file}: {str(e)}", file=sys.stderr)
1146
- traceback.print_exc(file=sys.stderr)
1853
+ for root, dirs, files in os.walk(global_jinx_directory):
1854
+ for file in files:
1855
+ if file.endswith(".jinx"):
1856
+ jinx_path = os.path.join(root, file)
1857
+ with open(jinx_path, 'r') as f:
1858
+ raw_data = yaml.safe_load(f)
1859
+
1860
+ inputs = []
1861
+ for inp in raw_data.get("inputs", []):
1862
+ if isinstance(inp, str):
1863
+ inputs.append(inp)
1864
+ elif isinstance(inp, dict):
1865
+ inputs.append(list(inp.keys())[0])
1866
+ else:
1867
+ inputs.append(str(inp))
1868
+
1869
+ rel_path = os.path.relpath(jinx_path, global_jinx_directory)
1870
+ path_without_ext = rel_path[:-5]
1871
+
1872
+ jinx_data.append({
1873
+ "jinx_name": raw_data.get("jinx_name", file[:-5]),
1874
+ "path": path_without_ext,
1875
+ "description": raw_data.get("description", ""),
1876
+ "inputs": inputs,
1877
+ "steps": raw_data.get("steps", [])
1878
+ })
1147
1879
 
1880
+ return jsonify({"jinxs": jinx_data, "error": None})
1148
1881
 
1149
- return jsonify({"npcs": npc_data, "error": None})
1882
+ @app.route("/api/jinxs/project", methods=["GET"])
1883
+ def get_jinxs_project():
1884
+ project_dir = request.args.get("currentPath")
1885
+ if not project_dir:
1886
+ return jsonify({"jinxs": [], "error": "currentPath required"}), 400
1150
1887
 
1151
- except Exception as e:
1152
- print(f"Error fetching global NPC team: {str(e)}", file=sys.stderr)
1153
- traceback.print_exc(file=sys.stderr)
1154
- return jsonify({"npcs": [], "error": str(e)})
1888
+ if not project_dir.endswith("jinxs"):
1889
+ project_dir = os.path.join(project_dir, "jinxs")
1155
1890
 
1891
+ jinx_data = []
1892
+ if not os.path.exists(project_dir):
1893
+ return jsonify({"jinxs": [], "error": None})
1156
1894
 
1157
- @app.route("/api/npc_team_project", methods=["GET"])
1158
- def get_npc_team_project():
1159
- try:
1160
- db_conn = get_db_connection()
1895
+ for root, dirs, files in os.walk(project_dir):
1896
+ for file in files:
1897
+ if file.endswith(".jinx"):
1898
+ jinx_path = os.path.join(root, file)
1899
+ with open(jinx_path, 'r') as f:
1900
+ raw_data = yaml.safe_load(f)
1901
+
1902
+ inputs = []
1903
+ for inp in raw_data.get("inputs", []):
1904
+ if isinstance(inp, str):
1905
+ inputs.append(inp)
1906
+ elif isinstance(inp, dict):
1907
+ inputs.append(list(inp.keys())[0])
1908
+ else:
1909
+ inputs.append(str(inp))
1910
+
1911
+ rel_path = os.path.relpath(jinx_path, project_dir)
1912
+ path_without_ext = rel_path[:-5]
1913
+
1914
+ jinx_data.append({
1915
+ "jinx_name": raw_data.get("jinx_name", file[:-5]),
1916
+ "path": path_without_ext,
1917
+ "description": raw_data.get("description", ""),
1918
+ "inputs": inputs,
1919
+ "steps": raw_data.get("steps", [])
1920
+ })
1921
+ print(jinx_data)
1922
+ return jsonify({"jinxs": jinx_data, "error": None})
1161
1923
 
1162
- project_npc_directory = request.args.get("currentPath")
1163
- if not project_npc_directory:
1164
- return jsonify({"npcs": [], "error": "currentPath is required for project NPCs"}), 400
1924
+ @app.route("/api/npc_team_global")
1925
+ def get_npc_team_global():
1926
+ global_npc_directory = os.path.expanduser("~/.npcsh/npc_team")
1927
+ npc_data = []
1165
1928
 
1166
- if not project_npc_directory.endswith("npc_team"):
1167
- project_npc_directory = os.path.join(project_npc_directory, "npc_team")
1929
+ if not os.path.exists(global_npc_directory):
1930
+ return jsonify({"npcs": [], "error": None})
1168
1931
 
1169
- npc_data = []
1932
+ for file in os.listdir(global_npc_directory):
1933
+ if file.endswith(".npc"):
1934
+ npc_path = os.path.join(global_npc_directory, file)
1935
+ with open(npc_path, 'r') as f:
1936
+ raw_data = yaml.safe_load(f)
1937
+
1938
+ npc_data.append({
1939
+ "name": raw_data.get("name", file[:-4]),
1940
+ "primary_directive": raw_data.get("primary_directive", ""),
1941
+ "model": raw_data.get("model", ""),
1942
+ "provider": raw_data.get("provider", ""),
1943
+ "api_url": raw_data.get("api_url", ""),
1944
+ "use_global_jinxs": raw_data.get("use_global_jinxs", True),
1945
+ "jinxs": raw_data.get("jinxs", "*"),
1946
+ })
1170
1947
 
1171
- # Ensure the directory exists before listing
1172
- if not os.path.exists(project_npc_directory):
1173
- print(f"Project NPC directory not found: {project_npc_directory}", file=sys.stderr)
1174
- return jsonify({"npcs": [], "error": f"Project NPC directory not found: {project_npc_directory}"})
1948
+ return jsonify({"npcs": npc_data, "error": None})
1175
1949
 
1176
- for file in os.listdir(project_npc_directory):
1177
- print(f"Processing project NPC file: {file}", file=sys.stderr) # Diagnostic print
1178
- if file.endswith(".npc"):
1179
- npc_path = os.path.join(project_npc_directory, file)
1180
- try:
1181
- npc = NPC(file=npc_path, db_conn=db_conn)
1182
-
1183
- # Ensure jinxs are initialized after NPC creation if not already
1184
- # This is crucial for populating npc.jinxs_dict
1185
- if not npc.jinxs_dict and hasattr(npc, 'initialize_jinxs'):
1186
- npc.initialize_jinxs()
1187
-
1188
- serialized_npc = {
1189
- "name": npc.name,
1190
- "primary_directive": npc.primary_directive,
1191
- "model": npc.model,
1192
- "provider": npc.provider,
1193
- "api_url": npc.api_url,
1194
- "use_global_jinxs": npc.use_global_jinxs,
1195
- # CRITICAL FIX: Iterate over npc.jinxs_dict.values() which contains Jinx objects
1196
- "jinxs": [
1197
- {
1198
- "jinx_name": jinx.jinx_name,
1199
- "inputs": jinx.inputs,
1200
- "steps": [
1201
- {
1202
- "name": step.get("name", f"step_{i}"),
1203
- "engine": step.get("engine", "natural"),
1204
- "code": step.get("code", "")
1205
- }
1206
- for i, step in enumerate(jinx.steps)
1207
- ]
1208
- }
1209
- for jinx in npc.jinxs_dict.values() # Use jinxs_dict here
1210
- ] if hasattr(npc, 'jinxs_dict') else [], # Defensive check
1211
- }
1212
- npc_data.append(serialized_npc)
1213
- except Exception as e:
1214
- print(f"Error loading or serializing NPC {file}: {str(e)}", file=sys.stderr)
1215
- traceback.print_exc(file=sys.stderr)
1216
1950
 
1951
+ @app.route("/api/npc_team_project", methods=["GET"])
1952
+ def get_npc_team_project():
1953
+ project_npc_directory = request.args.get("currentPath")
1954
+ if not project_npc_directory:
1955
+ return jsonify({"npcs": [], "error": "currentPath required"}), 400
1956
+
1957
+ if not project_npc_directory.endswith("npc_team"):
1958
+ project_npc_directory = os.path.join(
1959
+ project_npc_directory,
1960
+ "npc_team"
1961
+ )
1217
1962
 
1218
- print(f"Project NPC data: {npc_data}", file=sys.stderr) # Diagnostic print
1219
- return jsonify({"npcs": npc_data, "error": None})
1963
+ npc_data = []
1220
1964
 
1221
- except Exception as e:
1222
- print(f"Error fetching NPC team: {str(e)}", file=sys.stderr)
1223
- traceback.print_exc(file=sys.stderr)
1224
- return jsonify({"npcs": [], "error": str(e)})
1965
+ if not os.path.exists(project_npc_directory):
1966
+ return jsonify({"npcs": [], "error": None})
1225
1967
 
1968
+ for file in os.listdir(project_npc_directory):
1969
+ if file.endswith(".npc"):
1970
+ npc_path = os.path.join(project_npc_directory, file)
1971
+ with open(npc_path, 'r') as f:
1972
+ raw_npc_data = yaml.safe_load(f)
1973
+
1974
+ serialized_npc = {
1975
+ "name": raw_npc_data.get("name", file[:-4]),
1976
+ "primary_directive": raw_npc_data.get("primary_directive", ""),
1977
+ "model": raw_npc_data.get("model", ""),
1978
+ "provider": raw_npc_data.get("provider", ""),
1979
+ "api_url": raw_npc_data.get("api_url", ""),
1980
+ "use_global_jinxs": raw_npc_data.get("use_global_jinxs", True),
1981
+ "jinxs": raw_npc_data.get("jinxs", "*"),
1982
+ }
1983
+ npc_data.append(serialized_npc)
1984
+
1985
+ return jsonify({"npcs": npc_data, "error": None})
1226
1986
 
1227
1987
  def get_last_used_model_and_npc_in_directory(directory_path):
1228
1988
  """
@@ -1542,11 +2302,62 @@ IMAGE_MODELS = {
1542
2302
  {"value": "runwayml/stable-diffusion-v1-5", "display_name": "Stable Diffusion v1.5"},
1543
2303
  ],
1544
2304
  }
2305
+ # In npcpy/serve.py, find the @app.route('/api/finetuned_models', methods=['GET'])
2306
+ # and replace the entire function with this:
1545
2307
 
2308
+ # This is now an internal helper function, not a Flask route.
2309
+ def _get_finetuned_models_internal(current_path=None): # Renamed to indicate internal use
2310
+
2311
+ # Define a list of potential root directories where fine-tuned models might be saved.
2312
+ potential_root_paths = [
2313
+ os.path.expanduser('~/.npcsh/models'), # Standard global models directory
2314
+ os.path.expanduser('~/.npcsh/images'), # Global images directory (where user's model was saved)
2315
+ ]
2316
+ if current_path:
2317
+ # Add project-specific model directories if a current_path is provided
2318
+ project_models_path = os.path.join(current_path, 'models')
2319
+ project_images_path = os.path.join(current_path, 'images') # Also check project images directory
2320
+ potential_root_paths.extend([project_models_path, project_images_path])
2321
+
2322
+ finetuned_models = []
2323
+
2324
+ print(f"🌋 (Internal) Searching for fine-tuned models in potential root paths: {set(potential_root_paths)}")
2325
+
2326
+ for root_path in set(potential_root_paths):
2327
+ if not os.path.exists(root_path) or not os.path.isdir(root_path):
2328
+ print(f"🌋 (Internal) Skipping non-existent or non-directory root path: {root_path}")
2329
+ continue
2330
+
2331
+ print(f"🌋 (Internal) Scanning root path: {root_path}")
2332
+ for model_dir_name in os.listdir(root_path):
2333
+ full_model_path = os.path.join(root_path, model_dir_name)
2334
+
2335
+ if not os.path.isdir(full_model_path):
2336
+ print(f"🌋 (Internal) Skipping {full_model_path}: Not a directory.")
2337
+ continue
2338
+
2339
+ # Check for 'model_final.pt' or the 'checkpoints' directory
2340
+ has_model_final_pt = os.path.exists(os.path.join(full_model_path, 'model_final.pt'))
2341
+ has_checkpoints_dir = os.path.isdir(os.path.join(full_model_path, 'checkpoints'))
2342
+
2343
+ if has_model_final_pt or has_checkpoints_dir:
2344
+ print(f"🌋 (Internal) Identified fine-tuned model: {model_dir_name} at {full_model_path} (found model_final.pt or checkpoints dir)")
2345
+ finetuned_models.append({
2346
+ "value": full_model_path, # This is the path to the directory containing the .pt files
2347
+ "provider": "diffusers", # Provider is still "diffusers"
2348
+ "display_name": f"{model_dir_name} | Fine-tuned Diffuser"
2349
+ })
2350
+ continue
2351
+
2352
+ print(f"🌋 (Internal) Skipping {full_model_path}: No model_final.pt or checkpoints directory found at root.")
2353
+
2354
+ print(f"🌋 (Internal) Finished scanning. Found {len(finetuned_models)} fine-tuned models.")
2355
+ # <--- CRITICAL FIX: Directly return the list of models, not a Flask Response
2356
+ return {"models": finetuned_models, "error": None} # Return a dict for consistency
1546
2357
  def get_available_image_models(current_path=None):
1547
2358
  """
1548
2359
  Retrieves available image generation models based on environment variables
1549
- and predefined configurations.
2360
+ and predefined configurations, including locally fine-tuned Diffusers models.
1550
2361
  """
1551
2362
 
1552
2363
  if current_path:
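Per the internal helper above, a directory counts as a fine-tuned Diffusers model when it sits directly under one of the scanned roots and contains model_final.pt or a checkpoints/ subdirectory, e.g. (hypothetical layout):

    ~/.npcsh/images/my_diffusion_model/model_final.pt        -> listed
    ~/.npcsh/images/another_run/checkpoints/epoch_50.pt      -> listed
    ~/.npcsh/models/notes.txt                                 -> skipped (not a directory)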
@@ -1554,7 +2365,7 @@ def get_available_image_models(current_path=None):
1554
2365
 
1555
2366
  all_image_models = []
1556
2367
 
1557
-
2368
+ # Add models configured via environment variables
1558
2369
  env_image_model = os.getenv("NPCSH_IMAGE_MODEL")
1559
2370
  env_image_provider = os.getenv("NPCSH_IMAGE_PROVIDER")
1560
2371
 
@@ -1565,9 +2376,8 @@ def get_available_image_models(current_path=None):
1565
2376
  "display_name": f"{env_image_model} | {env_image_provider} (Configured)"
1566
2377
  })
1567
2378
 
1568
-
2379
+ # Add predefined models (OpenAI, Gemini, and standard Diffusers)
1569
2380
  for provider_key, models_list in IMAGE_MODELS.items():
1570
-
1571
2381
  if provider_key == "openai":
1572
2382
  if os.environ.get("OPENAI_API_KEY"):
1573
2383
  all_image_models.extend([
@@ -1580,16 +2390,25 @@ def get_available_image_models(current_path=None):
1580
2390
  {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
1581
2391
  for model in models_list
1582
2392
  ])
1583
- elif provider_key == "diffusers":
1584
-
1585
-
2393
+ elif provider_key == "diffusers": # This entry in IMAGE_MODELS is for standard diffusers
1586
2394
  all_image_models.extend([
1587
2395
  {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
1588
2396
  for model in models_list
1589
2397
  ])
1590
2398
 
2399
+ # <--- CRITICAL FIX: Directly call the internal helper function for fine-tuned models
2400
+ try:
2401
+ finetuned_data_result = _get_finetuned_models_internal(current_path)
2402
+ if finetuned_data_result and finetuned_data_result.get("models"):
2403
+ all_image_models.extend(finetuned_data_result["models"])
2404
+ else:
2405
+ print(f"No fine-tuned models returned by internal helper or an error occurred internally.")
2406
+ if finetuned_data_result.get("error"):
2407
+ print(f"Internal error in _get_finetuned_models_internal: {finetuned_data_result['error']}")
2408
+ except Exception as e:
2409
+ print(f"Error calling _get_finetuned_models_internal: {e}")
1591
2410
 
1592
-
2411
+ # Deduplicate models
1593
2412
  seen_models = set()
1594
2413
  unique_models = []
1595
2414
  for model_entry in all_image_models:
@@ -1598,6 +2417,7 @@ def get_available_image_models(current_path=None):
1598
2417
  seen_models.add(key)
1599
2418
  unique_models.append(model_entry)
1600
2419
 
2420
+ # Return the combined, deduplicated list of models as a dictionary with a 'models' key
1601
2421
  return unique_models
1602
2422
 
1603
2423
  @app.route('/api/generative_fill', methods=['POST'])
@@ -1824,11 +2644,13 @@ def generate_images():
1824
2644
  if os.path.exists(image_path):
1825
2645
  try:
1826
2646
  pil_img = Image.open(image_path)
2647
+ pil_img = pil_img.convert("RGB")
2648
+ pil_img.thumbnail((1024, 1024))
1827
2649
  input_images.append(pil_img)
1828
2650
 
1829
-
1830
- with open(image_path, 'rb') as f:
1831
- img_data = f.read()
2651
+ compressed_bytes = BytesIO()
2652
+ pil_img.save(compressed_bytes, format="JPEG", quality=85, optimize=True)
2653
+ img_data = compressed_bytes.getvalue()
1832
2654
  attachments_loaded.append({
1833
2655
  "name": os.path.basename(image_path),
1834
2656
  "type": "images",
@@ -1932,20 +2754,31 @@ def get_mcp_tools():
1932
2754
  It will try to use an existing client from corca_states if available and matching,
1933
2755
  otherwise it creates a temporary client.
1934
2756
  """
1935
- server_path = request.args.get("mcpServerPath")
2757
+ raw_server_path = request.args.get("mcpServerPath")
2758
+ current_path_arg = request.args.get("currentPath")
1936
2759
  conversation_id = request.args.get("conversationId")
1937
2760
  npc_name = request.args.get("npc")
2761
+ selected_filter = request.args.get("selected", "")
2762
+ selected_names = [s.strip() for s in selected_filter.split(",") if s.strip()]
1938
2763
 
1939
- if not server_path:
2764
+ if not raw_server_path:
1940
2765
  return jsonify({"error": "mcpServerPath parameter is required."}), 400
1941
2766
 
1942
-
2767
+ # Normalize/expand the provided path so cwd/tilde don't break imports
2768
+ resolved_path = resolve_mcp_server_path(
2769
+ current_path=current_path_arg,
2770
+ explicit_path=raw_server_path,
2771
+ force_global=False
2772
+ )
2773
+ server_path = os.path.abspath(os.path.expanduser(resolved_path))
2774
+
1943
2775
  try:
1944
2776
  from npcsh.corca import MCPClientNPC
1945
2777
  except ImportError:
1946
2778
  return jsonify({"error": "MCP Client (npcsh.corca) not available. Ensure npcsh.corca is installed and importable."}), 500
1947
2779
 
1948
2780
  temp_mcp_client = None
2781
+ jinx_tools = []
1949
2782
  try:
1950
2783
 
1951
2784
  if conversation_id and npc_name and hasattr(app, 'corca_states'):
@@ -1956,13 +2789,38 @@ def get_mcp_tools():
1956
2789
  and existing_corca_state.mcp_client.server_script_path == server_path:
1957
2790
  print(f"Using existing MCP client for {state_key} to fetch tools.")
1958
2791
  temp_mcp_client = existing_corca_state.mcp_client
1959
- return jsonify({"tools": temp_mcp_client.available_tools_llm, "error": None})
2792
+ tools = temp_mcp_client.available_tools_llm
2793
+ if selected_names:
2794
+ tools = [t for t in tools if t.get("function", {}).get("name") in selected_names]
2795
+ return jsonify({"tools": tools, "error": None})
1960
2796
 
1961
2797
 
1962
2798
  print(f"Creating a temporary MCP client to fetch tools for {server_path}.")
1963
2799
  temp_mcp_client = MCPClientNPC()
1964
2800
  if temp_mcp_client.connect_sync(server_path):
1965
- return jsonify({"tools": temp_mcp_client.available_tools_llm, "error": None})
2801
+ tools = temp_mcp_client.available_tools_llm
2802
+ # Append Jinx-derived tools discovered from global/project jinxs
2803
+ try:
2804
+ jinx_dirs = []
2805
+ if current_path_arg:
2806
+ proj_jinx_dir = os.path.join(os.path.abspath(current_path_arg), "npc_team", "jinxs")
2807
+ if os.path.isdir(proj_jinx_dir):
2808
+ jinx_dirs.append(proj_jinx_dir)
2809
+ global_jinx_dir = os.path.expanduser("~/.npcsh/npc_team/jinxs")
2810
+ if os.path.isdir(global_jinx_dir):
2811
+ jinx_dirs.append(global_jinx_dir)
2812
+ all_jinxs = []
2813
+ for d in jinx_dirs:
2814
+ all_jinxs.extend(load_jinxs_from_directory(d))
2815
+ if all_jinxs:
2816
+ jinx_tools = list(build_jinx_tool_catalog({j.jinx_name: j for j in all_jinxs}).values())
2817
+ print(f"[MCP] Discovered {len(jinx_tools)} Jinx tools for listing.")
2818
+ tools = tools + jinx_tools
2819
+ except Exception as e:
2820
+ print(f"[MCP] Error discovering Jinx tools for listing: {e}")
2821
+ if selected_names:
2822
+ tools = [t for t in tools if t.get("function", {}).get("name") in selected_names]
2823
+ return jsonify({"tools": tools, "error": None})
1966
2824
  else:
1967
2825
  return jsonify({"error": f"Failed to connect to MCP server at {server_path}."}), 500
1968
2826
  except FileNotFoundError as e:
@@ -1981,6 +2839,64 @@ def get_mcp_tools():
1981
2839
  temp_mcp_client.disconnect_sync()
1982
2840
 
1983
2841
 
2842
+ @app.route("/api/mcp/server/resolve", methods=["GET"])
2843
+ def api_mcp_resolve():
2844
+ current_path = request.args.get("currentPath")
2845
+ explicit = request.args.get("serverPath")
2846
+ try:
2847
+ resolved = resolve_mcp_server_path(current_path=current_path, explicit_path=explicit)
2848
+ return jsonify({"serverPath": resolved, "error": None})
2849
+ except Exception as e:
2850
+ return jsonify({"serverPath": None, "error": str(e)}), 500
2851
+
2852
+
2853
+ @app.route("/api/mcp/server/start", methods=["POST"])
2854
+ def api_mcp_start():
2855
+ data = request.get_json() or {}
2856
+ current_path = data.get("currentPath")
2857
+ explicit = data.get("serverPath")
2858
+ try:
2859
+ server_path = resolve_mcp_server_path(current_path=current_path, explicit_path=explicit)
2860
+ result = mcp_server_manager.start(server_path)
2861
+ return jsonify({**result, "error": None})
2862
+ except Exception as e:
2863
+ print(f"Error starting MCP server: {e}")
2864
+ traceback.print_exc()
2865
+ return jsonify({"error": str(e)}), 500
2866
+
2867
+
2868
+ @app.route("/api/mcp/server/stop", methods=["POST"])
2869
+ def api_mcp_stop():
2870
+ data = request.get_json() or {}
2871
+ explicit = data.get("serverPath")
2872
+ if not explicit:
2873
+ return jsonify({"error": "serverPath is required to stop a server."}), 400
2874
+ try:
2875
+ result = mcp_server_manager.stop(explicit)
2876
+ return jsonify({**result, "error": None})
2877
+ except Exception as e:
2878
+ print(f"Error stopping MCP server: {e}")
2879
+ traceback.print_exc()
2880
+ return jsonify({"error": str(e)}), 500
2881
+
2882
+
2883
+ @app.route("/api/mcp/server/status", methods=["GET"])
2884
+ def api_mcp_status():
2885
+ explicit = request.args.get("serverPath")
2886
+ current_path = request.args.get("currentPath")
2887
+ try:
2888
+ if explicit:
2889
+ result = mcp_server_manager.status(explicit)
2890
+ else:
2891
+ resolved = resolve_mcp_server_path(current_path=current_path, explicit_path=explicit)
2892
+ result = mcp_server_manager.status(resolved)
2893
+ return jsonify({**result, "running": result.get("status") == "running", "all": mcp_server_manager.running(), "error": None})
2894
+ except Exception as e:
2895
+ print(f"Error checking MCP server status: {e}")
2896
+ traceback.print_exc()
2897
+ return jsonify({"error": str(e)}), 500
2898
+
2899
+
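A minimal sketch of driving the four MCP server management endpoints above from a client; the base URL/port is an assumption and requests is not a dependency of this module:

    import requests

    BASE = "http://localhost:5337"  # assumed host/port of this Flask app

    # Resolve which server script would be used for a project directory
    resolved = requests.get(f"{BASE}/api/mcp/server/resolve",
                            params={"currentPath": "/path/to/project"}).json()

    # Start it, poll its status, then stop it
    requests.post(f"{BASE}/api/mcp/server/start",
                  json={"serverPath": resolved["serverPath"]})
    status = requests.get(f"{BASE}/api/mcp/server/status",
                          params={"serverPath": resolved["serverPath"]}).json()
    print(status.get("status"), status["running"])
    requests.post(f"{BASE}/api/mcp/server/stop",
                  json={"serverPath": resolved["serverPath"]})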
1984
2900
  @app.route("/api/image_models", methods=["GET"])
1985
2901
  def get_image_models_api():
1986
2902
  """
@@ -1989,6 +2905,7 @@ def get_image_models_api():
1989
2905
  current_path = request.args.get("currentPath")
1990
2906
  try:
1991
2907
  image_models = get_available_image_models(current_path)
2908
+ print('image models', image_models)
1992
2909
  return jsonify({"models": image_models, "error": None})
1993
2910
  except Exception as e:
1994
2911
  print(f"Error getting available image models: {str(e)}")
@@ -2000,6 +2917,195 @@ def get_image_models_api():
2000
2917
 
2001
2918
 
2002
2919
 
2920
+ def _run_stream_post_processing(
2921
+ conversation_turn_text,
2922
+ conversation_id,
2923
+ command_history,
2924
+ npc_name,
2925
+ team_name,
2926
+ current_path,
2927
+ model,
2928
+ provider,
2929
+ npc_object,
2930
+ messages # For context compression
2931
+ ):
2932
+ """
2933
+ Runs memory extraction and context compression in a background thread.
2934
+ These operations will not block the main stream.
2935
+ """
2936
+ print(f"🌋 Background task started for conversation {conversation_id}!")
2937
+
2938
+ # Memory extraction and KG fact insertion
2939
+ try:
2940
+ if len(conversation_turn_text) > 50: # Only extract memories if the turn is substantial
2941
+ memories_for_approval = extract_and_store_memories(
2942
+ conversation_turn_text,
2943
+ conversation_id,
2944
+ command_history,
2945
+ npc_name,
2946
+ team_name,
2947
+ current_path,
2948
+ model,
2949
+ provider,
2950
+ npc_object
2951
+ )
2952
+ if memories_for_approval:
2953
+ print(f"🔥 Background: Extracted {len(memories_for_approval)} memories for approval for conversation {conversation_id}. Stored as pending in the database (table: memory_lifecycle).")
2954
+ else:
2955
+ print(f"Background: Conversation turn too short ({len(conversation_turn_text)} chars) for memory extraction. Skipping.")
2956
+ except Exception as e:
2957
+ print(f"🌋 Background: Error during memory extraction and KG insertion for conversation {conversation_id}: {e}")
2958
+ traceback.print_exc()
2959
+
2960
+ # Context compression using breathe from llm_funcs
2961
+ try:
2962
+ if len(messages) > 30: # Compress once the conversation exceeds 30 messages
2963
+ # Directly call breathe for summarization
2964
+ breathe_result = breathe(
2965
+ messages=messages,
2966
+ model=model,
2967
+ provider=provider,
2968
+ npc=npc_object # Pass npc for context if available
2969
+ )
2970
+ compressed_output = breathe_result.get('output', '')
2971
+
2972
+ if compressed_output:
2973
+ # Save the compressed context as a new system message in conversation_history
2974
+ compressed_message_id = generate_message_id()
2975
+ save_conversation_message(
2976
+ command_history,
2977
+ conversation_id,
2978
+ "system", # Role for compressed context
2979
+ f"[AUTOMATIC CONTEXT COMPRESSION]: {compressed_output}",
2980
+ wd=current_path,
2981
+ model=model, # Use the same model/provider that generated the summary
2982
+ provider=provider,
2983
+ npc=npc_name, # Associate with the NPC
2984
+ team=team_name, # Associate with the team
2985
+ message_id=compressed_message_id
2986
+ )
2987
+ print(f"💨 Background: Compressed context for conversation {conversation_id} saved as new system message: {compressed_output[:100]}...")
2988
+ else:
2989
+ print(f"Background: Context compression returned no output for conversation {conversation_id}. Skipping saving.")
2990
+ else:
2991
+ print(f"Background: Conversation messages count ({len(messages)}) below threshold for context compression. Skipping.")
2992
+ except Exception as e:
2993
+ print(f"🌋 Background: Error during context compression with breathe for conversation {conversation_id}: {e}")
2994
+ traceback.print_exc()
2995
+
2996
+ print(f"🌋 Background task finished for conversation {conversation_id}!")
2997
+
2998
+
2999
+
3000
+
3001
+ @app.route("/api/text_predict", methods=["POST"])
3002
+ def text_predict():
3003
+ data = request.json
3004
+
3005
+ stream_id = data.get("streamId")
3006
+ if not stream_id:
3007
+ stream_id = str(uuid.uuid4())
3008
+
3009
+ with cancellation_lock:
3010
+ cancellation_flags[stream_id] = False
3011
+
3012
+ print(f"Starting text prediction stream with ID: {stream_id}")
3013
+ print('data')
3014
+
3015
+
3016
+ text_content = data.get("text_content", "")
3017
+ cursor_position = data.get("cursor_position", len(text_content))
3018
+ current_path = data.get("currentPath")
3019
+ model = data.get("model")
3020
+ provider = data.get("provider")
3021
+ context_type = data.get("context_type", "general") # e.g., 'code', 'chat', 'general'
3022
+ file_path = data.get("file_path") # Optional: for code context
3023
+
3024
+ if current_path:
3025
+ load_project_env(current_path)
3026
+
3027
+ text_before_cursor = text_content[:cursor_position]
3028
+
3029
+
3030
+ if context_type == 'code':
3031
+ prompt_for_llm = f"You are an AI code completion assistant. Your task is to complete the provided code snippet.\nYou MUST ONLY output the code that directly completes the snippet.\nDO NOT include any explanations, comments, or additional text.\nDO NOT wrap the completion in markdown code blocks.\n\nHere is the code context where the completion should occur (file: {file_path or 'unknown'}):\n\n{text_before_cursor}\n\nPlease provide the completion starting from the end of the last line shown.\n"
3032
+ system_prompt = "You are an AI code completion assistant. Only provide code. Do not add explanations or any other text."
3033
+ elif context_type == 'chat':
3034
+ prompt_for_llm = f"You are an AI chat assistant. Your task is to provide a natural and helpful completion to the user's ongoing message.\nYou MUST ONLY output the text that directly completes the message.\nDO NOT include any explanations or additional text.\n\nHere is the message context where the completion should occur:\n\n{text_before_cursor}\n\nPlease provide the completion starting from the end of the last line shown.\n"
3035
+ system_prompt = "You are an AI chat assistant. Only provide natural language completion. Do not add explanations or any other text."
3036
+ else: # general text prediction
3037
+ prompt_for_llm = f"You are an AI text completion assistant. Your task is to provide a natural and helpful completion to the user's ongoing text.\nYou MUST ONLY output the text that directly completes the snippet.\nDO NOT include any explanations or additional text.\n\nHere is the text context where the completion should occur:\n\n{text_before_cursor}\n\nPlease provide the completion starting from the end of the last line shown.\n"
3038
+ system_prompt = "You are an AI text completion assistant. Only provide natural language completion. Do not add explanations or any other text."
3039
+
3040
+
3041
+ npc_object = None # For prediction, we don't necessarily use a specific NPC
3042
+
3043
+ messages_for_llm = [
3044
+ {"role": "system", "content": system_prompt},
3045
+ {"role": "user", "content": prompt_for_llm}
3046
+ ]
3047
+
3048
+ def event_stream_text_predict(current_stream_id):
3049
+ complete_prediction = []
3050
+ try:
3051
+ stream_response_generator = get_llm_response(
3052
+ prompt_for_llm,
3053
+ messages=messages_for_llm,
3054
+ model=model,
3055
+ provider=provider,
3056
+ npc=npc_object,
3057
+ stream=True,
3058
+ )
3059
+
3060
+ # get_llm_response returns a dict with 'response' as a generator when stream=True
3061
+ if isinstance(stream_response_generator, dict) and 'response' in stream_response_generator:
3062
+ stream_generator = stream_response_generator['response']
3063
+ else:
3064
+ # Fallback for non-streaming LLM responses or errors
3065
+ output_content = ""
3066
+ if isinstance(stream_response_generator, dict) and 'output' in stream_response_generator:
3067
+ output_content = stream_response_generator['output']
3068
+ elif isinstance(stream_response_generator, str):
3069
+ output_content = stream_response_generator
3070
+
3071
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': output_content}}]})}\n\n"
3072
+ yield f"data: [DONE]\n\n"
3073
+ return
3074
+
3075
+
3076
+ for response_chunk in stream_generator:
3077
+ with cancellation_lock:
3078
+ if cancellation_flags.get(current_stream_id, False):
3079
+ print(f"Cancellation flag triggered for {current_stream_id}. Breaking loop.")
3080
+ break
3081
+
3082
+ chunk_content = ""
3083
+ # Handle different LLM API response formats
3084
+ if "hf.co" in model or (provider == 'ollama' and 'gpt-oss' not in model): # Heuristic for Ollama/HF models
3085
+ chunk_content = response_chunk["message"]["content"] if "message" in response_chunk and "content" in response_chunk["message"] else ""
3086
+ else: # Assume OpenAI-like streaming format
3087
+ chunk_content = "".join(choice.delta.content for choice in response_chunk.choices if choice.delta.content is not None)
3088
+
3089
+ print(chunk_content, end='')
3090
+
3091
+ if chunk_content:
3092
+ complete_prediction.append(chunk_content)
3093
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': chunk_content}}]})}\n\n"
3094
+
3095
+ except Exception as e:
3096
+ print(f"\nAn exception occurred during text prediction streaming for {current_stream_id}: {e}")
3097
+ traceback.print_exc()
3098
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
3099
+
3100
+ finally:
3101
+ print(f"\nText prediction stream {current_stream_id} finished.")
3102
+ yield f"data: [DONE]\n\n" # Signal end of stream
3103
+ with cancellation_lock:
3104
+ if current_stream_id in cancellation_flags:
3105
+ del cancellation_flags[current_stream_id]
3106
+ print(f"Cleaned up cancellation flag for stream ID: {current_stream_id}")
3107
+
3108
+ return Response(event_stream_text_predict(stream_id), mimetype="text/event-stream")
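A minimal sketch of consuming the /api/text_predict SSE stream defined above; the base URL/port and the model/provider values are placeholders, and requests is assumed to be available:

    import json
    import requests

    BASE = "http://localhost:5337"  # assumed host/port of this Flask app
    payload = {
        "text_content": "def fibonacci(n):\n    ",
        "context_type": "code",          # or "chat" / "general"
        "model": "llama3.2",             # placeholder model/provider
        "provider": "ollama",
    }
    with requests.post(f"{BASE}/api/text_predict", json=payload, stream=True) as r:
        for line in r.iter_lines(decode_unicode=True):
            if not line or not line.startswith("data: "):
                continue
            body = line[len("data: "):]
            if body == "[DONE]":
                break
            delta = json.loads(body).get("choices", [{}])[0].get("delta", {})
            print(delta.get("content", ""), end="", flush=True)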
2003
3109
 
2004
3110
  @app.route("/api/stream", methods=["POST"])
2005
3111
  def stream():
@@ -2016,6 +3122,8 @@ def stream():
2016
3122
 
2017
3123
  commandstr = data.get("commandstr")
2018
3124
  conversation_id = data.get("conversationId")
3125
+ if not conversation_id:
3126
+ return jsonify({"error": "conversationId is required"}), 400
2019
3127
  model = data.get("model", None)
2020
3128
  provider = data.get("provider", None)
2021
3129
  if provider is None:
@@ -2033,6 +3141,7 @@ def stream():
2033
3141
  npc_object = None
2034
3142
  team_object = None
2035
3143
  team = None
3144
+ tool_results_for_db = []
2036
3145
  if npc_name:
2037
3146
  if hasattr(app, 'registered_teams'):
2038
3147
  for team_name, team_object in app.registered_teams.items():
@@ -2195,7 +3304,9 @@ def stream():
2195
3304
  if 'tools' in tool_args and tool_args['tools']:
2196
3305
  tool_args['tool_choice'] = {"type": "auto"}
2197
3306
 
2198
-
3307
+ # Default stream response so closures below always have a value
3308
+ stream_response = {"output": "", "messages": messages}
3309
+
2199
3310
  exe_mode = data.get('executionMode','chat')
2200
3311
 
2201
3312
  if exe_mode == 'chat':
@@ -2269,91 +3380,260 @@ def stream():
2269
3380
  )
2270
3381
  messages = state.messages
2271
3382
 
2272
- elif exe_mode == 'corca':
2273
-
2274
- try:
2275
- from npcsh.corca import execute_command_corca, create_corca_state_and_mcp_client, MCPClientNPC
2276
- from npcsh._state import initial_state as state
2277
- except ImportError:
2278
-
2279
- print("ERROR: npcsh.corca or MCPClientNPC not found. Corca mode is disabled.", file=sys.stderr)
2280
- state = None
2281
- stream_response = {"output": "Corca mode is not available due to missing dependencies.", "messages": messages}
2282
-
2283
-
2284
- if state is not None:
2285
-
2286
- mcp_server_path_from_request = data.get("mcpServerPath")
2287
- selected_mcp_tools_from_request = data.get("selectedMcpTools", [])
2288
-
2289
-
2290
- effective_mcp_server_path = mcp_server_path_from_request
2291
- if not effective_mcp_server_path and team_object and hasattr(team_object, 'team_ctx') and team_object.team_ctx:
2292
- mcp_servers_list = team_object.team_ctx.get('mcp_servers', [])
2293
- if mcp_servers_list and isinstance(mcp_servers_list, list):
2294
- first_server_obj = next((s for s in mcp_servers_list if isinstance(s, dict) and 'value' in s), None)
2295
- if first_server_obj:
2296
- effective_mcp_server_path = first_server_obj['value']
2297
- elif isinstance(team_object.team_ctx.get('mcp_server'), str):
2298
- effective_mcp_server_path = team_object.team_ctx.get('mcp_server')
3383
+ elif exe_mode == 'tool_agent':
3384
+ mcp_server_path_from_request = data.get("mcpServerPath")
3385
+ selected_mcp_tools_from_request = data.get("selectedMcpTools", [])
3386
+
3387
+ # Resolve MCP server path (explicit -> team ctx -> default resolver)
3388
+ effective_mcp_server_path = mcp_server_path_from_request
3389
+ if not effective_mcp_server_path and team_object and hasattr(team_object, 'team_ctx') and team_object.team_ctx:
3390
+ mcp_servers_list = team_object.team_ctx.get('mcp_servers', [])
3391
+ if mcp_servers_list and isinstance(mcp_servers_list, list):
3392
+ first_server_obj = next((s for s in mcp_servers_list if isinstance(s, dict) and 'value' in s), None)
3393
+ if first_server_obj:
3394
+ effective_mcp_server_path = first_server_obj['value']
3395
+ elif isinstance(team_object.team_ctx.get('mcp_server'), str):
3396
+ effective_mcp_server_path = team_object.team_ctx.get('mcp_server')
3397
+
3398
+ effective_mcp_server_path = resolve_mcp_server_path(
3399
+ current_path=current_path,
3400
+ explicit_path=effective_mcp_server_path,
3401
+ force_global=False
3402
+ )
3403
+ print(f"[MCP] effective server path: {effective_mcp_server_path}")
3404
+
3405
+ if not hasattr(app, 'mcp_clients'):
3406
+ app.mcp_clients = {}
3407
+
3408
+ state_key = f"{conversation_id}_{npc_name or 'default'}"
3409
+ client_entry = app.mcp_clients.get(state_key)
3410
+
3411
+ if not client_entry or not client_entry.get("client") or not client_entry["client"].session \
3412
+ or client_entry.get("server_path") != effective_mcp_server_path:
3413
+ mcp_client = MCPClientNPC()
3414
+ if effective_mcp_server_path and mcp_client.connect_sync(effective_mcp_server_path):
3415
+ print(f"[MCP] connected client for {state_key} to {effective_mcp_server_path}")
3416
+ app.mcp_clients[state_key] = {
3417
+ "client": mcp_client,
3418
+ "server_path": effective_mcp_server_path,
3419
+ "messages": messages
3420
+ }
3421
+ else:
3422
+ print(f"[MCP] Failed to connect client for {state_key} to {effective_mcp_server_path}")
3423
+ app.mcp_clients[state_key] = {
3424
+ "client": None,
3425
+ "server_path": effective_mcp_server_path,
3426
+ "messages": messages
3427
+ }
2299
3428
 
2300
-
2301
- if not hasattr(app, 'corca_states'):
2302
- app.corca_states = {}
2303
-
2304
- state_key = f"{conversation_id}_{npc_name or 'default'}"
2305
-
2306
- corca_state = None
2307
- if state_key not in app.corca_states:
2308
-
2309
- corca_state = create_corca_state_and_mcp_client(
2310
- conversation_id=conversation_id,
2311
- command_history=command_history,
3429
+ mcp_client = app.mcp_clients[state_key]["client"]
3430
+ messages = app.mcp_clients[state_key].get("messages", messages)
3431
+
3432
+ def stream_mcp_sse():
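+ # Flow of this generator: each iteration (max 10) streams one LLM turn with the
+ # combined MCP + Jinx tool catalog attached, accumulates any streamed tool_call
+ # deltas by index, executes each requested tool (local Jinx first, otherwise via
+ # the MCP session), appends the assistant turn plus tool results to messages,
+ # and loops again; it stops as soon as a turn produces no tool calls.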
3433
+ nonlocal messages
3434
+ iteration = 0
3435
+ prompt = commandstr
3436
+ while iteration < 10:
3437
+ iteration += 1
3438
+ print(f"[MCP] iteration {iteration} prompt len={len(prompt)}")
3439
+ jinx_tool_catalog = {}
3440
+ if npc_object and hasattr(npc_object, "jinx_tool_catalog"):
3441
+ jinx_tool_catalog = npc_object.jinx_tool_catalog or {}
3442
+ tools_for_llm = []
3443
+ if mcp_client:
3444
+ tools_for_llm.extend(mcp_client.available_tools_llm)
3445
+ # append Jinx-derived tools
3446
+ tools_for_llm.extend(list(jinx_tool_catalog.values()))
3447
+ if selected_mcp_tools_from_request:
3448
+ tools_for_llm = [t for t in tools_for_llm if t["function"]["name"] in selected_mcp_tools_from_request]
3449
+ print(f"[MCP] tools_for_llm: {[t['function']['name'] for t in tools_for_llm]}")
3450
+
3451
+ llm_response = get_llm_response_with_handling(
3452
+ prompt=prompt,
2312
3453
  npc=npc_object,
3454
+ messages=messages,
3455
+ tools=tools_for_llm,
3456
+ stream=True,
2313
3457
  team=team_object,
2314
- current_path=current_path,
2315
- mcp_server_path=effective_mcp_server_path
3458
+ context=f' The users working directory is {current_path}'
2316
3459
  )
2317
- app.corca_states[state_key] = corca_state
2318
- else:
2319
- corca_state = app.corca_states[state_key]
2320
- corca_state.npc = npc_object
2321
- corca_state.team = team_object
2322
- corca_state.current_path = current_path
2323
- corca_state.messages = messages
2324
- corca_state.command_history = command_history
2325
3460
 
2326
-
2327
- current_mcp_client_path = getattr(corca_state.mcp_client, 'server_script_path', None)
2328
-
2329
- if effective_mcp_server_path != current_mcp_client_path:
2330
- print(f"MCP server path changed/updated for {state_key}. Disconnecting old client (if any) and reconnecting to {effective_mcp_server_path or 'None'}.")
2331
- if corca_state.mcp_client and corca_state.mcp_client.session:
2332
- corca_state.mcp_client.disconnect_sync()
2333
- corca_state.mcp_client = None
2334
-
2335
- if effective_mcp_server_path:
2336
- new_mcp_client = MCPClientNPC()
2337
- if new_mcp_client.connect_sync(effective_mcp_server_path):
2338
- corca_state.mcp_client = new_mcp_client
2339
- print(f"Successfully reconnected MCP client for {state_key} to {effective_mcp_server_path}.")
3461
+ stream = llm_response.get("response", [])
3462
+ messages = llm_response.get("messages", messages)
3463
+ collected_content = ""
3464
+ collected_tool_calls = []
3465
+
3466
+ for response_chunk in stream:
3467
+ with cancellation_lock:
3468
+ if cancellation_flags.get(stream_id, False):
3469
+ yield {"type": "interrupt"}
3470
+ return
3471
+
3472
+ if hasattr(response_chunk, "choices") and response_chunk.choices:
3473
+ delta = response_chunk.choices[0].delta
3474
+ if hasattr(delta, "content") and delta.content:
3475
+ collected_content += delta.content
3476
+ chunk_data = {
3477
+ "id": getattr(response_chunk, "id", None),
3478
+ "object": getattr(response_chunk, "object", None),
3479
+ "created": getattr(response_chunk, "created", datetime.datetime.now().strftime('YYYY-DD-MM-HHMMSS')),
3480
+ "model": getattr(response_chunk, "model", model),
3481
+ "choices": [
3482
+ {
3483
+ "index": 0,
3484
+ "delta": {
3485
+ "content": delta.content,
3486
+ "role": "assistant"
3487
+ },
3488
+ "finish_reason": None
3489
+ }
3490
+ ]
3491
+ }
3492
+ yield chunk_data
3493
+
3494
+ if hasattr(delta, "tool_calls") and delta.tool_calls:
3495
+ for tool_call_delta in delta.tool_calls:
3496
+ idx = getattr(tool_call_delta, "index", 0)
3497
+ while len(collected_tool_calls) <= idx:
3498
+ collected_tool_calls.append({
3499
+ "id": "",
3500
+ "type": "function",
3501
+ "function": {"name": "", "arguments": ""}
3502
+ })
3503
+ if getattr(tool_call_delta, "id", None):
3504
+ collected_tool_calls[idx]["id"] = tool_call_delta.id
3505
+ if hasattr(tool_call_delta, "function"):
3506
+ fn = tool_call_delta.function
3507
+ if getattr(fn, "name", None):
3508
+ collected_tool_calls[idx]["function"]["name"] = fn.name
3509
+ if getattr(fn, "arguments", None):
3510
+ collected_tool_calls[idx]["function"]["arguments"] += fn.arguments
3511
+
3512
+ if not collected_tool_calls:
3513
+ print("[MCP] no tool calls, finishing streaming loop")
3514
+ break
3515
+
3516
+ print(f"[MCP] collected tool calls: {[tc['function']['name'] for tc in collected_tool_calls]}")
3517
+ yield {
3518
+ "type": "tool_execution_start",
3519
+ "tool_calls": [
3520
+ {
3521
+ "name": tc["function"]["name"],
3522
+ "id": tc["id"],
3523
+ "function": {
3524
+ "name": tc["function"]["name"],
3525
+ "arguments": tc["function"].get("arguments", "")
3526
+ }
3527
+ } for tc in collected_tool_calls
3528
+ ]
3529
+ }
3530
+
3531
+ tool_results = []
3532
+ for tc in collected_tool_calls:
3533
+ tool_name = tc["function"]["name"]
3534
+ tool_args = tc["function"]["arguments"]
3535
+ tool_id = tc["id"]
3536
+
3537
+ if isinstance(tool_args, str):
3538
+ try:
3539
+ tool_args = json.loads(tool_args) if tool_args.strip() else {}
3540
+ except json.JSONDecodeError:
3541
+ tool_args = {}
3542
+
3543
+ print(f"[MCP] tool_start {tool_name} args={tool_args}")
3544
+ yield {"type": "tool_start", "name": tool_name, "id": tool_id, "args": tool_args}
3545
+ try:
3546
+ tool_content = ""
3547
+ # First, try local Jinx execution
3548
+ if npc_object and hasattr(npc_object, "jinxs_dict") and tool_name in npc_object.jinxs_dict:
3549
+ jinx_obj = npc_object.jinxs_dict[tool_name]
3550
+ try:
3551
+ jinx_ctx = jinx_obj.execute(
3552
+ input_values=tool_args if isinstance(tool_args, dict) else {},
3553
+ npc=npc_object,
3554
+ messages=messages
3555
+ )
3556
+ tool_content = str(jinx_ctx.get("output", jinx_ctx))
3557
+ print(f"[MCP] jinx tool_complete {tool_name}")
3558
+ except Exception as e:
3559
+ raise Exception(f"Jinx execution failed: {e}")
2340
3560
  else:
2341
- print(f"Failed to reconnect MCP client for {state_key} to {effective_mcp_server_path}. Corca will have no tools.")
2342
- corca_state.mcp_client = None
2343
-
2344
-
2345
-
2346
- state, stream_response = execute_command_corca(
2347
- commandstr,
2348
- corca_state,
2349
- command_history,
2350
- selected_mcp_tools_names=selected_mcp_tools_from_request
2351
- )
2352
-
2353
-
2354
- app.corca_states[state_key] = state
2355
- messages = state.messages
3561
+ try:
3562
+ loop = asyncio.get_event_loop()
3563
+ except RuntimeError:
3564
+ loop = asyncio.new_event_loop()
3565
+ asyncio.set_event_loop(loop)
3566
+ if loop.is_closed():
3567
+ loop = asyncio.new_event_loop()
3568
+ asyncio.set_event_loop(loop)
3569
+ mcp_result = loop.run_until_complete(
3570
+ mcp_client.session.call_tool(tool_name, tool_args)
3571
+ ) if mcp_client else {"error": "No MCP client"}
3572
+ if hasattr(mcp_result, "content") and mcp_result.content:
3573
+ for content_item in mcp_result.content:
3574
+ if hasattr(content_item, "text"):
3575
+ tool_content += content_item.text
3576
+ elif hasattr(content_item, "data"):
3577
+ tool_content += str(content_item.data)
3578
+ else:
3579
+ tool_content += str(content_item)
3580
+ else:
3581
+ tool_content = str(mcp_result)
3582
+
3583
+ tool_results.append({
3584
+ "role": "tool",
3585
+ "tool_call_id": tool_id,
3586
+ "name": tool_name,
3587
+ "content": tool_content
3588
+ })
3589
+
3590
+ print(f"[MCP] tool_complete {tool_name}")
3591
+ yield {"type": "tool_complete", "name": tool_name, "id": tool_id, "result_preview": tool_content[:4000]}
3592
+ except Exception as e:
3593
+ err_msg = f"Error executing {tool_name}: {e}"
3594
+ tool_results.append({
3595
+ "role": "tool",
3596
+ "tool_call_id": tool_id,
3597
+ "name": tool_name,
3598
+ "content": err_msg
3599
+ })
3600
+ print(f"[MCP] tool_error {tool_name}: {e}")
3601
+ yield {"type": "tool_error", "name": tool_name, "id": tool_id, "error": str(e)}
3602
+
3603
+ serialized_tool_calls = []
3604
+ for tc in collected_tool_calls:
3605
+ parsed_args = tc["function"]["arguments"]
3606
+ # Gemini/LLM expects arguments as JSON string, not dict
3607
+ if isinstance(parsed_args, dict):
3608
+ args_for_message = json.dumps(parsed_args)
3609
+ else:
3610
+ args_for_message = str(parsed_args)
3611
+ serialized_tool_calls.append({
3612
+ "id": tc["id"],
3613
+ "type": tc["type"],
3614
+ "function": {
3615
+ "name": tc["function"]["name"],
3616
+ "arguments": args_for_message
3617
+ }
3618
+ })
3619
+
3620
+ messages.append({
3621
+ "role": "assistant",
3622
+ "content": collected_content,
3623
+ "tool_calls": serialized_tool_calls
3624
+ })
3625
+ messages.extend(tool_results)
3626
+ tool_results_for_db = tool_results
3627
+
3628
+ prompt = ""
2356
3629
 
3630
+ app.mcp_clients[state_key]["messages"] = messages
3631
+ return
3632
+
3633
+ stream_response = stream_mcp_sse()
3634
+
3635
+ else:
3636
+ stream_response = {"output": f"Unsupported execution mode: {exe_mode}", "messages": messages}
2357
3637
 
2358
3638
  user_message_filled = ''
2359
3639
 
@@ -2391,47 +3671,77 @@ def stream():
2391
3671
  tool_call_data = {"id": None, "function_name": None, "arguments": ""}
2392
3672
 
2393
3673
  try:
3674
+ # Handle generator stream responses (tool_agent streaming)
3675
+ if hasattr(stream_response, "__iter__") and not isinstance(stream_response, (dict, str)):
3676
+ for chunk in stream_response:
3677
+ with cancellation_lock:
3678
+ if cancellation_flags.get(current_stream_id, False):
3679
+ interrupted = True
3680
+ break
3681
+ if chunk is None:
3682
+ continue
3683
+ if isinstance(chunk, dict):
3684
+ if chunk.get("type") == "interrupt":
3685
+ interrupted = True
3686
+ break
3687
+ yield f"data: {json.dumps(chunk)}\n\n"
3688
+ if chunk.get("choices"):
3689
+ for choice in chunk["choices"]:
3690
+ delta = choice.get("delta", {})
3691
+ content_piece = delta.get("content")
3692
+ if content_piece:
3693
+ complete_response.append(content_piece)
3694
+ continue
3695
+ yield f"data: {json.dumps({'choices':[{'delta':{'content': str(chunk), 'role': 'assistant'},'finish_reason':None}]})}\n\n"
3696
+ # ensure stream termination and cleanup for generator flows
3697
+ yield "data: [DONE]\n\n"
3698
+ with cancellation_lock:
3699
+ if current_stream_id in cancellation_flags:
3700
+ del cancellation_flags[current_stream_id]
3701
+ print(f"Cleaned up cancellation flag for stream ID: {current_stream_id}")
3702
+ return
3703
+
2394
3704
  if isinstance(stream_response, str) :
2395
3705
  print('stream a str and not a gen')
2396
3706
  chunk_data = {
2397
- "id": None,
2398
- "object": None,
2399
- "created": datetime.datetime.now().strftime('YYYY-DD-MM-HHMMSS'),
3707
+ "id": None,
3708
+ "object": None,
3709
+ "created": datetime.datetime.now().strftime('YYYY-DD-MM-HHMMSS'),
2400
3710
  "model": model,
2401
3711
  "choices": [
2402
3712
  {
2403
- "index": 0,
2404
- "delta":
3713
+ "index": 0,
3714
+ "delta":
2405
3715
  {
2406
3716
  "content": stream_response,
2407
3717
  "role": "assistant"
2408
- },
3718
+ },
2409
3719
  "finish_reason": 'done'
2410
3720
  }
2411
3721
  ]
2412
3722
  }
2413
- yield f"data: {json.dumps(chunk_data)}"
3723
+ yield f"data: {json.dumps(chunk_data)}\n\n"
2414
3724
  return
2415
3725
  elif isinstance(stream_response, dict) and 'output' in stream_response and isinstance(stream_response.get('output'), str):
2416
- print('stream a str and not a gen')
3726
+ print('stream a str and not a gen')
2417
3727
  chunk_data = {
2418
- "id": None,
2419
- "object": None,
2420
- "created": datetime.datetime.now().strftime('YYYY-DD-MM-HHMMSS'),
3728
+ "id": None,
3729
+ "object": None,
3730
+ "created": datetime.datetime.now().strftime('YYYY-DD-MM-HHMMSS'),
2421
3731
  "model": model,
2422
3732
  "choices": [
2423
3733
  {
2424
- "index": 0,
2425
- "delta":
3734
+ "index": 0,
3735
+ "delta":
2426
3736
  {
2427
3737
  "content": stream_response.get('output') ,
2428
3738
  "role": "assistant"
2429
- },
3739
+ },
2430
3740
  "finish_reason": 'done'
2431
3741
  }
2432
3742
  ]
2433
3743
  }
2434
- yield f"data: {json.dumps(chunk_data)}"
3744
+ yield f"data: {json.dumps(chunk_data)}\n\n"
2435
3745
  return
2436
3746
  for response_chunk in stream_response.get('response', stream_response.get('output')):
2437
3747
  with cancellation_lock:
@@ -2459,8 +3769,8 @@ def stream():
2459
3769
  if chunk_content:
2460
3770
  complete_response.append(chunk_content)
2461
3771
  chunk_data = {
2462
- "id": None, "object": None,
2463
- "created": response_chunk["created_at"] or datetime.datetime.now(),
3772
+ "id": None, "object": None,
3773
+ "created": response_chunk["created_at"] or datetime.datetime.now(),
2464
3774
  "model": response_chunk["model"],
2465
3775
  "choices": [{"index": 0, "delta": {"content": chunk_content, "role": response_chunk["message"]["role"]}, "finish_reason": response_chunk.get("done_reason")}]
2466
3776
  }
@@ -2494,33 +3804,86 @@ def stream():
2494
3804
  print(f"\nAn exception occurred during streaming for {current_stream_id}: {e}")
2495
3805
  traceback.print_exc()
2496
3806
  interrupted = True
2497
-
3807
+
2498
3808
  finally:
2499
3809
  print(f"\nStream {current_stream_id} finished. Interrupted: {interrupted}")
2500
3810
  print('\r' + ' ' * dot_count*2 + '\r', end="", flush=True)
2501
3811
 
2502
3812
  final_response_text = ''.join(complete_response)
3813
+
3814
+ # Yield message_stop immediately so the client's stream ends quickly
2503
3815
  yield f"data: {json.dumps({'type': 'message_stop'})}\n\n"
2504
-
3816
+
3817
+ # Persist tool call metadata and results before final assistant content
3818
+ if tool_call_data.get("function_name") or tool_call_data.get("arguments"):
3819
+ save_conversation_message(
3820
+ command_history,
3821
+ conversation_id,
3822
+ "assistant",
3823
+ {"tool_call": tool_call_data},
3824
+ wd=current_path,
3825
+ model=model,
3826
+ provider=provider,
3827
+ npc=npc_name,
3828
+ team=team,
3829
+ message_id=generate_message_id(),
3830
+ )
3831
+
3832
+ if tool_results_for_db:
3833
+ for tr in tool_results_for_db:
3834
+ save_conversation_message(
3835
+ command_history,
3836
+ conversation_id,
3837
+ "tool",
3838
+ {"tool_name": tr.get("name"), "tool_call_id": tr.get("tool_call_id"), "content": tr.get("content")},
3839
+ wd=current_path,
3840
+ model=model,
3841
+ provider=provider,
3842
+ npc=npc_name,
3843
+ team=team,
3844
+ message_id=generate_message_id(),
3845
+ )
3846
+
3847
+ # Save assistant message to the database
2505
3848
  npc_name_to_save = npc_object.name if npc_object else ''
2506
3849
  save_conversation_message(
2507
- command_history,
2508
- conversation_id,
2509
- "assistant",
3850
+ command_history,
3851
+ conversation_id,
3852
+ "assistant",
2510
3853
  final_response_text,
2511
- wd=current_path,
2512
- model=model,
3854
+ wd=current_path,
3855
+ model=model,
2513
3856
  provider=provider,
2514
- npc=npc_name_to_save,
2515
- team=team,
3857
+ npc=npc_name_to_save,
3858
+ team=team,
2516
3859
  message_id=message_id,
2517
3860
  )
2518
3861
 
3862
+ # Start background tasks for memory extraction and context compression
3863
+ # These will run without blocking the main response stream.
3864
+ conversation_turn_text = f"User: {commandstr}\nAssistant: {final_response_text}"
3865
+ background_thread = threading.Thread(
3866
+ target=_run_stream_post_processing,
3867
+ args=(
3868
+ conversation_turn_text,
3869
+ conversation_id,
3870
+ command_history,
3871
+ npc_name,
3872
+ team, # Pass the team variable from the outer scope
3873
+ current_path,
3874
+ model,
3875
+ provider,
3876
+ npc_object,
3877
+ messages # Pass messages for context compression
3878
+ )
3879
+ )
3880
+ background_thread.daemon = True # Allow the main program to exit even if this thread is still running
3881
+ background_thread.start()
3882
+
2519
3883
  with cancellation_lock:
2520
3884
  if current_stream_id in cancellation_flags:
2521
3885
  del cancellation_flags[current_stream_id]
2522
3886
  print(f"Cleaned up cancellation flag for stream ID: {current_stream_id}")
2523
-
2524
3887
  return Response(event_stream(stream_id), mimetype="text/event-stream")
2525
3888
 
2526
3889
  @app.route('/api/delete_message', methods=['POST'])
@@ -2574,295 +3937,6 @@ def approve_memories():
2574
3937
 
2575
3938
 
2576
3939
 
2577
- @app.route("/api/execute", methods=["POST"])
2578
- def execute():
2579
- data = request.json
2580
-
2581
-
2582
- stream_id = data.get("streamId")
2583
- if not stream_id:
2584
- import uuid
2585
- stream_id = str(uuid.uuid4())
2586
-
2587
-
2588
- with cancellation_lock:
2589
- cancellation_flags[stream_id] = False
2590
- print(f"Starting execute stream with ID: {stream_id}")
2591
-
2592
-
2593
- commandstr = data.get("commandstr")
2594
- conversation_id = data.get("conversationId")
2595
- model = data.get("model", 'llama3.2')
2596
- provider = data.get("provider", 'ollama')
2597
- if provider is None:
2598
- provider = available_models.get(model)
2599
-
2600
-
2601
- npc_name = data.get("npc", "sibiji")
2602
- npc_source = data.get("npcSource", "global")
2603
- team = data.get("team", None)
2604
- current_path = data.get("currentPath")
2605
-
2606
- if current_path:
2607
- loaded_vars = load_project_env(current_path)
2608
- print(f"Loaded project env variables for stream request: {list(loaded_vars.keys())}")
2609
-
2610
- npc_object = None
2611
- team_object = None
2612
-
2613
-
2614
- if team:
2615
- print(team)
2616
- if hasattr(app, 'registered_teams') and team in app.registered_teams:
2617
- team_object = app.registered_teams[team]
2618
- print(f"Using registered team: {team}")
2619
- else:
2620
- print(f"Warning: Team {team} not found in registered teams")
2621
-
2622
-
2623
- if npc_name:
2624
-
2625
- if team and hasattr(app, 'registered_teams') and team in app.registered_teams:
2626
- team_object = app.registered_teams[team]
2627
- print('team', team_object)
2628
-
2629
- if hasattr(team_object, 'npcs'):
2630
- team_npcs = team_object.npcs
2631
- if isinstance(team_npcs, dict):
2632
- if npc_name in team_npcs:
2633
- npc_object = team_npcs[npc_name]
2634
- print(f"Found NPC {npc_name} in registered team {team}")
2635
- elif isinstance(team_npcs, list):
2636
- for npc in team_npcs:
2637
- if hasattr(npc, 'name') and npc.name == npc_name:
2638
- npc_object = npc
2639
- print(f"Found NPC {npc_name} in registered team {team}")
2640
- break
2641
-
2642
- if not npc_object and hasattr(team_object, 'forenpc') and hasattr(team_object.forenpc, 'name'):
2643
- if team_object.forenpc.name == npc_name:
2644
- npc_object = team_object.forenpc
2645
- print(f"Found NPC {npc_name} as forenpc in team {team}")
2646
-
2647
-
2648
- if not npc_object and hasattr(app, 'registered_npcs') and npc_name in app.registered_npcs:
2649
- npc_object = app.registered_npcs[npc_name]
2650
- print(f"Found NPC {npc_name} in registered NPCs")
2651
-
2652
-
2653
- if not npc_object:
2654
- db_conn = get_db_connection()
2655
- npc_object = load_npc_by_name_and_source(npc_name, npc_source, db_conn, current_path)
2656
-
2657
- if not npc_object and npc_source == 'project':
2658
- print(f"NPC {npc_name} not found in project directory, trying global...")
2659
- npc_object = load_npc_by_name_and_source(npc_name, 'global', db_conn)
2660
-
2661
- if npc_object:
2662
- print(f"Successfully loaded NPC {npc_name} from {npc_source} directory")
2663
- else:
2664
- print(f"Warning: Could not load NPC {npc_name}")
2665
-
2666
- attachments = data.get("attachments", [])
2667
- command_history = CommandHistory(app.config.get('DB_PATH'))
2668
- images = []
2669
- attachments_loaded = []
2670
-
2671
-
2672
- if attachments:
2673
- for attachment in attachments:
2674
- extension = attachment["name"].split(".")[-1]
2675
- extension_mapped = extension_map.get(extension.upper(), "others")
2676
- file_path = os.path.expanduser("~/.npcsh/" + extension_mapped + "/" + attachment["name"])
2677
- if extension_mapped == "images":
2678
- ImageFile.LOAD_TRUNCATED_IMAGES = True
2679
- img = Image.open(attachment["path"])
2680
- img_byte_arr = BytesIO()
2681
- img.save(img_byte_arr, format="PNG")
2682
- img_byte_arr.seek(0)
2683
- img.save(file_path, optimize=True, quality=50)
2684
- images.append(file_path)
2685
- attachments_loaded.append({
2686
- "name": attachment["name"], "type": extension_mapped,
2687
- "data": img_byte_arr.read(), "size": os.path.getsize(file_path)
2688
- })
2689
-
2690
- messages = fetch_messages_for_conversation(conversation_id)
2691
- if len(messages) == 0 and npc_object is not None:
2692
- messages = [{'role': 'system', 'content': npc_object.get_system_prompt()}]
2693
- elif len(messages)>0 and messages[0]['role'] != 'system' and npc_object is not None:
2694
- messages.insert(0, {'role': 'system', 'content': npc_object.get_system_prompt()})
2695
- elif len(messages) > 0 and npc_object is not None:
2696
- messages[0]['content'] = npc_object.get_system_prompt()
2697
- if npc_object is not None and messages and messages[0]['role'] == 'system':
2698
- messages[0]['content'] = npc_object.get_system_prompt()
2699
-
2700
- message_id = generate_message_id()
2701
- save_conversation_message(
2702
- command_history, conversation_id, "user", commandstr,
2703
- wd=current_path, model=model, provider=provider, npc=npc_name,
2704
- team=team, attachments=attachments_loaded, message_id=message_id,
2705
- )
2706
- response_gen = check_llm_command(
2707
- commandstr, messages=messages, images=images, model=model,
2708
- provider=provider, npc=npc_object, team=team_object, stream=True
2709
- )
2710
- print(response_gen)
2711
-
2712
- message_id = generate_message_id()
2713
-
2714
- def event_stream(current_stream_id):
2715
- complete_response = []
2716
- dot_count = 0
2717
- interrupted = False
2718
- tool_call_data = {"id": None, "function_name": None, "arguments": ""}
2719
- memory_data = None
2720
-
2721
- try:
2722
- for response_chunk in stream_response.get('response', stream_response.get('output')):
2723
- with cancellation_lock:
2724
- if cancellation_flags.get(current_stream_id, False):
2725
- print(f"Cancellation flag triggered for {current_stream_id}. Breaking loop.")
2726
- interrupted = True
2727
- break
2728
-
2729
- print('.', end="", flush=True)
2730
- dot_count += 1
2731
-
2732
- if "hf.co" in model or provider == 'ollama':
2733
- chunk_content = response_chunk["message"]["content"] if "message" in response_chunk and "content" in response_chunk["message"] else ""
2734
- if "message" in response_chunk and "tool_calls" in response_chunk["message"]:
2735
- for tool_call in response_chunk["message"]["tool_calls"]:
2736
- if "id" in tool_call:
2737
- tool_call_data["id"] = tool_call["id"]
2738
- if "function" in tool_call:
2739
- if "name" in tool_call["function"]:
2740
- tool_call_data["function_name"] = tool_call["function"]["name"]
2741
- if "arguments" in tool_call["function"]:
2742
- arg_val = tool_call["function"]["arguments"]
2743
- if isinstance(arg_val, dict):
2744
- arg_val = json.dumps(arg_val)
2745
- tool_call_data["arguments"] += arg_val
2746
- if chunk_content:
2747
- complete_response.append(chunk_content)
2748
- chunk_data = {
2749
- "id": None, "object": None, "created": response_chunk["created_at"], "model": response_chunk["model"],
2750
- "choices": [{"index": 0, "delta": {"content": chunk_content, "role": response_chunk["message"]["role"]}, "finish_reason": response_chunk.get("done_reason")}]
2751
- }
2752
- yield f"data: {json.dumps(chunk_data)}\n\n"
2753
- else:
2754
- chunk_content = ""
2755
- reasoning_content = ""
2756
- for choice in response_chunk.choices:
2757
- if hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls:
2758
- for tool_call in choice.delta.tool_calls:
2759
- if tool_call.id:
2760
- tool_call_data["id"] = tool_call.id
2761
- if tool_call.function:
2762
- if hasattr(tool_call.function, "name") and tool_call.function.name:
2763
- tool_call_data["function_name"] = tool_call.function.name
2764
- if hasattr(tool_call.function, "arguments") and tool_call.function.arguments:
2765
- tool_call_data["arguments"] += tool_call.function.arguments
2766
- for choice in response_chunk.choices:
2767
- if hasattr(choice.delta, "reasoning_content"):
2768
- reasoning_content += choice.delta.reasoning_content
2769
- chunk_content = "".join(choice.delta.content for choice in response_chunk.choices if choice.delta.content is not None)
2770
- if chunk_content:
2771
- complete_response.append(chunk_content)
2772
- chunk_data = {
2773
- "id": response_chunk.id, "object": response_chunk.object, "created": response_chunk.created, "model": response_chunk.model,
2774
- "choices": [{"index": choice.index, "delta": {"content": choice.delta.content, "role": choice.delta.role, "reasoning_content": reasoning_content if hasattr(choice.delta, "reasoning_content") else None}, "finish_reason": choice.finish_reason} for choice in response_chunk.choices]
2775
- }
2776
- yield f"data: {json.dumps(chunk_data)}\n\n"
2777
-
2778
- except Exception as e:
2779
- print(f"\nAn exception occurred during streaming for {current_stream_id}: {e}")
2780
- traceback.print_exc()
2781
- interrupted = True
2782
-
2783
- finally:
2784
- print(f"\nStream {current_stream_id} finished. Interrupted: {interrupted}")
2785
- print('\r' + ' ' * dot_count*2 + '\r', end="", flush=True)
2786
-
2787
- final_response_text = ''.join(complete_response)
2788
-
2789
- conversation_turn_text = f"User: {commandstr}\nAssistant: {final_response_text}"
2790
-
2791
- try:
2792
- memory_examples = command_history.get_memory_examples_for_context(
2793
- npc=npc_name,
2794
- team=team,
2795
- directory_path=current_path
2796
- )
2797
-
2798
- memory_context = format_memory_context(memory_examples)
2799
-
2800
- facts = get_facts(
2801
- conversation_turn_text,
2802
- model=npc_object.model if npc_object else model,
2803
- provider=npc_object.provider if npc_object else provider,
2804
- npc=npc_object,
2805
- context=memory_context
2806
- )
2807
-
2808
- if facts:
2809
- memories_for_approval = []
2810
- for i, fact in enumerate(facts):
2811
- memory_id = command_history.add_memory_to_database(
2812
- message_id=f"{conversation_id}_{datetime.now().strftime('%H%M%S')}_{i}",
2813
- conversation_id=conversation_id,
2814
- npc=npc_name or "default",
2815
- team=team or "default",
2816
- directory_path=current_path or "/",
2817
- initial_memory=fact['statement'],
2818
- status="pending_approval",
2819
- model=npc_object.model if npc_object else model,
2820
- provider=npc_object.provider if npc_object else provider
2821
- )
2822
-
2823
- memories_for_approval.append({
2824
- "memory_id": memory_id,
2825
- "content": fact['statement'],
2826
- "context": f"Type: {fact.get('type', 'unknown')}, Source: {fact.get('source_text', '')}",
2827
- "npc": npc_name or "default"
2828
- })
2829
-
2830
- memory_data = {
2831
- "type": "memory_approval",
2832
- "memories": memories_for_approval,
2833
- "conversation_id": conversation_id
2834
- }
2835
-
2836
- except Exception as e:
2837
- print(f"Memory generation error: {e}")
2838
-
2839
- if memory_data:
2840
- yield f"data: {json.dumps(memory_data)}\n\n"
2841
-
2842
- yield f"data: {json.dumps({'type': 'message_stop'})}\n\n"
2843
-
2844
- npc_name_to_save = npc_object.name if npc_object else ''
2845
- save_conversation_message(
2846
- command_history,
2847
- conversation_id,
2848
- "assistant",
2849
- final_response_text,
2850
- wd=current_path,
2851
- model=model,
2852
- provider=provider,
2853
- npc=npc_name_to_save,
2854
- team=team,
2855
- message_id=message_id,
2856
- )
2857
-
2858
- with cancellation_lock:
2859
- if current_stream_id in cancellation_flags:
2860
- del cancellation_flags[current_stream_id]
2861
- print(f"Cleaned up cancellation flag for stream ID: {current_stream_id}")
2862
-
2863
-
2864
-
2865
- return Response(event_stream(stream_id), mimetype="text/event-stream")
2866
3940
 
2867
3941
  @app.route("/api/interrupt", methods=["POST"])
2868
3942
  def interrupt_stream():
@@ -3023,6 +4097,37 @@ def ollama_status():
3023
4097
  return jsonify({"status": "not_found"})
3024
4098
 
3025
4099
 
4100
+ @app.route("/api/ollama/tool_models", methods=["GET"])
4101
+ def get_ollama_tool_models():
4102
+ """
4103
+ Best-effort detection of Ollama models whose templates include tool-call support.
4104
+ We scan templates for tool placeholders; if none are found we assume tools are unsupported.
4105
+ """
4106
+ try:
4107
+ detected = []
4108
+ listing = ollama.list()
4109
+ for model in listing.get("models", []):
4110
+ name = getattr(model, "model", None) or (model.get("name") if isinstance(model, dict) else None)
4111
+ if not name:
4112
+ continue
4113
+ try:
4114
+ details = ollama.show(name)
4115
+ tmpl = details.get("template") or ""
4116
+ if "{{- if .Tools" in tmpl or "{{- range .Tools" in tmpl or "{{- if .ToolCalls" in tmpl:
4117
+ detected.append(name)
4118
+ continue
4119
+ metadata = details.get("metadata") or {}
4120
+ if metadata.get("tools") or metadata.get("tool_calls"):
4121
+ detected.append(name)
4122
+ except Exception as inner_e:
4123
+ print(f"Warning: could not inspect ollama model {name} for tool support: {inner_e}")
4124
+ continue
4125
+ return jsonify({"models": detected, "error": None})
4126
+ except Exception as e:
4127
+ print(f"Error listing Ollama tool-capable models: {e}")
4128
+ return jsonify({"models": [], "error": str(e)}), 500
4129
+
4130
+
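A sketch of querying the endpoint above; the base URL/port is an assumption and requests is not a dependency of this module:

    import requests

    resp = requests.get("http://localhost:5337/api/ollama/tool_models").json()
    if resp.get("error") is None:
        print("tool-capable Ollama models:", resp.get("models", []))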
3026
4131
  @app.route('/api/ollama/models', methods=['GET'])
3027
4132
  def get_ollama_models():
3028
4133
  response = ollama.list()