npm - bone-agent - Versions diffs - 1.3.2 → 1.3.3 - Mend

bone-agent 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65)

package/README.md +2 -2
package/config.yaml.example +8 -0
package/package.json +3 -2
package/prompts/main/ask_questions.md +31 -0
package/prompts/main/batch_independent_calls.md +5 -0
package/prompts/main/casual_interactions.md +11 -0
package/prompts/main/code_references.md +8 -0
package/prompts/main/communication_style.md +12 -0
package/prompts/main/context_reliability.md +12 -0
package/prompts/main/conversational_tool_calling.md +15 -0
package/prompts/main/dream.md +36 -0
package/prompts/main/editing_pattern.md +13 -0
package/prompts/main/error_handling.md +6 -0
package/prompts/main/exploration_pattern.md +21 -0
package/prompts/main/intro.md +1 -0
package/prompts/main/obsidian.md +16 -0
package/prompts/main/obsidian_project.md +79 -0
package/prompts/main/professional_objectivity.md +3 -0
package/prompts/main/targeted_searching.md +10 -0
package/prompts/main/task_lists_pattern.md +8 -0
package/prompts/main/temp_folder.md +9 -0
package/prompts/main/think_before_acting.md +10 -0
package/prompts/main/tone_and_style.md +4 -0
package/prompts/main/tool_preferences.md +24 -0
package/prompts/main/trust_subagent_context.md +21 -0
package/prompts/main/when_to_use_sub_agent.md +7 -0
package/prompts/micro/ask_questions.md +1 -0
package/prompts/micro/batch_independent_calls.md +1 -0
package/prompts/micro/casual_interactions.md +1 -0
package/prompts/micro/code_references.md +1 -0
package/prompts/micro/communication_style.md +1 -0
package/prompts/micro/context_reliability.md +1 -0
package/prompts/micro/conversational_tool_calling.md +1 -0
package/prompts/micro/editing_pattern.md +1 -0
package/prompts/micro/error_handling.md +1 -0
package/prompts/micro/exploration_pattern.md +1 -0
package/prompts/micro/intro.md +1 -0
package/prompts/micro/obsidian.md +4 -0
package/prompts/micro/obsidian_project.md +5 -0
package/prompts/micro/professional_objectivity.md +1 -0
package/prompts/micro/targeted_searching.md +1 -0
package/prompts/micro/task_lists_pattern.md +1 -0
package/prompts/micro/temp_folder.md +1 -0
package/prompts/micro/think_before_acting.md +5 -0
package/prompts/micro/tone_and_style.md +1 -0
package/prompts/micro/tool_preferences.md +1 -0
package/prompts/micro/trust_subagent_context.md +1 -0
package/prompts/micro/when_to_use_sub_agent.md +1 -0
package/src/core/agentic.py +1 -73
package/src/core/chat_manager.py +42 -7
package/src/core/config_manager.py +6 -0
package/src/core/cron.py +57 -2
package/src/core/memory.py +3 -90
package/src/llm/config.py +28 -2
package/src/llm/prompts.py +251 -497
package/src/llm/providers.py +25 -6
package/src/llm/token_tracker.py +17 -1
package/src/tools/edit.py +8 -6
package/src/tools/helpers/path_resolver.py +18 -12
package/src/tools/rg_search.py +97 -30
package/src/ui/commands.py +120 -5
package/src/ui/displays.py +1 -0
package/src/ui/main.py +1 -0
package/src/utils/settings.py +19 -2
package/src/utils/user_message_logger.py +120 -0

package/src/llm/providers.py CHANGED Viewed

@@ -173,17 +173,27 @@ class AnthropicHandler:
         # OpenAI format: {"choices": [{"message": {"content": "..."}}], "usage": {...}}
         # Convert Anthropic usage format (input_tokens/output_tokens) to OpenAI format (prompt_tokens/completion_tokens)
+        # Anthropic's input_tokens does NOT include cache tokens; total input =
+        #   input_tokens + cache_read_input_tokens + cache_creation_input_tokens
         anthropic_usage = response_json.get("usage", {})
+        cache_read = anthropic_usage.get('cache_read_input_tokens', 0)
+        cache_creation = anthropic_usage.get('cache_creation_input_tokens', 0)
+        prompt_tokens = anthropic_usage.get('input_tokens', 0) + cache_read + cache_creation
+        completion_tokens = anthropic_usage.get('output_tokens', 0)
         openai_format_usage = {
-            'prompt_tokens': anthropic_usage.get('input_tokens', 0),
-            'completion_tokens': anthropic_usage.get('output_tokens', 0),
-            'total_tokens': anthropic_usage.get('input_tokens', 0) + anthropic_usage.get('output_tokens', 0),
+            'prompt_tokens': prompt_tokens,
+            'completion_tokens': completion_tokens,
+            'total_tokens': prompt_tokens + completion_tokens,
         }
         # Preserve Anthropic cache token fields for the token tracker
         if 'cache_read_input_tokens' in anthropic_usage:
             openai_format_usage['cache_read_input_tokens'] = anthropic_usage['cache_read_input_tokens']
         if 'cache_creation_input_tokens' in anthropic_usage:
             openai_format_usage['cache_creation_input_tokens'] = anthropic_usage['cache_creation_input_tokens']
+        # Preserve non-cache input count so cost estimation can bill only the
+        # non-cache portion without relying on fragile prompt_tokens subtraction.
+        if 'input_tokens' in anthropic_usage:
+            openai_format_usage['input_tokens'] = anthropic_usage['input_tokens']
         result = {
             "choices": [],
@@ -279,17 +289,26 @@ class AnthropicHandler:
         # Yield usage data as final item if captured
         # Convert Anthropic format (input_tokens/output_tokens) to OpenAI format (prompt_tokens/completion_tokens)
+        # Anthropic's input_tokens does NOT include cache tokens; total input =
+        #   input_tokens + cache_read_input_tokens + cache_creation_input_tokens
         if usage_data:
+            cache_read = usage_data.get('cache_read_input_tokens', 0)
+            cache_creation = usage_data.get('cache_creation_input_tokens', 0)
+            prompt_tokens = usage_data.get('input_tokens', 0) + cache_read + cache_creation
+            completion_tokens = usage_data.get('output_tokens', 0)
             openai_format_usage = {
-                'prompt_tokens': usage_data.get('input_tokens', 0),
-                'completion_tokens': usage_data.get('output_tokens', 0),
-                'total_tokens': usage_data.get('input_tokens', 0) + usage_data.get('output_tokens', 0),
+                'prompt_tokens': prompt_tokens,
+                'completion_tokens': completion_tokens,
+                'total_tokens': prompt_tokens + completion_tokens,
             }
             # Preserve Anthropic cache token fields for the token tracker
             if 'cache_read_input_tokens' in usage_data:
                 openai_format_usage['cache_read_input_tokens'] = usage_data['cache_read_input_tokens']
             if 'cache_creation_input_tokens' in usage_data:
                 openai_format_usage['cache_creation_input_tokens'] = usage_data['cache_creation_input_tokens']
+            # Preserve non-cache input count for accurate cost estimation
+            if 'input_tokens' in usage_data:
+                openai_format_usage['input_tokens'] = usage_data['input_tokens']
             yield {'__usage__': openai_format_usage}
     @staticmethod

package/src/llm/token_tracker.py CHANGED Viewed

@@ -55,6 +55,10 @@ class TokenTracker:
         self.total_cache_creation_tokens = 0   # Cumulative input tokens written to cache
         self.conv_cache_read_tokens = 0        # Per-conversation cache read tokens
         self.conv_cache_creation_tokens = 0    # Per-conversation cache creation tokens
+        # Active prompt variant (loaded from prompts/ directory)
+        self.current_variant = "main"
     def add_usage(self, usage_data, model_name: str = ""):
         """Add token usage from an API response.
@@ -122,7 +126,19 @@ class TokenTracker:
             # Fallback: look up cost rates from config
             cost_in, cost_out = get_model_cost(model_name)
             if cost_in > 0 or cost_out > 0:
-                computed = self._calculate_cost(prompt_tokens, completion_tokens, cost_in, cost_out)
+                # Compute the billable (non-cache) input token count for cost
+                # estimation.  Providers normalize `prompt_tokens` differently:
+                #   - Anthropic handler: sums input + cache_read + cache_creation
+                #   - OpenAI: prompt_tokens natively includes cached_tokens
+                #   - Future providers: may exclude cache tokens from prompt_tokens
+                # Use the explicit `input_tokens` field (Anthropic native,
+                # non-cache portion) when available; otherwise subtract cache
+                # tokens from prompt_tokens (assumes prompt_tokens includes
+                # cache counts).
+                base_prompt = usage_data.get('input_tokens')
+                if base_prompt is None:
+                    base_prompt = max(0, prompt_tokens - cache_read - cache_creation)
+                computed = self._calculate_cost(base_prompt, completion_tokens, cost_in, cost_out)
                 self.add_estimated_cost(computed['total_cost'])
     def add_actual_cost(self, cost_usd: float):

package/src/tools/edit.py CHANGED Viewed

@@ -223,9 +223,11 @@ def _prepare_edit(arguments, repo_root, gitignore_spec=None, vault_root=None) ->
     if not path or not isinstance(path, str) or not path.strip():
         raise FileEditError("Missing or invalid 'path' parameter")
-    # Memory files (.bone/ and user_memory.md) are auto-approved writes that the
-    # system itself adds to .gitignore, so gitignore filtering would block them.
-    is_memory = Path(path).parent.name == ".bone" or Path(path).name == "user_memory.md"
+    # Memory files (.bone/ under repo root and user_memory.md) are auto-approved
+    # writes that the system itself adds to .gitignore, so gitignore filtering
+    # would block them. Must anchor to repo_root to avoid matching any .bone/ dir.
+    _resolved = (repo_root / path).resolve()
+    is_memory = str(_resolved).startswith(str((repo_root / ".bone").resolve()) + os.sep) or Path(path).name == "user_memory.md"
     # Resolve and validate path using PathResolver
     try:
@@ -236,9 +238,9 @@ def _prepare_edit(arguments, repo_root, gitignore_spec=None, vault_root=None) ->
         raise FileEditError(str(e), details=e.details)
     if not file_path.exists():
-        # Auto-create memory files (.bone/) with default header on first write.
-        # These are already auto-approved, so directory+file creation is safe.
-        if file_path.parent.name == ".bone" or file_path.name == "user_memory.md":
+        # Auto-create memory files (.bone/ under repo root) with default header
+        # on first write. Already auto-approved, so creation is safe.
+        if is_memory:
             file_path.parent.mkdir(parents=True, exist_ok=True)
             header = "# Project Memory\n\n" if file_path.name == "agents.md" else "# User Memory\n\n"
             file_path.write_text(header, encoding="utf-8")

package/src/tools/helpers/path_resolver.py CHANGED Viewed

@@ -107,24 +107,30 @@ class PathResolver:
             # Resolve to absolute path (handles .. and symlinks)
             path = path.resolve()
-            # Step 2b: Security boundary — path must be within repo_root or vault_path
+            # Step 2b: Security boundary — path must be within repo_root, vault_path,
+            # or the agent's own data directory (~/.bone/).
             if enforce_boundary:
                 try:
                     path.relative_to(self.repo_root)
                 except ValueError:
-                    if self.vault_path is not None:
-                        try:
-                            path.relative_to(self.vault_path)
-                        except ValueError:
+                    # Check ~/.bone/ — agent data dir is always accessible
+                    bone_root = Path.home() / ".bone"
+                    try:
+                        path.relative_to(bone_root)
+                    except ValueError:
+                        if self.vault_path is not None:
+                            try:
+                                path.relative_to(self.vault_path)
+                            except ValueError:
+                                elapsed = time.time() - start_time
+                                _track_validation_error("outside_allowed_roots")
+                                _path_resolution_times.append(elapsed)
+                                return None, f"Path is outside allowed directories: {path_str}"
+                        else:
                             elapsed = time.time() - start_time
-                            _track_validation_error("outside_allowed_roots")
+                            _track_validation_error("outside_repo")
                             _path_resolution_times.append(elapsed)
-                            return None, f"Path is outside allowed directories: {path_str}"
-                    else:
-                        elapsed = time.time() - start_time
-                        _track_validation_error("outside_repo")
-                        _path_resolution_times.append(elapsed)
-                        return None, f"Path is outside repository: {path_str}"
+                            return None, f"Path is outside repository: {path_str}"
             # Step 3: Check existence if required
             if must_exist:

package/src/tools/rg_search.py CHANGED Viewed

@@ -2,21 +2,84 @@
 import logging
 import re
-import shlex
+import stat
 import subprocess
 from pathlib import Path
 from typing import Optional
 from .helpers.base import tool
 from .helpers.formatters import format_tool_result
-from .shell import _prepare_execution_environment, run_shell_command
+from .shell import _execute_direct_command, _prepare_execution_environment
 from .helpers.converters import coerce_bool, coerce_int
+from utils.settings import tool_settings
 logger = logging.getLogger(__name__)
 # Default match limit for vault searches (separate from repo limit)
 _VAULT_MAX_MATCHES = 20
+# Regex for detecting file-path lines in rg output (shared by _annotate_file_sizes and _search_vault)
+_path_line_re = re.compile(r"^[^\s:|].*[/.]")
+def _format_file_size(size_bytes: int) -> str:
+    """Format file size in human-readable form."""
+    if size_bytes < 1024:
+        return f"{size_bytes} B"
+    elif size_bytes < 1024 * 1024:
+        return f"{size_bytes / 1024:.1f} KB"
+    else:
+        return f"{size_bytes / (1024 * 1024):.1f} MB"
+def _annotate_file_sizes(formatted_output: str, base_path: Path, output_mode: str = "files_with_matches") -> str:
+    """Append human-readable file sizes to each file path line in rg output.
+    Works on files_with_matches and count output modes where each content
+    line starts with a file path. Skips metadata, truncation, and section
+    header lines. Skipped entirely for content mode (no benefit there).
+    """
+    if output_mode == "content":
+        return formatted_output
+    lines = formatted_output.split("\n")
+    annotated = []
+    for line in lines:
+        stripped = line.strip()
+        if (not stripped
+                or stripped.startswith("exit_code=")
+                or stripped.startswith("matches=")
+                or stripped.startswith("files=")
+                or stripped.startswith("... (")
+                or stripped.startswith("[repo]")
+                or stripped.startswith("[vault]")):
+            annotated.append(line)
+            continue
+        # Only annotate pure file-path lines (files_with_matches: "file",
+        # count: "file:N"). Skip content-mode match lines ("file:line:match")
+        # which always have 2+ colons or a colon-digit-dash pattern.
+        parts = line.split(":")
+        is_file_line = (
+            _path_line_re.match(line)
+            and len(parts) <= 2
+            and (len(parts) == 1 or parts[1].strip().isdigit())
+        )
+        if is_file_line:
+            file_part = parts[0].strip()
+            full_path = base_path / file_part
+            try:
+                st = full_path.stat()
+                if stat.S_ISREG(st.st_mode):
+                    size = _format_file_size(st.st_size)
+                    annotated.append(f"{line}  {size:>8}")
+                else:
+                    annotated.append(line)
+            except (OSError, ValueError):
+                annotated.append(line)
+        else:
+            annotated.append(line)
+    return "\n".join(annotated)
 @tool(
     name="rg",
@@ -103,72 +166,74 @@ def rg(
     if not isinstance(pattern, str) or not pattern.strip():
         return "exit_code=1\nrg requires a non-empty 'pattern' argument."
-    # Build rg command from arguments
-    cmd_parts = ["rg"]
+    # Build rg args as a list (bypass string roundtrip to avoid shlex issues with regex metacharacters)
+    args = []
     # Add --line-number for content mode
     if output_mode == "content":
-        cmd_parts.append("--line-number")
+        args.append("--line-number")
     # Add multiline flag
     multiline = coerce_bool(kwargs.get("multiline"), default=False)
     if multiline:
-        cmd_parts.append("-U")
-        cmd_parts.append("--multiline-dotall")
+        args.append("-U")
+        args.append("--multiline-dotall")
     # Add case insensitive flag
     case_insensitive = coerce_bool(kwargs.get("case_insensitive"), default=False)
     if case_insensitive:
-        cmd_parts.append("--ignore-case")
+        args.append("--ignore-case")
     # Add context lines flag
     context_lines = coerce_int(kwargs.get("context_lines"))[0] if kwargs.get("context_lines") else None
     if context_lines:
-        cmd_parts.append(f"--context={context_lines}")
+        args.append(f"--context={context_lines}")
     # Add glob pattern
     if glob:
-        cmd_parts.append(f"--glob={glob}")
+        args.append(f"--glob={glob}")
     # Add file type filter
     file_type = kwargs.get("type")
     if file_type:
-        cmd_parts.append(f"--type={file_type}")
+        args.append(f"--type={file_type}")
-    # Add files-with-matches flag for count mode
+    # Add output mode flags
     if output_mode == "files_with_matches":
-        cmd_parts.append("--files-with-matches")
+        args.append("--files-with-matches")
     elif output_mode == "count":
-        cmd_parts.append("--count")
+        args.append("--count")
-    # Add pattern - quote if it contains spaces
-    if " " in pattern:
-        cmd_parts.append(shlex.quote(pattern))
-    else:
-        cmd_parts.append(pattern)
+    # Pattern and search path — no quoting needed, subprocess list form bypasses shell
+    args.append(pattern)
-    # Add path (default to current directory)
     search_path = path or "."
-    cmd_parts.append(search_path)
-    # Build command string
-    command = " ".join(cmd_parts)
+    args.append(search_path)
     # Get max_matches from kwargs (default: 100, set to 0 for no limit)
     raw = coerce_int(kwargs.get("max_matches"))[0] if kwargs.get("max_matches") is not None else None
     max_matches = raw if raw is not None and raw >= 0 else 100
-    # Execute repo search
+    # Execute repo search directly (no string→shlex roundtrip)
     try:
-        repo_result = run_shell_command(
-            command, repo_root, rg_exe_path, console, debug_mode, gitignore_spec,
-            max_matches=max_matches
+        env = _prepare_execution_environment(repo_root, rg_exe_path)
+        result = _execute_direct_command(
+            [str(rg_exe_path)] + args,
+            repo_root, env, debug_mode, console,
+        )
+        command_display = "rg " + " ".join(args)
+        repo_result = format_tool_result(
+            result, command=command_display, is_rg=True,
+            debug_mode=debug_mode, max_matches=max_matches,
         )
     except Exception as e:
         return f"exit_code=1\nrg command failed: {str(e)}"
     # If no vault configured, return repo results directly
     if not vault_root:
+        repo_result = _annotate_file_sizes(repo_result, repo_root, output_mode)
         return repo_result
     # Run vault search and merge results
@@ -184,9 +249,12 @@ def rg(
     )
     if not vault_output:
+        repo_result = _annotate_file_sizes(repo_result, repo_root, output_mode)
         return repo_result
     # Merge results: repo section + vault section with absolute paths
+    repo_result = _annotate_file_sizes(repo_result, repo_root, output_mode)
+    vault_output = _annotate_file_sizes(vault_output, Path(vault_root), output_mode)
     return _merge_results(repo_result, vault_output, output_mode)
@@ -257,7 +325,7 @@ def _search_vault(vault_root, rg_exe_path, output_mode, debug_mode, console,
             text=True,
             encoding="utf-8",
             errors="replace",
-            timeout=30,
+            timeout=tool_settings.command_timeout_sec,
             cwd=str(vault_path),
             env=env,
         )
@@ -288,7 +356,6 @@ def _search_vault(vault_root, rg_exe_path, output_mode, debug_mode, console,
         # rg output: "relative/path:linenum:match" or "relative/path-linenum-context"
         # or "relative/path:count" (count mode).  Must contain / or . before any
         # colon to avoid matching content-only lines or binary headers.
-        _path_line_re = re.compile(r"^[^\s:|].*[/.]")
         vault_prefix = str(vault_path)
         lines = formatted.split("\n")

package/src/ui/commands.py CHANGED Viewed

@@ -1,6 +1,8 @@
 """Command routing and help display."""
+import os
 import re
+import subprocess
 from dataclasses import dataclass
 from typing import Optional
 from llm import config
@@ -155,6 +157,9 @@ def _cron_remove(console, sub_args, cron_config, notify_scheduler):
     if not job_id:
         console.print("[red]Usage: /cron remove <id>[/red]")
         return CommandResult(status="handled")
+    if job_id == "dream":
+        console.print("[red]The 'dream' job is managed by DREAM_SETTINGS.enabled in config.yaml and cannot be removed.[/red]")
+        return CommandResult(status="handled")
     if cron_config.remove_job(job_id):
         notify_scheduler()
         console.print(f"[green]Removed cron job '{job_id}'[/green]")
@@ -171,6 +176,9 @@ def _cron_toggle(console, sub_args, cron_config, notify_scheduler, enable):
     if not job_id:
         console.print(f"[red]Usage: /cron {verb} <id>[/red]")
         return CommandResult(status="handled")
+    if not enable and job_id == "dream":
+        console.print("[red]The 'dream' job is managed by DREAM_SETTINGS.enabled in config.yaml and cannot be disabled via /cron.[/red]")
+        return CommandResult(status="handled")
     if job_id in cron_config.jobs:
         cron_config.update_job(job_id, enabled=enable)
         notify_scheduler()
@@ -381,6 +389,12 @@ def _handle_config(chat_manager, console, debug_mode_container, args, cron_sched
                 {"value": "danger", "text": "DANGER"},
             ],
         ),
+        SettingOption(
+            key="memory_enabled", text="Memory",
+            value=config.MEMORY_SETTINGS.get("enabled", True),
+            input_type="boolean",
+            on_text="ON", off_text="OFF",
+        ),
     ]
     # Build status bar settings
@@ -480,6 +494,10 @@ def _handle_config(chat_manager, console, debug_mode_container, args, cron_sched
                 console.print("[bold red on default]  Dangerous git commands are still blocked.[/bold red on default]")
                 console.print("[bold yellow on default]  Use at your own risk![/bold yellow on default]")
                 console.print()
+        elif key == "memory_enabled":
+            config.update_memory_settings({"enabled": value})
+            state = "enabled" if value else "disabled"
+            change_lines.append(f"  Memory: {state}")
         elif key == "compact_trigger_tokens":
             context_settings.compact_trigger_tokens = int(value)
             change_lines.append(f"  Compaction Threshold: {value:,} tokens")
@@ -531,6 +549,17 @@ def _handle_config(chat_manager, console, debug_mode_container, args, cron_sched
         except Exception as e:
             console.print(f"[red]Failed to save status bar settings: {e}[/red]")
+    # Persist memory setting to config
+    if "memory_enabled" in changes:
+        try:
+            cfg_data = config_manager.load(force_reload=True)
+            if "MEMORY_SETTINGS" not in cfg_data:
+                cfg_data["MEMORY_SETTINGS"] = {}
+            cfg_data["MEMORY_SETTINGS"]["enabled"] = changes["memory_enabled"]
+            config_manager.save(cfg_data)
+        except Exception as e:
+            console.print(f"[red]Failed to save memory settings: {e}[/red]")
     # Display summary
     console.print(f"[green]Settings updated:[/green]")
     for line in change_lines:
@@ -561,12 +590,13 @@ def _handle_clear(chat_manager, console, debug_mode_container, args, cron_schedu
     conv_cache_read = chat_manager.token_tracker.conv_cache_read_tokens
     conv_cache_creation = chat_manager.token_tracker.conv_cache_creation_tokens
     if conv_cache_read > 0 or conv_cache_creation > 0:
+        total_cached = conv_cache_read + conv_cache_creation
         cache_hit_pct = (
-            conv_cache_read / conv_in * 100
-        ) if conv_in > 0 else 0
+            conv_cache_read / total_cached * 100
+        ) if total_cached > 0 else 0
         console.print(f"  Cache read:   {conv_cache_read:,} tokens")
         console.print(f"  Cache write:  {conv_cache_creation:,} tokens")
-        console.print(f"  ({cache_hit_pct:.0f}% of input served from cache)")
+        console.print(f"  ({cache_hit_pct:.0f}% cache hit rate)")
     # Display cost — combined actual + estimated, with config-based fallback
     tracker_conv = chat_manager.token_tracker
@@ -1065,10 +1095,11 @@ def _handle_usage(chat_manager, console, debug_mode_container, args, cron_schedu
     # Display cache token breakdown (if any cache tokens were recorded)
     has_cache = tracker.total_cache_read_tokens > 0 or tracker.total_cache_creation_tokens > 0
     if has_cache:
+        total_cached = tracker.total_cache_read_tokens + tracker.total_cache_creation_tokens
         cache_hit_pct = (
             tracker.total_cache_read_tokens
-            / tracker.total_prompt_tokens * 100
-        ) if tracker.total_prompt_tokens > 0 else 0
+            / total_cached * 100
+        ) if total_cached > 0 else 0
         console.print()
         console.print(f"[#5F9EA0]Input Cache ({cache_hit_pct:.0f}% hit rate):[/#5F9EA0]")
         console.print(f"  Cache read:   {tracker.total_cache_read_tokens:,} tokens")
@@ -2433,6 +2464,57 @@ def _handle_cd(chat_manager, console, debug_mode_container, args, cron_scheduler
     return CommandResult(status="handled")
+def _handle_prompt(chat_manager, console, debug_mode_container, args, cron_scheduler=None):
+    """Handle /prompt command — show/swap prompt variants."""
+    from utils.settings import prompt_settings
+    from llm.prompts import _variant_available, _list_variants
+    cfg_manager = config_manager
+    if not args or args.strip() == "list":
+        variants = _list_variants()
+        current = prompt_settings.variant
+        console.print()
+        console.print(f"[bold #5F9EA0]Prompt Variants[/bold #5F9EA0]  (current: [bold]{current}[/bold])")
+        console.print()
+        for v in variants:
+            marker = "[bold green]active[/bold green]" if v == current else ""
+            console.print(f"  [bold]{v}[/bold]  {marker}")
+        console.print()
+        console.print("[dim]Switch with: [bold #5F9EA0]/prompt main[/bold #5F9EA0] or [bold #5F9EA0]/prompt micro[/bold #5F9EA0][/dim]")
+        return CommandResult(status="handled")
+    # Single arg: variant name to switch to
+    target = args.strip().lower()
+    if not _variant_available(target):
+        variants = _list_variants()
+        console.print(f"[red]Unknown variant: '{target}'[/red]")
+        console.print(f"[dim]Available: {', '.join(variants)}[/dim]")
+        return CommandResult(status="handled")
+    # Update settings
+    prompt_settings.variant = target
+    # Persist to config
+    try:
+        cfg_data = cfg_manager.load(force_reload=True)
+        if "PROMPT_SETTINGS" not in cfg_data:
+            cfg_data["PROMPT_SETTINGS"] = {}
+        cfg_data["PROMPT_SETTINGS"]["variant"] = target
+        cfg_manager.save(cfg_data)
+    except Exception as e:
+        console.print(f"[red]Failed to save variant to config: {e}[/red]")
+        console.print("[yellow]Variant applied for this session only — it will revert on restart.[/yellow]")
+    # Rebuild system prompt in-place (no restart)
+    chat_manager.update_system_prompt(variant=target)
+    console.print(f"[green]Switched to '{target}' variant[/green]")
+    console.print("[dim]System prompt rebuilt in-place.[/dim]")
+    return CommandResult(status="handled")
 def _handle_obsidian_init(console, obsidian_settings):
     """Handle /obsidian init — scaffold project folder structure in vault."""
     if not obsidian_settings.is_active():
@@ -2658,9 +2740,34 @@ _COMMAND_HANDLERS = {
     "/cd": _handle_cd,
     "/setup": _handle_setup,
     "/cron": _handle_cron,
+    "/prompt": _handle_prompt,
 }
+def _handle_shell_command(console, command):
+    """Execute a shell command prefixed with : and display output."""
+    from utils.settings import tool_settings
+    try:
+        result = subprocess.run(
+            ["/bin/sh", "-c", command], capture_output=True, text=True,
+            encoding="utf-8", errors="replace", timeout=tool_settings.command_timeout_sec,
+        )
+        output = ((result.stdout or "") + (result.stderr or "")).strip() or "(no output)"
+        lines = output.splitlines()
+        if len(lines) > 200:
+            output = "\n".join(lines[:100]) + f"\n\n... ({len(lines) - 200} lines omitted) ...\n\n" + "\n".join(lines[-100:])
+        console.print()
+        if result.returncode != 0:
+            console.print(f"[red]exit code: {result.returncode}[/red]")
+        console.print(output)
+        console.print()
+    except subprocess.TimeoutExpired:
+        console.print(f"[red]Command timed out after {tool_settings.command_timeout_sec}s[/red]")
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/red]")
+    return CommandResult(status="handled")
 def process_command(chat_manager, user_input, console, debug_mode_container, cron_scheduler=None):
     """Process command and optionally return replacement content.
@@ -2681,6 +2788,14 @@ def process_command(chat_manager, user_input, console, debug_mode_container, cro
     cmd = parts[0].lower()
     args = parts[1] if len(parts) > 1 else None
+    # Shell command prefix (:command)
+    if user_input.startswith(":"):
+        shell_cmd = user_input[1:].strip()
+        if shell_cmd:
+            result = _handle_shell_command(console, shell_cmd)
+            return (result.status, result.replacement_input)
+        return ("handled", None)
     # Look up handler in registry
     handler = _COMMAND_HANDLERS.get(cmd)
     if handler:

package/src/ui/displays.py CHANGED Viewed

@@ -70,6 +70,7 @@ def show_help_table(console):
     table.add_row("[bold #5F9EA0]/tools[/bold #5F9EA0] [list|enable|disable|enable-group|disable-group]", "Toggle tools or groups (e.g. file_ops, task_mgmt)")
     table.add_row("[bold #5F9EA0]/setup[/bold #5F9EA0]", "Re-run the first-run setup wizard")
     table.add_row("[bold #5F9EA0]/cron[/bold #5F9EA0] [list|add|remove|enable|disable|run]", "Manage scheduled cron jobs")
+    table.add_row("[bold #5F9EA0]:[/bold #5F9EA0]<command>", "Run a shell command (e.g. :git status)")
     console.print(Panel(table, title="[bold #5F9EA0]Commands[/bold #5F9EA0]", border_style="grey23", padding=(0, 2)))

package/src/ui/main.py CHANGED Viewed

@@ -507,6 +507,7 @@ def main():
                 thinking_indicator.start()
                 INPUT_BLOCKED['blocked'] = True
                 try:
+                    console.print("─" * console.width, style="rgb(30,30,30)")
                     console.print()  # Extra newline after user input to separate from LLM response
                     # Add user message
                     if TOOLS_ENABLED:

package/src/utils/settings.py CHANGED Viewed

@@ -28,10 +28,14 @@ def left_align_headings(text: str) -> str:
 @dataclass
 class ServerSettings:
     """Local llama-server configuration."""
-    ngl_layers: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("ngl_layers", 30))
+    ngl_layers: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("ngl_layers", 99))
     ctx_size: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("ctx_size", 8192))
     n_predict: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("n_predict", 8192))
     rope_scale: float = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("rope_scale", 1.0))
+    threads: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("threads", 4))
+    batch_size: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("batch_size", 2048))
+    ubatch_size: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("ubatch_size", 512))
+    flash_attn: bool = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("flash_attn", True))
     health_check_timeout_sec: int = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("health_check_timeout_sec", 120))
     health_check_interval_sec: float = field(default_factory=lambda: _CONFIG.get("SERVER_SETTINGS", {}).get("health_check_interval_sec", 1.0))
@@ -104,6 +108,18 @@ class ContextSettings:
             self.hard_limit_tokens = int(self.max_context_window * 0.9)
+@dataclass
+class PromptSettings:
+    """Prompt variant selection."""
+    variant: str = field(default_factory=lambda: _CONFIG.get("PROMPT_SETTINGS", {}).get("variant", "micro"))
+@dataclass
+class DreamSettings:
+    """Dream memory consolidation settings."""
+    enabled: bool = field(default_factory=lambda: _CONFIG.get("DREAM_SETTINGS", {}).get("enabled", True))
 @dataclass
 class ObsidianSettings:
     """Obsidian vault integration settings.
@@ -165,8 +181,9 @@ tool_settings = ToolSettings()
 file_settings = FileSettings()
 context_settings = ContextSettings()
 sub_agent_settings = SubAgentSettings()
+dream_settings = DreamSettings()
 obsidian_settings = ObsidianSettings()
+prompt_settings = PromptSettings()
 # Tool execution constants
 MAX_TOOL_CALLS = tool_settings.max_tool_calls
 MAX_COMMAND_OUTPUT_LINES = tool_settings.max_command_output_lines