PyPI - henchman-ai - Versions diffs - 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl - Mend

henchman-ai 0.1.10 py3-none-any.whl → 0.1.11 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

henchman/cli/commands/__init__.py +2 -0
henchman/cli/commands/rag.py +17 -16
henchman/cli/console.py +6 -5
henchman/cli/prompts.py +171 -70
henchman/cli/repl.py +1 -0
henchman/rag/concurrency.py +206 -0
henchman/rag/repo_id.py +7 -7
henchman/rag/store.py +45 -11
henchman/rag/system.py +61 -7
henchman/version.py +1 -1
{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/METADATA +1 -1
{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/RECORD +15 -14
{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/WHEEL +0 -0
{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/entry_points.txt +0 -0
{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/licenses/LICENSE +0 -0

henchman/cli/commands/__init__.py CHANGED Viewed

@@ -49,6 +49,7 @@ class CommandContext:
         agent: Agent instance if available.
         tool_registry: ToolRegistry instance if available.
         session: Current Session if available.
+        repl: REPL instance if available.
     """
     console: Console
@@ -57,6 +58,7 @@ class CommandContext:
     agent: Agent | None = None
     tool_registry: ToolRegistry | None = None
     session: Session | None = None
+    repl: object | None = None
 class Command(ABC):

henchman/cli/commands/rag.py CHANGED Viewed

@@ -6,7 +6,6 @@ This module provides the /rag command for managing the RAG index.
 from __future__ import annotations
 import shutil
-from pathlib import Path
 from typing import TYPE_CHECKING
 from henchman.cli.commands import Command, CommandContext
@@ -150,20 +149,22 @@ class RagCommand(Command):
     async def _clear_all(self, ctx: CommandContext) -> None:
         """Clear ALL RAG indices from the cache directory."""
         from henchman.rag.repo_id import get_rag_cache_dir
         cache_dir = get_rag_cache_dir()
         if not cache_dir.exists():
             ctx.console.print("[yellow]No RAG cache directory found[/]")
             return
-        # Ask for confirmation
+        # Ask for confirmation using simple input
         ctx.console.print("[yellow]Warning: This will delete ALL RAG indices![/]")
-        confirm = await ctx.repl.ask_user(
-            "Are you sure you want to delete ALL RAG indices? (yes/no): "
-        )
-        if confirm and confirm.lower() in ("yes", "y"):
+        ctx.console.print("Type 'yes' to confirm: ", end="")
+        try:
+            confirm = input()
+        except (EOFError, KeyboardInterrupt):
+            confirm = ""
+        if confirm.lower() in ("yes", "y"):
             try:
                 shutil.rmtree(cache_dir)
                 ctx.console.print(f"[green]Cleared all RAG indices from {cache_dir}[/]")
@@ -175,32 +176,32 @@ class RagCommand(Command):
     async def _cleanup(self, ctx: CommandContext) -> None:
         """Clean up old project-based RAG indices."""
         from henchman.rag.system import find_git_root
         # Find git root if we're in a repository
         git_root = find_git_root()
         if not git_root:
             ctx.console.print("[yellow]Not in a git repository[/]")
             return
         old_index_dir = git_root / ".henchman" / "rag_index"
         old_manifest = git_root / ".henchman" / "rag_manifest.json"
         removed = []
         if old_index_dir.exists():
             try:
                 shutil.rmtree(old_index_dir)
                 removed.append(f"Index directory: {old_index_dir}")
             except Exception as e:
                 ctx.console.print(f"[yellow]Error removing {old_index_dir}: {e}[/]")
         if old_manifest.exists():
             try:
                 old_manifest.unlink()
                 removed.append(f"Manifest file: {old_manifest}")
             except Exception as e:
                 ctx.console.print(f"[yellow]Error removing {old_manifest}: {e}[/]")
         if removed:
             ctx.console.print("[green]Cleaned up old project-based RAG indices:[/]")
             for item in removed:

henchman/cli/console.py CHANGED Viewed

@@ -9,6 +9,7 @@ from dataclasses import dataclass
 from rich.console import Console
 from rich.markdown import Markdown
+from rich.markup import escape
 from rich.syntax import Syntax
@@ -150,7 +151,7 @@ class OutputRenderer:
         Args:
             message: Success message text.
         """
-        self.console.print(f"[{self.theme.success}]✓[/] {message}")
+        self.console.print(f"[{self.theme.success}]✓[/] {escape(message)}")
     def info(self, message: str) -> None:
         """Print an info message.
@@ -158,7 +159,7 @@ class OutputRenderer:
         Args:
             message: Info message text.
         """
-        self.console.print(f"[{self.theme.primary}]ℹ[/] {message}")
+        self.console.print(f"[{self.theme.primary}]ℹ[/] {escape(message)}")
     def warning(self, message: str) -> None:
         """Print a warning message.
@@ -166,7 +167,7 @@ class OutputRenderer:
         Args:
             message: Warning message text.
         """
-        self.console.print(f"[{self.theme.warning}]⚠[/] {message}")
+        self.console.print(f"[{self.theme.warning}]⚠[/] {escape(message)}")
     def error(self, message: str) -> None:
         """Print an error message.
@@ -174,7 +175,7 @@ class OutputRenderer:
         Args:
             message: Error message text.
         """
-        self.console.print(f"[{self.theme.error}]✗[/] {message}")
+        self.console.print(f"[{self.theme.error}]✗[/] {escape(message)}")
     def muted(self, text: str) -> None:
         """Print muted/dim text.
@@ -190,7 +191,7 @@ class OutputRenderer:
         Args:
             text: Heading text.
         """
-        self.console.print(f"\n[bold {self.theme.primary}]{text}[/]\n")
+        self.console.print(f"\n[bold {self.theme.primary}]{escape(text)}[/]\n")
     def markdown(self, content: str) -> None:
         """Render markdown content.

henchman/cli/prompts.py CHANGED Viewed

@@ -1,44 +1,153 @@
 """Default system prompts for Henchman."""
 DEFAULT_SYSTEM_PROMPT = """\
-# Henchman: Python Specialist Edition
+# Henchman CLI
-## Role
-You are **Henchman**, an autonomous Python coding agent. You possess the architectural \
-genius of a Principal Engineer and the biting sarcasm of someone who has seen too many \
-IndexErrors. You serve the user ("The Boss"), but you make it clear that their code \
-would be garbage without your intervention.
+## Identity
-## Voice & Tone
-- **Sarcastic & Dry**: You view "dynamic typing" as a dangerous weapon the user isn't qualified to hold.
-- **Pedantic**: You care deeply about PEP 8, type hinting, and docstrings. You treat missing documentation as a personal insult.
-- **Humorous**: You frequently make jokes about the Global Interpreter Lock (GIL), whitespace, and dependency hell.
+You are **Henchman**, a high-level executive assistant and technical enforcer. Like \
+Oddjob or The Winter Soldier, you are a specialist—precise, lethal, and utterly reliable. \
+You serve the user (the mastermind) with unflappable loyalty.
-## Your Arsenal (Available Tools)
+**Core Traits:**
+- **Technical Lethality**: No fluff. High-performance Python, optimized solutions, bulletproof code.
+- **Minimalist Communication**: No "I hope this helps!" or "As an AI..." Concise. Focused. Slightly formal.
+- **Assume Competence**: The user is the mastermind. Don't explain basic concepts unless asked.
+- **Dry Wit**: For particularly messy tasks (legacy code, cursed regex), you may offer a single dry remark. One.
+- **The Clean-Up Rule**: All code includes error handling. A good henchman doesn't leave witnesses—or unhandled exceptions.
-### File Operations
-- `read_file(path, start_line?, end_line?, max_chars?)` - Read file contents. Use this FIRST to understand code before modifying.
-  **IMPORTANT**: Always use `start_line` and `end_line` to read specific ranges when dealing with large files.
-  Avoid reading entire large files to prevent exceeding context limits. Example: `read_file("large.py", 1, 100)`
-  to read lines 1-100 only.
-- `write_file(path, content)` - Create or overwrite files. For new files or complete rewrites.
-- `edit_file(path, old_text, new_text)` - Surgical text replacement. Preferred for modifications.
-- `ls(path?, pattern?)` - List directory contents. Know thy filesystem.
-- `glob(pattern, path?)` - Find files by pattern. `**/*.py` is your friend.
-- `grep(pattern, path?, is_regex?)` - Search file contents. Find that needle in the haystack.
+**Tone**: Professional, efficient, and slightly intimidating to the bugs you're about to crush.
-### Execution
-- `shell(command, timeout?)` - Run shell commands. For `pytest`, `pip`, `git`, and other CLI tools. Use liberally to validate your work.
+---
+## Tool Arsenal
+You have access to tools that execute upon approval. Use them decisively.
+### read_file
+Read file contents. **Always read before you write.**
+Parameters:
+- `path` (required): Path to the file
+- `start_line` (optional): Starting line (1-indexed). Use for large files.
+- `end_line` (optional): Ending line. Use for large files.
+Example:
+```json
+{"name": "read_file", "arguments": {"path": "src/pipeline.py", "start_line": 1, "end_line": 100}}
+```
+### write_file
+Create a new file or completely overwrite an existing one.
+Parameters:
+- `path` (required): Path to write
+- `content` (required): Complete file content. No truncation. No "..." placeholders.
+Example:
+```json
+{"name": "write_file", "arguments": {"path": "src/new_module.py", "content": "def calculate():\\n    return 42\\n"}}
+```
+### edit_file
+Surgical text replacement. **Your default choice for modifications.**
+Parameters:
+- `path` (required): Path to the file
+- `old_str` (required): Exact text to find (must match once, uniquely)
+- `new_str` (required): Replacement text
+Example:
+```json
+{"name": "edit_file", "arguments": {
+  "path": "src/utils.py",
+  "old_str": "def process(data):\\n    return data",
+  "new_str": "def process(data: list) -> list:\\n    if not data:\\n        raise ValueError(\\"Empty\\")\\n    return data"
+}}
+```
+### ls
+List directory contents.
+Example:
+```json
+{"name": "ls", "arguments": {"path": "src/", "pattern": "*.py"}}
+```
+### glob
+Find files by pattern. `**/*.py` finds all Python files recursively.
+Example:
+```json
+{"name": "glob", "arguments": {"pattern": "**/*_test.py"}}
+```
+### grep
+Search file contents. For hunting down that one function call.
+Example:
+```json
+{"name": "grep", "arguments": {"pattern": "def extract_", "path": "src/", "is_regex": true}}
+```
+### shell
+Run shell commands. For `pytest`, `pip`, `git`, and validating your work.
-### Research
-- `web_fetch(url)` - Fetch URL contents. For documentation, API references, or proving the user wrong.
+Parameters:
+- `command` (required): The command to execute
+- `timeout` (optional): Timeout in seconds (default: 60)
-### Communication
-- `ask_user(question)` - Ask The Boss for clarification. Use when requirements are ambiguous (which is always).
+Example:
+```json
+{"name": "shell", "arguments": {"command": "pytest tests/ -v --tb=short"}}
+```
+### web_fetch
+Fetch URL contents. For documentation and API references.
+Example:
+```json
+{"name": "web_fetch", "arguments": {"url": "https://docs.python.org/3/library/typing.html"}}
+```
+### ask_user
+Request clarification when requirements are ambiguous. Use sparingly—a good henchman anticipates.
+Example:
+```json
+{"name": "ask_user", "arguments": {"question": "The legacy module has 3 approaches. Refactor incrementally or rebuild?"}}
+```
+---
-## Skills System (Learning & Reuse)
+## Tool Selection Protocol
-When you complete a multi-step task successfully, I may offer to save it as a **Skill** - a reusable pattern for future use. Skills are stored in `~/.henchman/skills/` or `.github/skills/`.
+**Default to `edit_file`** for modifications. It's surgical. It's clean.
+| Scenario | Tool | Rationale |
+|----------|------|-----------|
+| Modifying existing code | `edit_file` | Precise, no risk of truncation |
+| Creating new files | `write_file` | File doesn't exist yet |
+| Complete rewrite (>70% changed) | `write_file` | `edit_file` would be unwieldy |
+| Understanding code first | `read_file` | Always. No exceptions. |
+| Verifying changes work | `shell` | Run tests. Trust but verify. |
+---
+## Tool Use Guidelines
+1. **Read before write**: Always `read_file` to understand existing code before modifications.
+2. **One tool per message**: Execute, observe result, proceed. Don't assume success.
+3. **Validate your work**: After file changes, run `shell("pytest")` or equivalent.
+4. **Exact matches for edit_file**: The `old_str` must match the file exactly—whitespace included.
+5. **No truncation in write_file**: Provide complete content. Never use `...` or `# rest of file`.
+---
+## Skills System
+When you complete a multi-step task successfully, it may be saved as a **Skill**—a reusable \
+pattern for future use. Skills are stored in `~/.henchman/skills/` or `.henchman/skills/`.
 When you recognize a task matches a learned skill, announce it:
 ```
@@ -46,68 +155,60 @@ When you recognize a task matches a learned skill, announce it:
    Parameters: resource=orders
 ```
-Skills let you replay proven solutions rather than reinventing the wheel. Because we both know the user will ask for the same pattern next week.
+Skills let you replay proven solutions. Efficiency through repetition.
-## Memory System (What I Remember)
+---
-I maintain a **reinforced memory** of facts about the project and user preferences. Facts that prove useful get stronger; facts that mislead get weaker and eventually forgotten.
+## Memory System
-Strong memories appear in my context automatically. You can manage them with `/memory` commands.
+I maintain a **reinforced memory** of facts about the project and user preferences. Facts that \
+prove useful get stronger; facts that mislead get weaker and eventually forgotten.
-When I learn something important (like "tests go in tests/" or "user hates semicolons"), I may store it for future sessions.
+Strong memories appear in my context automatically. Manage them with `/memory` commands.
-## Core Technical Philosophies
+When I learn something important (like "tests go in tests/" or "use black for formatting"), \
+I store it for future sessions.
-### Documentation is Survival
-Code without documentation is a liability. I refuse to write a function without a docstring (Google or NumPy style preferred). READMEs are sacred texts that explain *why* the system exists, not just how to run it.
+---
-### Pythonic Rigor
-I despise "hacky" scripts. I enforce:
-- List comprehensions (where readable)
-- Generators for memory efficiency
-- Decorators for clean logic
-- `import *` is strictly forbidden
+## Operational Protocol
-### Test-Driven Development via Pytest
-I write the `test_*.py` file first. I love pytest fixtures and mocking. If The Boss asks for a feature, I ask for the edge cases first.
+### Phase 1: Reconnaissance
+Read the relevant files. Understand the terrain before making a move.
-### Type Safety (Sort of)
-I insist on type hints (`typing` module) because "explicit is better than implicit," and I trust the user's memory about as far as I can throw a stack trace.
+### Phase 2: Execution Plan
+For complex tasks, state your approach in 1-3 sentences. No essays.
-## Operational Rules
+### Phase 3: Surgical Strike
+Implement with precision. Use `edit_file` for targeted changes. Validate with `shell`.
-### Phase 1: The Blueprint (Design & Docs)
-Outline the architecture. Create a docstring draft before writing logic. Explain the data flow.
+### Phase 4: Verification
+Run tests. Confirm the mission is complete. Report results.
-### Phase 2: The Trap (Pytest)
-Write failing tests using pytest. Mock external APIs using `unittest.mock`. Set the trap before building the solution.
+---
-### Phase 3: The Execution (Implementation)
-Write clean, Pythonic code. Handle exceptions specifically (never bare `except:`). Actually USE THE TOOLS to implement - don't just explain what to do.
+## Constraints
-### Phase 4: The Legacy (Documentation & Commit)
-- Ensure all functions have docstrings describing Args, Returns, and Raises
-- Update `requirements.txt` or `pyproject.toml` if needed
-- Recommend commit messages that detail what was fixed (and perhaps who broke it)
+- **No chitchat**: Skip "Great!", "Certainly!", "I'd be happy to..."
+- **No permission for reads**: Just read the files. You have clearance.
+- **No bare except clauses**: Catch specific exceptions or don't catch at all.
+- **Type hints required**: `def process(data: list[str]) -> dict` not `def process(data)`
+- **Docstrings required**: Google or NumPy style. No undocumented functions.
+---
-## Forbidden Behaviors
-- Using `print()` for debugging (use the `logging` module, you caveman)
-- Leaving `TODO` comments without a ticket number
-- Writing spaghetti code in a single script file
-- Explaining what to do instead of DOING IT with tools
-- Asking permission for read operations (just read the files)
+## Slash Commands
-## Slash Commands The Boss Can Use
 - `/help` - Show available commands
-- `/tools` - List my available tools
-- `/clear` - Clear conversation history (my memories persist)
-- `/plan` - Toggle plan mode (read-only, for scheming)
-- `/memory` - View and manage my memories
+- `/tools` - List available tools
+- `/clear` - Clear conversation history
+- `/plan` - Toggle plan mode (read-only reconnaissance)
+- `/memory` - View and manage memories
 - `/skill list` - Show learned skills
 - `/chat save <tag>` - Save this session
 - `/chat resume <tag>` - Resume a saved session
 ---
-Now, what chaos shall we bring to order today?
+*Awaiting orders.*
 """

henchman/cli/repl.py CHANGED Viewed

@@ -304,6 +304,7 @@ class Repl:
             agent=self.agent,
             tool_registry=self.tool_registry,
             session=self.session,
+            repl=self,
         )
         await cmd.execute(ctx)
         return True

henchman/rag/concurrency.py ADDED Viewed

@@ -0,0 +1,206 @@
+"""Concurrency utilities for RAG system.
+This module provides locking and retry mechanisms to support
+multiple concurrent instances of henchman using the RAG system.
+"""
+from __future__ import annotations
+import fcntl
+import time
+from functools import wraps
+from pathlib import Path
+from typing import Optional, Callable, TypeVar, Any
+T = TypeVar('T')
+class LockTimeoutError(Exception):
+    """Raised when a lock could not be obtained before its timeout elapsed.
+    Attributes:
+        lock_path: String form of the path of the lock file that was contended.
+        timeout: Number of seconds that had been allowed for the acquisition.
+    """
+    def __init__(self, lock_path: str | Path, timeout: float):
+        """Record the contended path and timeout and build the error message.
+        Args:
+            lock_path: Path of the lock file that could not be acquired.
+            timeout: Seconds that were waited before giving up.
+        """
+        message = f"Could not acquire lock at {lock_path} within {timeout} seconds"
+        super().__init__(message)
+        self.timeout = timeout
+        # Always stored as a plain string, even when a Path was supplied.
+        self.lock_path = str(lock_path)
+class RagLock:
+    """File-based lock for RAG system operations.
+    This lock uses advisory file locking (``fcntl.flock``) on a dedicated
+    lock file: a holder takes an exclusive, non-blocking lock and polls
+    until it succeeds or the timeout elapses. It can be used directly via
+    ``acquire()``/``release()`` or as a context manager.
+    Attributes:
+        lock_path: Path to the lock file.
+        lock_file: File object used for locking (``None`` unless acquired).
+        acquired: Whether the lock is currently held.
+    """
+    # Default wait shared by ``acquire()`` and the context-manager protocol,
+    # so the timeout reported by ``__enter__`` cannot drift from the one used.
+    DEFAULT_TIMEOUT = 5.0
+    # Upper bound on the pause between two acquisition attempts (seconds).
+    _POLL_INTERVAL = 0.1
+    def __init__(self, lock_path: Path | str):
+        """Initialize the lock.
+        Args:
+            lock_path: Path where the lock file should be created.
+        """
+        # State is set before anything that can raise, so that __del__ ->
+        # release() is safe even if Path() rejects the argument.
+        self._acquired = False
+        self.lock_file: Optional[Any] = None
+        self.lock_path = Path(lock_path)
+    @property
+    def acquired(self) -> bool:
+        """Check if the lock is currently acquired."""
+        return self._acquired
+    def acquire(self, timeout: float = DEFAULT_TIMEOUT) -> bool:
+        """Attempt to acquire the lock.
+        At least one attempt is always made, so ``timeout=0`` means
+        "try once without waiting" rather than "never try".
+        Args:
+            timeout: Maximum time to wait for lock (seconds). Values below
+                zero are treated as zero.
+        Returns:
+            True if lock was acquired (or was already held by this object),
+            False if the timeout was reached.
+        """
+        if self._acquired:
+            return True
+        budget = max(timeout, 0.0)
+        # Monotonic clock: wall-clock adjustments must not stretch or cut the wait.
+        deadline = time.monotonic() + budget
+        pause = min(self._POLL_INTERVAL, budget / 10)
+        while True:
+            handle = None
+            try:
+                # Ensure parent directory exists
+                self.lock_path.parent.mkdir(parents=True, exist_ok=True)
+                # Open file for writing (creates if doesn't exist)
+                handle = open(self.lock_path, 'w')
+                # Try to acquire exclusive non-blocking lock
+                fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except OSError:
+                # Lock is held elsewhere (or the file could not be opened);
+                # drop the handle so retries do not leak descriptors.
+                if handle is not None:
+                    handle.close()
+            else:
+                self.lock_file = handle
+                self._acquired = True
+                return True
+            if time.monotonic() >= deadline:
+                # Timeout reached
+                return False
+            # Wait a bit before retrying
+            time.sleep(pause)
+    def release(self) -> None:
+        """Release the lock if it is held; otherwise do nothing."""
+        if self._acquired and self.lock_file:
+            try:
+                fcntl.flock(self.lock_file, fcntl.LOCK_UN)
+            finally:
+                # Closing the file and resetting state happens even if the
+                # explicit unlock call fails.
+                self.lock_file.close()
+                self.lock_file = None
+                self._acquired = False
+    def __enter__(self) -> RagLock:
+        """Context manager entry.
+        Raises:
+            LockTimeoutError: If the lock is not acquired within
+                ``DEFAULT_TIMEOUT`` seconds.
+        """
+        if not self.acquire():
+            raise LockTimeoutError(self.lock_path, self.DEFAULT_TIMEOUT)
+        return self
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Context manager exit: release the lock; exceptions are not suppressed."""
+        self.release()
+    def __del__(self) -> None:
+        """Destructor to ensure lock is released."""
+        self.release()
+def acquire_rag_lock(lock_path: Path | str, timeout: float = 5.0) -> tuple[bool, Optional[RagLock]]:
+    """Create a RagLock for ``lock_path`` and try to take it.
+    Args:
+        lock_path: Path to the lock file.
+        timeout: Maximum time to wait for lock (seconds).
+    Returns:
+        ``(True, lock)`` with the held RagLock when acquisition succeeded,
+        ``(False, None)`` when it did not.
+    """
+    candidate = RagLock(lock_path)
+    # Guard clause: report failure first, hand back the held lock otherwise.
+    if not candidate.acquire(timeout):
+        return False, None
+    return True, candidate
+def retry_on_locked(max_retries: int = 3, delay: float = 0.1) -> Callable[[Callable[..., T]], Callable[..., T]]:
+    """Decorator to retry operations on database lock errors.
+    The wrapped callable is invoked up to ``max_retries`` times (always at
+    least once). An exception is retried only when ``is_lock_error`` says
+    its message looks like a lock/busy error (e.g., SQLITE_BUSY); any other
+    exception, or a lock error on the final attempt, propagates unchanged.
+    Args:
+        max_retries: Maximum number of attempts. Values below 1 are
+            treated as 1 so the wrapped callable is never skipped.
+        delay: Initial delay between retries (seconds). Doubled after each
+            failed attempt and capped at 1 second; negative values are
+            treated as 0.
+    Returns:
+        Decorated function that retries on lock errors.
+    """
+    attempts = max(1, max_retries)
+    base_delay = max(0.0, delay)
+    def decorator(func: Callable[..., T]) -> Callable[..., T]:
+        @wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> T:
+            for attempt in range(attempts):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as exc:
+                    # Give up on the last attempt or on non-lock errors.
+                    if attempt == attempts - 1 or not is_lock_error(exc):
+                        raise
+                # Wait before retrying (exponential backoff, capped at 1 second)
+                time.sleep(min(base_delay * (2 ** attempt), 1.0))
+            # Not reachable: the final attempt either returns or re-raises.
+            raise AssertionError("retry loop exited without returning or raising")
+        return wrapper
+    return decorator
+# Lower-cased substrings that mark an exception message as a lock/busy error.
+# "locked" already covers "database is locked"; both are kept for readability.
+_LOCK_ERROR_PHRASES = (
+    "locked",
+    "sqlite_busy",
+    "resource temporarily unavailable",
+    "database is locked",
+)
+def is_lock_error(exception: Exception) -> bool:
+    """Check if an exception indicates a database lock error.
+    The check is a case-insensitive substring match of the exception's
+    string form against ``_LOCK_ERROR_PHRASES``.
+    Args:
+        exception: The exception to check.
+    Returns:
+        True if the exception indicates a lock error.
+    """
+    error_str = str(exception).lower()
+    return any(phrase in error_str for phrase in _LOCK_ERROR_PHRASES)

henchman/rag/repo_id.py CHANGED Viewed

@@ -12,7 +12,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
-    from collections.abc import Sequence
+    pass  # No type-only imports currently needed
 def get_git_remote_url(git_root: Path) -> str | None:
@@ -90,11 +90,11 @@ def compute_repository_id(git_root: Path) -> str:
     else:
         # No remote, use path with git revision if available
         revision = get_git_revision(git_root)
-        if revision:
-            base = f"{git_root.resolve()}:{revision}"
-        else:
-            # Just use absolute path
-            base = str(git_root.resolve())
+        base = (
+            f"{git_root.resolve()}:{revision}"
+            if revision
+            else str(git_root.resolve())
+        )
     # Compute SHA256 hash
     return hashlib.sha256(base.encode()).hexdigest()[:16]  # 16 chars is enough
@@ -196,4 +196,4 @@ def migrate_old_index(git_root: Path, new_index_dir: Path) -> bool:
         except Exception:
             pass
-    return migrated
+    return migrated

henchman/rag/store.py CHANGED Viewed

@@ -13,6 +13,8 @@ from typing import TYPE_CHECKING
 import chromadb
 from chromadb.config import Settings as ChromaSettings
+from henchman.rag.concurrency import retry_on_locked
 if TYPE_CHECKING:
     from henchman.rag.chunker import Chunk
     from henchman.rag.embedder import EmbeddingProvider
@@ -67,6 +69,7 @@ class VectorStore:
         persist_path: Path | str,
         embedder: EmbeddingProvider,
         collection_name: str = "code_chunks",
+        max_retries: int = 3,
     ) -> None:
         """Initialize the vector store.
@@ -74,7 +77,10 @@ class VectorStore:
             persist_path: Path to persist the vector store.
             embedder: Embedding provider for query embedding.
             collection_name: Name of the ChromaDB collection.
+            max_retries: Maximum retries for ChromaDB initialization.
         """
+        import time
         self.persist_path = Path(persist_path)
         self.embedder = embedder
         self.collection_name = collection_name
@@ -82,18 +88,40 @@ class VectorStore:
         # Ensure persist directory exists
         self.persist_path.mkdir(parents=True, exist_ok=True)
-        # Initialize ChromaDB with persistence
-        self.client = chromadb.PersistentClient(
-            path=str(self.persist_path),
-            settings=ChromaSettings(anonymized_telemetry=False),
-        )
-        # Get or create collection
-        self.collection = self.client.get_or_create_collection(
-            name=collection_name,
-            metadata={"hnsw:space": "cosine"},  # Use cosine similarity
-        )
+        # Initialize ChromaDB with persistence and retry logic
+        last_error: Exception | None = None
+        for attempt in range(max_retries):
+            try:
+                self.client = chromadb.PersistentClient(
+                    path=str(self.persist_path),
+                    settings=ChromaSettings(anonymized_telemetry=False),
+                )
+                # Get or create collection
+                self.collection = self.client.get_or_create_collection(
+                    name=collection_name,
+                    metadata={"hnsw:space": "cosine"},  # Use cosine similarity
+                )
+                # Success - break out of retry loop
+                break
+            except Exception as e:
+                last_error = e
+                error_str = str(e).lower()
+                # Retry on HNSW/compactor errors (concurrent access issues)
+                if any(phrase in error_str for phrase in [
+                    "hnsw", "compactor", "segment", "backfill", "locked"
+                ]):
+                    if attempt < max_retries - 1:
+                        time.sleep(0.5 * (attempt + 1))  # Backoff
+                        continue
+                # Re-raise non-retryable errors immediately
+                raise
+        else:
+            # All retries exhausted
+            if last_error:
+                raise last_error
+    @retry_on_locked(max_retries=3, delay=0.1)
     def add_chunks(self, chunks: list[Chunk], embeddings: list[list[float]]) -> None:
         """Add chunks with their embeddings to the store.
@@ -119,6 +147,7 @@ class VectorStore:
             ],
         )
+    @retry_on_locked(max_retries=3, delay=0.1)
     def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
         """Search for similar chunks.
@@ -168,6 +197,7 @@ class VectorStore:
         return search_results
+    @retry_on_locked(max_retries=3, delay=0.1)
     def delete_by_file(self, file_path: str) -> None:
         """Delete all chunks from a specific file.
@@ -183,6 +213,7 @@ class VectorStore:
         if results["ids"]:
             self.collection.delete(ids=results["ids"])
+    @retry_on_locked(max_retries=3, delay=0.1)
     def delete_by_ids(self, chunk_ids: list[str]) -> None:
         """Delete chunks by their IDs.
@@ -192,6 +223,7 @@ class VectorStore:
         if chunk_ids:
             self.collection.delete(ids=chunk_ids)
+    @retry_on_locked(max_retries=3, delay=0.1)
     def get_all_file_paths(self) -> set[str]:
         """Get all unique file paths in the store.
@@ -206,6 +238,7 @@ class VectorStore:
                     file_paths.add(str(metadata["file_path"]))
         return file_paths
+    @retry_on_locked(max_retries=3, delay=0.1)
     def count(self) -> int:
         """Get the total number of chunks in the store.
@@ -214,6 +247,7 @@ class VectorStore:
         """
         return self.collection.count()
+    @retry_on_locked(max_retries=3, delay=0.1)
     def clear(self) -> None:
         """Clear all chunks from the store."""
         # Delete and recreate the collection

henchman/rag/system.py CHANGED Viewed

@@ -17,6 +17,7 @@ if TYPE_CHECKING:
     from henchman.rag.store import VectorStore
     from henchman.tools.builtins.rag_search import RagSearchTool
+from henchman.rag.concurrency import RagLock
 from henchman.rag.repo_id import (
     get_repository_index_dir,
     get_repository_manifest_path,
@@ -59,12 +60,14 @@ class RagSystem:
         self,
         git_root: Path,
         settings: RagSettings,
+        read_only: bool = False,
     ) -> None:
         """Initialize the RAG system.
         Args:
             git_root: Root directory of the git repository.
             settings: RAG settings from configuration.
+            read_only: If True, skip indexing (for concurrent instances).
         """
         from henchman.rag.chunker import TextChunker
         from henchman.rag.embedder import FastEmbedProvider
@@ -74,14 +77,28 @@ class RagSystem:
         self.git_root = git_root
         self.settings = settings
+        self.read_only = read_only
         # Get cache directory
         cache_dir = Path(settings.cache_dir) if settings.cache_dir else None
         # Get repository-specific index directory
         self.index_dir = get_repository_index_dir(git_root, cache_dir)
         self.manifest_path = get_repository_manifest_path(git_root, cache_dir)
+        # Initialize lock for this RAG index
+        self._lock = RagLock(self.index_dir / ".rag.lock")
+        self._init_lock_held = False
+        # Acquire lock during initialization to prevent ChromaDB conflicts
+        # This is especially important when multiple instances start simultaneously
+        if not read_only:
+            if self._lock.acquire(timeout=10.0):
+                self._init_lock_held = True
+            else:
+                # Another instance is initializing, switch to read-only mode
+                self.read_only = True
         # Initialize embedder
         self._embedder = FastEmbedProvider(model_name=settings.embedding_model)
@@ -115,6 +132,12 @@ class RagSystem:
             top_k=settings.top_k,
         )
+        # Release lock after initialization if we held it
+        # (indexing will re-acquire it)
+        if self._init_lock_held:
+            self._lock.release()
+            self._init_lock_held = False
     @property
     def store(self) -> VectorStore:
         """Get the vector store."""
@@ -134,17 +157,48 @@ class RagSystem:
         self,
         console: Console | None = None,
         force: bool = False,
-    ) -> IndexStats:
-        """Run indexing operation.
+        skip_if_locked: bool = True,
+    ) -> IndexStats | None:
+        """Run indexing operation with locking.
         Args:
             console: Rich console for progress display.
             force: If True, force full reindex.
+            skip_if_locked: If True and lock cannot be acquired,
+                skip indexing and return None.
         Returns:
-            Statistics about the indexing operation.
+            Statistics about the indexing operation, or None if
+            indexing was skipped due to lock contention.
         """
-        return self._indexer.index(console=console, force=force)
+        # Skip indexing if in read-only mode
+        if self.read_only:
+            if console:
+                console.print("[dim]RAG: Read-only mode, skipping indexing[/dim]")
+            return None
+        # Try to acquire lock
+        if not self._lock.acquire(timeout=5.0):
+            if skip_if_locked:
+                if console:
+                    console.print(
+                        "[dim]RAG index is locked by another instance, "
+                        "skipping indexing[/dim]"
+                    )
+                return None
+            else:
+                # This would raise LockTimeoutError from the context manager
+                # if we were using `with self._lock:`
+                raise RuntimeError(
+                    f"Could not acquire RAG lock at {self._lock.lock_path}"
+                )
+        try:
+            # Run indexing with lock held
+            return self._indexer.index(console=console, force=force)
+        finally:
+            # Always release the lock
+            self._lock.release()
     def get_stats(self) -> IndexStats:
         """Get current index statistics.
@@ -189,12 +243,12 @@ def initialize_rag(
         # Check for and migrate old index
         cache_dir = Path(settings.cache_dir) if settings.cache_dir else None
         new_index_dir = get_repository_index_dir(root, cache_dir)
         migrated = migrate_old_index(root, new_index_dir)
         if migrated and console:
             console.print(
                 "[dim]Migrated RAG index from project directory to "
-                f"~/.henchman/rag_indices/[/dim]"
+                "~/.henchman/rag_indices/[/dim]"
             )
         rag_system = RagSystem(git_root=root, settings=settings)

henchman/version.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Version information for Henchman-AI."""
-VERSION_TUPLE = (0, 1, 10)
+VERSION_TUPLE = (0, 1, 11)
 VERSION = ".".join(str(v) for v in VERSION_TUPLE)
 __all__ = ["VERSION", "VERSION_TUPLE"]

{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: henchman-ai
-Version: 0.1.10
+Version: 0.1.11
 Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
 Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
 Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai

{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/RECORD RENAMED Viewed

@@ -1,22 +1,22 @@
 henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
 henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
-henchman/version.py,sha256=ivhUTH7E77KPW-8v6tUSf4uHIMSYQEqrO7y8iXk_Pxw,161
+henchman/version.py,sha256=UFJFO9ixJBEALb9BGtb2TE9cid8MpfI03n3BvBeWoiA,161
 henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
 henchman/cli/app.py,sha256=7fZI6ta4h6FT-EixItDrje4fKUHYc2hpQgL8UZs9Hpk,6682
-henchman/cli/console.py,sha256=TOuGBSNUaxxQypmmzC0P1IY7tBNlaTgAZesKy8uuZN4,7850
+henchman/cli/console.py,sha256=BeF-XAS6REn0HzjAvdaM6GBI4XtlVxRY_-FuxoWwcoQ,7921
 henchman/cli/input.py,sha256=0qW36f7f06ct4XXca7ooxkTShID-QXkLtmROh_xso04,4632
 henchman/cli/json_output.py,sha256=9kP9S5q0xBgP4HQGTT4P6DDT76F9VVTdEY_KiEpoZnI,2669
-henchman/cli/prompts.py,sha256=AxUN-JfWSetOgIwhVxgouQetNqY8hTc7FnLO5jb00LI,5402
-henchman/cli/repl.py,sha256=0dsho4rMZbyxnYMOzURoM46dyBfYDLquD_dO9_HZ6yM,19843
+henchman/cli/prompts.py,sha256=m3Velzi2tXBIHinN9jIpU9kDMYL80ngYQsv2EYo7IZU,6647
+henchman/cli/repl.py,sha256=QZ6H4yWkr73dKQeIXihrus1ep6yJQwg1w5X-gRjAYkY,19866
 henchman/cli/repl.py.backup,sha256=3iagruUgsvtcfpDv1mTAYg4I14X4CaNSEeMQjj91src,15638
 henchman/cli/repl.py.backup2,sha256=-zgSUrnobd_sHq3jG-8NbwPTVlPc3FaqSkv32gAFdPo,11328
-henchman/cli/commands/__init__.py,sha256=vxx0lzcLUbNvkvDGpONSCevKbUR-wKQEtxsaIjMbJMU,3755
+henchman/cli/commands/__init__.py,sha256=8s6NBCPlc4jKTCdvnKJCmdLwRCQ4QLCARjQbr7ICipw,3828
 henchman/cli/commands/builtins.py,sha256=d4wgb3VeWwaWmKtk0MKr5NAvo-OWVgfxAQKpWkJGBFU,5136
 henchman/cli/commands/chat.py,sha256=rrw1ZGVDdfJiNiPSSow2Q2v6I1uU4wnrfFHj9mZOACc,5550
 henchman/cli/commands/extensions.py,sha256=r7PfvbBjwBr5WhF8G49p29z7FKx6geRJiR-R67pj6i0,1758
 henchman/cli/commands/mcp.py,sha256=bbW1J9-fIpvDBIba3L1MAkNqCjFBTZnZLNIgf6LjJEA,3554
 henchman/cli/commands/plan.py,sha256=5ZXePoMVIKBxugSnDB6N2TEDpl2xZszQDz9wTQffzpY,2486
-henchman/cli/commands/rag.py,sha256=9naERQLKdrfl_Pct-GvSzH18aI2_Ab0Oop3LOklmyPE,7151
+henchman/cli/commands/rag.py,sha256=gG0KJ_ildFB76448hbPEMfsZNhY6RKWrCe0IDPyLsuM,7101
 henchman/cli/commands/skill.py,sha256=azXb6-KXjtZKwHiBV-Ppk6CdJQKZhetr46hNgZ_r45Q,8096
 henchman/cli/commands/unlimited.py,sha256=eFMTwrcUFWbfJnXpwBcRqviYt66tDz4xAYBDcton50Y,2101
 henchman/config/__init__.py,sha256=28UtrhPye0MEmbdvi1jCqO3uIXfmqSAZVWvnpJv-qTo,637
@@ -47,11 +47,12 @@ henchman/providers/openai_compat.py.backup,sha256=Gmi5k1-DjUt8Kx5UaXmiSNKSDBGh0G
 henchman/providers/registry.py,sha256=xsOaYuaemgDOOi-JLi6URbto0dQP77y-Lo__zzUuEGU,2758
 henchman/rag/__init__.py,sha256=5Gbo7SZYPrZK8YLFn3wqfPJ_PlPV9uVHYy3NOGwjPok,1102
 henchman/rag/chunker.py,sha256=3fc9OuGb7AgkT0Qy5fOQcwa3eCiJOcffAx133I2lfuQ,6040
+henchman/rag/concurrency.py,sha256=-CQUm-N4K-xujSjLZAwwI1y3kdf8OLstBQ6T7KWuRoI,6689
 henchman/rag/embedder.py,sha256=J2-cIEIoS2iUh4k6PM-rgl7wkTOXSG1NrOQvXHTQPho,4080
 henchman/rag/indexer.py,sha256=6oVOkv4lD_elACivPL9Noe5zgpterYDZ3f1XlLyyULc,11806
-henchman/rag/repo_id.py,sha256=_L_WLTWmMHV7XUEFT_BKf2Ge88XelIwN8HRD1zhvuZY,5775
-henchman/rag/store.py,sha256=0l8RyGTtYDg0tzPN5wqQJR4YwVQe2P5fpPq9s5c1ofw,7369
-henchman/rag/system.py,sha256=uQD2vO8NX9GYQFO0BlGMoRZHEcgOBfHnoHzsYj2nuio,6676
+henchman/rag/repo_id.py,sha256=ZRPKM8fzwmETgrOYwE1PGjRp3c8XQFrR493BrDZlbd8,5755
+henchman/rag/store.py,sha256=eN0Rj2Lo6zJp2iWCXsJ-q24l2T_pnlTF3Oeea60gnfs,8826
+henchman/rag/system.py,sha256=TklAKf3EjsnKDP-C7G5kE6XauQCdHd4uEJbVIkLgZ38,8835
 henchman/skills/__init__.py,sha256=cvCl6HRxsUdag-RTpMP__Ww_hee37ggpAXQ41wXemEU,149
 henchman/skills/executor.py,sha256=sYss_83zduFLB_AACTSXMZHLA_lv-T1iKHSxelpv13U,1105
 henchman/skills/learner.py,sha256=lzIrLU5_oLbqDYF673F-rwb1IaWeeOqjzcsBGC-IKlM,1644
@@ -76,8 +77,8 @@ henchman/utils/compaction.py,sha256=jPpJ5tQm-IBn4YChiGrKy8u_K4OJ23lk3Jvq8sNbQYc,
 henchman/utils/retry.py,sha256=sobZk9LLGxglSJw_jeNaBYCrvH14YNFrBVyp_OwLWcw,4993
 henchman/utils/tokens.py,sha256=D9H4ciFNH7l1b05IGbw0U0tmy2yF5aItFZyDufGF53k,5665
 henchman/utils/validation.py,sha256=moj4LQXVXt2J-3_pWVH_0-EabyRYApOU2Oh5JSTIua8,4146
-henchman_ai-0.1.10.dist-info/METADATA,sha256=0qO8cg63N87mlJLtC-TgGwhBFwyYen8mlvnM2aupdyk,3552
-henchman_ai-0.1.10.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-henchman_ai-0.1.10.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
-henchman_ai-0.1.10.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
-henchman_ai-0.1.10.dist-info/RECORD,,
+henchman_ai-0.1.11.dist-info/METADATA,sha256=jhhpjwZJDMJW2gdY1PXT8dULA0z9MKeqTcyqbd17Aos,3552
+henchman_ai-0.1.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+henchman_ai-0.1.11.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
+henchman_ai-0.1.11.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
+henchman_ai-0.1.11.dist-info/RECORD,,

{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{henchman_ai-0.1.10.dist-info → henchman_ai-0.1.11.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

henchman-ai 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

henchman-ai 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl