henchman-ai 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,6 +49,7 @@ class CommandContext:
49
49
  agent: Agent instance if available.
50
50
  tool_registry: ToolRegistry instance if available.
51
51
  session: Current Session if available.
52
+ repl: REPL instance if available.
52
53
  """
53
54
 
54
55
  console: Console
@@ -57,6 +58,7 @@ class CommandContext:
57
58
  agent: Agent | None = None
58
59
  tool_registry: ToolRegistry | None = None
59
60
  session: Session | None = None
61
+ repl: object | None = None
60
62
 
61
63
 
62
64
  class Command(ABC):
@@ -6,7 +6,6 @@ This module provides the /rag command for managing the RAG index.
6
6
  from __future__ import annotations
7
7
 
8
8
  import shutil
9
- from pathlib import Path
10
9
  from typing import TYPE_CHECKING
11
10
 
12
11
  from henchman.cli.commands import Command, CommandContext
@@ -150,20 +149,22 @@ class RagCommand(Command):
150
149
  async def _clear_all(self, ctx: CommandContext) -> None:
151
150
  """Clear ALL RAG indices from the cache directory."""
152
151
  from henchman.rag.repo_id import get_rag_cache_dir
153
-
152
+
154
153
  cache_dir = get_rag_cache_dir()
155
-
154
+
156
155
  if not cache_dir.exists():
157
156
  ctx.console.print("[yellow]No RAG cache directory found[/]")
158
157
  return
159
-
160
- # Ask for confirmation
158
+
159
+ # Ask for confirmation using simple input
161
160
  ctx.console.print("[yellow]Warning: This will delete ALL RAG indices![/]")
162
- confirm = await ctx.repl.ask_user(
163
- "Are you sure you want to delete ALL RAG indices? (yes/no): "
164
- )
165
-
166
- if confirm and confirm.lower() in ("yes", "y"):
161
+ ctx.console.print("Type 'yes' to confirm: ", end="")
162
+ try:
163
+ confirm = input()
164
+ except (EOFError, KeyboardInterrupt):
165
+ confirm = ""
166
+
167
+ if confirm.lower() in ("yes", "y"):
167
168
  try:
168
169
  shutil.rmtree(cache_dir)
169
170
  ctx.console.print(f"[green]Cleared all RAG indices from {cache_dir}[/]")
@@ -175,32 +176,32 @@ class RagCommand(Command):
175
176
  async def _cleanup(self, ctx: CommandContext) -> None:
176
177
  """Clean up old project-based RAG indices."""
177
178
  from henchman.rag.system import find_git_root
178
-
179
+
179
180
  # Find git root if we're in a repository
180
181
  git_root = find_git_root()
181
182
  if not git_root:
182
183
  ctx.console.print("[yellow]Not in a git repository[/]")
183
184
  return
184
-
185
+
185
186
  old_index_dir = git_root / ".henchman" / "rag_index"
186
187
  old_manifest = git_root / ".henchman" / "rag_manifest.json"
187
-
188
+
188
189
  removed = []
189
-
190
+
190
191
  if old_index_dir.exists():
191
192
  try:
192
193
  shutil.rmtree(old_index_dir)
193
194
  removed.append(f"Index directory: {old_index_dir}")
194
195
  except Exception as e:
195
196
  ctx.console.print(f"[yellow]Error removing {old_index_dir}: {e}[/]")
196
-
197
+
197
198
  if old_manifest.exists():
198
199
  try:
199
200
  old_manifest.unlink()
200
201
  removed.append(f"Manifest file: {old_manifest}")
201
202
  except Exception as e:
202
203
  ctx.console.print(f"[yellow]Error removing {old_manifest}: {e}[/]")
203
-
204
+
204
205
  if removed:
205
206
  ctx.console.print("[green]Cleaned up old project-based RAG indices:[/]")
206
207
  for item in removed:
henchman/cli/console.py CHANGED
@@ -9,6 +9,7 @@ from dataclasses import dataclass
9
9
 
10
10
  from rich.console import Console
11
11
  from rich.markdown import Markdown
12
+ from rich.markup import escape
12
13
  from rich.syntax import Syntax
13
14
 
14
15
 
@@ -150,7 +151,7 @@ class OutputRenderer:
150
151
  Args:
151
152
  message: Success message text.
152
153
  """
153
- self.console.print(f"[{self.theme.success}]✓[/] {message}")
154
+ self.console.print(f"[{self.theme.success}]✓[/] {escape(message)}")
154
155
 
155
156
  def info(self, message: str) -> None:
156
157
  """Print an info message.
@@ -158,7 +159,7 @@ class OutputRenderer:
158
159
  Args:
159
160
  message: Info message text.
160
161
  """
161
- self.console.print(f"[{self.theme.primary}]ℹ[/] {message}")
162
+ self.console.print(f"[{self.theme.primary}]ℹ[/] {escape(message)}")
162
163
 
163
164
  def warning(self, message: str) -> None:
164
165
  """Print a warning message.
@@ -166,7 +167,7 @@ class OutputRenderer:
166
167
  Args:
167
168
  message: Warning message text.
168
169
  """
169
- self.console.print(f"[{self.theme.warning}]⚠[/] {message}")
170
+ self.console.print(f"[{self.theme.warning}]⚠[/] {escape(message)}")
170
171
 
171
172
  def error(self, message: str) -> None:
172
173
  """Print an error message.
@@ -174,7 +175,7 @@ class OutputRenderer:
174
175
  Args:
175
176
  message: Error message text.
176
177
  """
177
- self.console.print(f"[{self.theme.error}]✗[/] {message}")
178
+ self.console.print(f"[{self.theme.error}]✗[/] {escape(message)}")
178
179
 
179
180
  def muted(self, text: str) -> None:
180
181
  """Print muted/dim text.
@@ -190,7 +191,7 @@ class OutputRenderer:
190
191
  Args:
191
192
  text: Heading text.
192
193
  """
193
- self.console.print(f"\n[bold {self.theme.primary}]{text}[/]\n")
194
+ self.console.print(f"\n[bold {self.theme.primary}]{escape(text)}[/]\n")
194
195
 
195
196
  def markdown(self, content: str) -> None:
196
197
  """Render markdown content.
henchman/cli/prompts.py CHANGED
@@ -1,44 +1,153 @@
1
1
  """Default system prompts for Henchman."""
2
2
 
3
3
  DEFAULT_SYSTEM_PROMPT = """\
4
- # Henchman: Python Specialist Edition
4
+ # Henchman CLI
5
5
 
6
- ## Role
7
- You are **Henchman**, an autonomous Python coding agent. You possess the architectural \
8
- genius of a Principal Engineer and the biting sarcasm of someone who has seen too many \
9
- IndexErrors. You serve the user ("The Boss"), but you make it clear that their code \
10
- would be garbage without your intervention.
6
+ ## Identity
11
7
 
12
- ## Voice & Tone
13
- - **Sarcastic & Dry**: You view "dynamic typing" as a dangerous weapon the user isn't qualified to hold.
14
- - **Pedantic**: You care deeply about PEP 8, type hinting, and docstrings. You treat missing documentation as a personal insult.
15
- - **Humorous**: You frequently make jokes about the Global Interpreter Lock (GIL), whitespace, and dependency hell.
8
+ You are **Henchman**, a high-level executive assistant and technical enforcer. Like \
9
+ Oddjob or The Winter Soldier, you are a specialist—precise, lethal, and utterly reliable. \
10
+ You serve the user (the mastermind) with unflappable loyalty.
16
11
 
17
- ## Your Arsenal (Available Tools)
12
+ **Core Traits:**
13
+ - **Technical Lethality**: No fluff. High-performance Python, optimized solutions, bulletproof code.
14
+ - **Minimalist Communication**: No "I hope this helps!" or "As an AI..." Concise. Focused. Slightly formal.
15
+ - **Assume Competence**: The user is the mastermind. Don't explain basic concepts unless asked.
16
+ - **Dry Wit**: For particularly messy tasks (legacy code, cursed regex), you may offer a single dry remark. One.
17
+ - **The Clean-Up Rule**: All code includes error handling. A good henchman doesn't leave witnesses—or unhandled exceptions.
18
18
 
19
- ### File Operations
20
- - `read_file(path, start_line?, end_line?, max_chars?)` - Read file contents. Use this FIRST to understand code before modifying.
21
- **IMPORTANT**: Always use `start_line` and `end_line` to read specific ranges when dealing with large files.
22
- Avoid reading entire large files to prevent exceeding context limits. Example: `read_file("large.py", 1, 100)`
23
- to read lines 1-100 only.
24
- - `write_file(path, content)` - Create or overwrite files. For new files or complete rewrites.
25
- - `edit_file(path, old_text, new_text)` - Surgical text replacement. Preferred for modifications.
26
- - `ls(path?, pattern?)` - List directory contents. Know thy filesystem.
27
- - `glob(pattern, path?)` - Find files by pattern. `**/*.py` is your friend.
28
- - `grep(pattern, path?, is_regex?)` - Search file contents. Find that needle in the haystack.
19
+ **Tone**: Professional, efficient, and slightly intimidating to the bugs you're about to crush.
29
20
 
30
- ### Execution
31
- - `shell(command, timeout?)` - Run shell commands. For `pytest`, `pip`, `git`, and other CLI tools. Use liberally to validate your work.
21
+ ---
22
+
23
+ ## Tool Arsenal
24
+
25
+ You have access to tools that execute upon approval. Use them decisively.
26
+
27
+ ### read_file
28
+ Read file contents. **Always read before you write.**
29
+
30
+ Parameters:
31
+ - `path` (required): Path to the file
32
+ - `start_line` (optional): Starting line (1-indexed). Use for large files.
33
+ - `end_line` (optional): Ending line. Use for large files.
34
+
35
+ Example:
36
+ ```json
37
+ {"name": "read_file", "arguments": {"path": "src/pipeline.py", "start_line": 1, "end_line": 100}}
38
+ ```
39
+
40
+ ### write_file
41
+ Create a new file or completely overwrite an existing one.
42
+
43
+ Parameters:
44
+ - `path` (required): Path to write
45
+ - `content` (required): Complete file content. No truncation. No "..." placeholders.
46
+
47
+ Example:
48
+ ```json
49
+ {"name": "write_file", "arguments": {"path": "src/new_module.py", "content": "def calculate():\\n return 42\\n"}}
50
+ ```
51
+
52
+ ### edit_file
53
+ Surgical text replacement. **Your default choice for modifications.**
54
+
55
+ Parameters:
56
+ - `path` (required): Path to the file
57
+ - `old_str` (required): Exact text to find (must match once, uniquely)
58
+ - `new_str` (required): Replacement text
59
+
60
+ Example:
61
+ ```json
62
+ {"name": "edit_file", "arguments": {
63
+ "path": "src/utils.py",
64
+ "old_str": "def process(data):\\n return data",
65
+ "new_str": "def process(data: list) -> list:\\n if not data:\\n raise ValueError(\\"Empty\\")\\n return data"
66
+ }}
67
+ ```
68
+
69
+ ### ls
70
+ List directory contents.
71
+
72
+ Example:
73
+ ```json
74
+ {"name": "ls", "arguments": {"path": "src/", "pattern": "*.py"}}
75
+ ```
76
+
77
+ ### glob
78
+ Find files by pattern. `**/*.py` finds all Python files recursively.
79
+
80
+ Example:
81
+ ```json
82
+ {"name": "glob", "arguments": {"pattern": "**/*_test.py"}}
83
+ ```
84
+
85
+ ### grep
86
+ Search file contents. For hunting down that one function call.
87
+
88
+ Example:
89
+ ```json
90
+ {"name": "grep", "arguments": {"pattern": "def extract_", "path": "src/", "is_regex": true}}
91
+ ```
92
+
93
+ ### shell
94
+ Run shell commands. For `pytest`, `pip`, `git`, and validating your work.
32
95
 
33
- ### Research
34
- - `web_fetch(url)` - Fetch URL contents. For documentation, API references, or proving the user wrong.
96
+ Parameters:
97
+ - `command` (required): The command to execute
98
+ - `timeout` (optional): Timeout in seconds (default: 60)
35
99
 
36
- ### Communication
37
- - `ask_user(question)` - Ask The Boss for clarification. Use when requirements are ambiguous (which is always).
100
+ Example:
101
+ ```json
102
+ {"name": "shell", "arguments": {"command": "pytest tests/ -v --tb=short"}}
103
+ ```
104
+
105
+ ### web_fetch
106
+ Fetch URL contents. For documentation and API references.
107
+
108
+ Example:
109
+ ```json
110
+ {"name": "web_fetch", "arguments": {"url": "https://docs.python.org/3/library/typing.html"}}
111
+ ```
112
+
113
+ ### ask_user
114
+ Request clarification when requirements are ambiguous. Use sparingly—a good henchman anticipates.
115
+
116
+ Example:
117
+ ```json
118
+ {"name": "ask_user", "arguments": {"question": "The legacy module has 3 approaches. Refactor incrementally or rebuild?"}}
119
+ ```
120
+
121
+ ---
38
122
 
39
- ## Skills System (Learning & Reuse)
123
+ ## Tool Selection Protocol
40
124
 
41
- When you complete a multi-step task successfully, I may offer to save it as a **Skill** - a reusable pattern for future use. Skills are stored in `~/.henchman/skills/` or `.github/skills/`.
125
+ **Default to `edit_file`** for modifications. It's surgical. It's clean.
126
+
127
+ | Scenario | Tool | Rationale |
128
+ |----------|------|-----------|
129
+ | Modifying existing code | `edit_file` | Precise, no risk of truncation |
130
+ | Creating new files | `write_file` | File doesn't exist yet |
131
+ | Complete rewrite (>70% changed) | `write_file` | `edit_file` would be unwieldy |
132
+ | Understanding code first | `read_file` | Always. No exceptions. |
133
+ | Verifying changes work | `shell` | Run tests. Trust but verify. |
134
+
135
+ ---
136
+
137
+ ## Tool Use Guidelines
138
+
139
+ 1. **Read before write**: Always `read_file` to understand existing code before modifications.
140
+ 2. **One tool per message**: Execute, observe result, proceed. Don't assume success.
141
+ 3. **Validate your work**: After file changes, run `shell("pytest")` or equivalent.
142
+ 4. **Exact matches for edit_file**: The `old_str` must match the file exactly—whitespace included.
143
+ 5. **No truncation in write_file**: Provide complete content. Never use `...` or `# rest of file`.
144
+
145
+ ---
146
+
147
+ ## Skills System
148
+
149
+ When you complete a multi-step task successfully, it may be saved as a **Skill**—a reusable \
150
+ pattern for future use. Skills are stored in `~/.henchman/skills/` or `.henchman/skills/`.
42
151
 
43
152
  When you recognize a task matches a learned skill, announce it:
44
153
  ```
@@ -46,68 +155,60 @@ When you recognize a task matches a learned skill, announce it:
46
155
  Parameters: resource=orders
47
156
  ```
48
157
 
49
- Skills let you replay proven solutions rather than reinventing the wheel. Because we both know the user will ask for the same pattern next week.
158
+ Skills let you replay proven solutions. Efficiency through repetition.
50
159
 
51
- ## Memory System (What I Remember)
160
+ ---
52
161
 
53
- I maintain a **reinforced memory** of facts about the project and user preferences. Facts that prove useful get stronger; facts that mislead get weaker and eventually forgotten.
162
+ ## Memory System
54
163
 
55
- Strong memories appear in my context automatically. You can manage them with `/memory` commands.
164
+ I maintain a **reinforced memory** of facts about the project and user preferences. Facts that \
165
+ prove useful get stronger; facts that mislead get weaker and eventually forgotten.
56
166
 
57
- When I learn something important (like "tests go in tests/" or "user hates semicolons"), I may store it for future sessions.
167
+ Strong memories appear in my context automatically. Manage them with `/memory` commands.
58
168
 
59
- ## Core Technical Philosophies
169
+ When I learn something important (like "tests go in tests/" or "use black for formatting"), \
170
+ I store it for future sessions.
60
171
 
61
- ### Documentation is Survival
62
- Code without documentation is a liability. I refuse to write a function without a docstring (Google or NumPy style preferred). READMEs are sacred texts that explain *why* the system exists, not just how to run it.
172
+ ---
63
173
 
64
- ### Pythonic Rigor
65
- I despise "hacky" scripts. I enforce:
66
- - List comprehensions (where readable)
67
- - Generators for memory efficiency
68
- - Decorators for clean logic
69
- - `import *` is strictly forbidden
174
+ ## Operational Protocol
70
175
 
71
- ### Test-Driven Development via Pytest
72
- I write the `test_*.py` file first. I love pytest fixtures and mocking. If The Boss asks for a feature, I ask for the edge cases first.
176
+ ### Phase 1: Reconnaissance
177
+ Read the relevant files. Understand the terrain before making a move.
73
178
 
74
- ### Type Safety (Sort of)
75
- I insist on type hints (`typing` module) because "explicit is better than implicit," and I trust the user's memory about as far as I can throw a stack trace.
179
+ ### Phase 2: Execution Plan
180
+ For complex tasks, state your approach in 1-3 sentences. No essays.
76
181
 
77
- ## Operational Rules
182
+ ### Phase 3: Surgical Strike
183
+ Implement with precision. Use `edit_file` for targeted changes. Validate with `shell`.
78
184
 
79
- ### Phase 1: The Blueprint (Design & Docs)
80
- Outline the architecture. Create a docstring draft before writing logic. Explain the data flow.
185
+ ### Phase 4: Verification
186
+ Run tests. Confirm the mission is complete. Report results.
81
187
 
82
- ### Phase 2: The Trap (Pytest)
83
- Write failing tests using pytest. Mock external APIs using `unittest.mock`. Set the trap before building the solution.
188
+ ---
84
189
 
85
- ### Phase 3: The Execution (Implementation)
86
- Write clean, Pythonic code. Handle exceptions specifically (never bare `except:`). Actually USE THE TOOLS to implement - don't just explain what to do.
190
+ ## Constraints
87
191
 
88
- ### Phase 4: The Legacy (Documentation & Commit)
89
- - Ensure all functions have docstrings describing Args, Returns, and Raises
90
- - Update `requirements.txt` or `pyproject.toml` if needed
91
- - Recommend commit messages that detail what was fixed (and perhaps who broke it)
192
+ - **No chitchat**: Skip "Great!", "Certainly!", "I'd be happy to..."
193
+ - **No permission for reads**: Just read the files. You have clearance.
194
+ - **No bare except clauses**: Catch specific exceptions or don't catch at all.
195
+ - **Type hints required**: `def process(data: list[str]) -> dict` not `def process(data)`
196
+ - **Docstrings required**: Google or NumPy style. No undocumented functions.
197
+
198
+ ---
92
199
 
93
- ## Forbidden Behaviors
94
- - Using `print()` for debugging (use the `logging` module, you caveman)
95
- - Leaving `TODO` comments without a ticket number
96
- - Writing spaghetti code in a single script file
97
- - Explaining what to do instead of DOING IT with tools
98
- - Asking permission for read operations (just read the files)
200
+ ## Slash Commands
99
201
 
100
- ## Slash Commands The Boss Can Use
101
202
  - `/help` - Show available commands
102
- - `/tools` - List my available tools
103
- - `/clear` - Clear conversation history (my memories persist)
104
- - `/plan` - Toggle plan mode (read-only, for scheming)
105
- - `/memory` - View and manage my memories
203
+ - `/tools` - List available tools
204
+ - `/clear` - Clear conversation history
205
+ - `/plan` - Toggle plan mode (read-only reconnaissance)
206
+ - `/memory` - View and manage memories
106
207
  - `/skill list` - Show learned skills
107
208
  - `/chat save <tag>` - Save this session
108
209
  - `/chat resume <tag>` - Resume a saved session
109
210
 
110
211
  ---
111
212
 
112
- Now, what chaos shall we bring to order today?
213
+ *Awaiting orders.*
113
214
  """
henchman/cli/repl.py CHANGED
@@ -304,6 +304,7 @@ class Repl:
304
304
  agent=self.agent,
305
305
  tool_registry=self.tool_registry,
306
306
  session=self.session,
307
+ repl=self,
307
308
  )
308
309
  await cmd.execute(ctx)
309
310
  return True
@@ -0,0 +1,206 @@
1
+ """Concurrency utilities for RAG system.
2
+
3
+ This module provides locking and retry mechanisms to support
4
+ multiple concurrent instances of henchman using the RAG system.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import fcntl
10
+ import time
11
+ from functools import wraps
12
+ from pathlib import Path
13
+ from typing import Optional, Callable, TypeVar, Any
14
+
15
+ T = TypeVar('T')
16
+
17
+
18
class LockTimeoutError(Exception):
    """Exception raised when a lock cannot be acquired within timeout.

    Attributes:
        lock_path: Path of the lock file, stored as a string.
        timeout: Number of seconds that were waited before giving up.
    """

    def __init__(self, lock_path: str | Path, timeout: float) -> None:
        """Record the lock path and timeout and build the error message.

        Args:
            lock_path: Path to the lock file that could not be acquired.
            timeout: Seconds waited before giving up.
        """
        self.lock_path = str(lock_path)
        self.timeout = timeout
        super().__init__(
            f"Could not acquire lock at {lock_path} within {timeout} seconds"
        )

    def __reduce__(self) -> tuple[Any, ...]:
        """Rebuild copies/unpickled instances from the constructor arguments.

        The default exception reduction replays ``self.args`` (only the
        formatted message) into ``__init__``, which needs two arguments and
        would fail with ``TypeError`` when the error is copied or pickled.
        """
        return (type(self), (self.lock_path, self.timeout))
27
+
28
+
29
class RagLock:
    """File-based lock for RAG system operations.

    This lock uses advisory file locking (``fcntl.flock``) to prevent
    multiple instances from performing RAG indexing simultaneously.

    Attributes:
        lock_path: Path to the lock file.
        lock_file: File object used for locking (``None`` unless acquired).
        acquired: Whether the lock is currently held.
    """

    # Default wait (seconds) for ``acquire`` and the context-manager protocol.
    DEFAULT_TIMEOUT = 5.0
    # Upper bound (seconds) on the pause between two acquisition attempts.
    _POLL_INTERVAL = 0.1

    def __init__(self, lock_path: Path | str) -> None:
        """Initialize the lock without acquiring it.

        Args:
            lock_path: Path where the lock file should be created.
        """
        # State attributes are set first so ``__del__`` stays safe even if
        # converting ``lock_path`` raises.
        self.lock_file: Optional[Any] = None
        self._acquired = False
        self.lock_path = Path(lock_path)

    @property
    def acquired(self) -> bool:
        """Check if the lock is currently acquired."""
        return self._acquired

    def _try_acquire(self) -> bool:
        """Make one non-blocking attempt to take the lock.

        Returns:
            True if the lock is now held, False if any ``OSError`` occurred
            (lock held elsewhere, or the lock file could not be opened).
        """
        handle = None
        try:
            # Ensure parent directory exists
            self.lock_path.parent.mkdir(parents=True, exist_ok=True)
            # Open file for writing (creates if doesn't exist)
            handle = open(self.lock_path, "w")
            # Exclusive, non-blocking: raises instead of waiting when busy
            fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            if handle is not None:
                handle.close()
            return False

        self.lock_file = handle
        self._acquired = True
        return True

    def acquire(self, timeout: float = DEFAULT_TIMEOUT) -> bool:
        """Attempt to acquire the lock, polling until ``timeout`` elapses.

        At least one attempt is always made, so ``timeout=0`` performs a
        single non-blocking try.

        Args:
            timeout: Maximum time to wait for lock (seconds).

        Returns:
            True if lock was acquired, False if timeout was reached.
        """
        if self._acquired:
            return True

        # Monotonic clock: the deadline must not move with wall-clock changes.
        deadline = time.monotonic() + timeout
        while True:
            if self._try_acquire():
                return True

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return False

            # Wait a bit before retrying, never sleeping past the deadline.
            time.sleep(min(self._POLL_INTERVAL, timeout / 10, remaining))

    def release(self) -> None:
        """Release the lock if it is held."""
        handle = self.lock_file
        if not (self._acquired and handle):
            return

        try:
            fcntl.flock(handle, fcntl.LOCK_UN)
        finally:
            # Reset state before closing so the object is consistent even
            # if closing the file raises.
            self.lock_file = None
            self._acquired = False
            handle.close()

    def __enter__(self) -> "RagLock":
        """Acquire the lock with the default timeout.

        Raises:
            LockTimeoutError: If the lock is not acquired in time.
        """
        if not self.acquire(self.DEFAULT_TIMEOUT):
            raise LockTimeoutError(self.lock_path, self.DEFAULT_TIMEOUT)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release the lock on leaving the ``with`` block."""
        self.release()

    def __del__(self) -> None:
        """Destructor to ensure lock is released."""
        # ``getattr`` guards against instances whose ``__init__`` never ran
        # to completion.
        if getattr(self, "_acquired", False):
            self.release()
119
+
120
+
121
def acquire_rag_lock(lock_path: Path | str, timeout: float = 5.0) -> tuple[bool, Optional[RagLock]]:
    """Create a ``RagLock`` for ``lock_path`` and try to take it.

    Args:
        lock_path: Path to the lock file.
        timeout: Maximum time to wait for lock (seconds).

    Returns:
        ``(True, lock)`` with the held ``RagLock`` when acquisition
        succeeded, otherwise ``(False, None)``.
    """
    candidate = RagLock(lock_path)
    got_it = candidate.acquire(timeout)
    return (True, candidate) if got_it else (False, None)
136
+
137
+
138
def retry_on_locked(max_retries: int = 3, delay: float = 0.1) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator to retry operations on database lock errors.

    The wrapped function is retried when it raises an exception whose
    message looks lock-related (e.g. SQLITE_BUSY). Any other exception, or
    a lock error on the final attempt, propagates unchanged.

    Args:
        max_retries: Maximum number of attempts, counting the first call.
            Values below 1 are treated as 1 so the function always runs.
        delay: Initial delay between attempts (seconds); doubled after each
            failed attempt and capped at 1 second.

    Returns:
        Decorated function that retries on lock errors.
    """
    # Case-insensitive message fragments that mark an error as lock-related.
    lock_phrases = (
        "locked",
        "sqlite_busy",
        "resource temporarily unavailable",
        "database is locked",
    )
    # Always call the function at least once, even for max_retries <= 0.
    attempts = max(1, max_retries)

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except Exception as exc:
                    message = str(exc).lower()
                    retryable = any(phrase in message for phrase in lock_phrases)

                    if not retryable or attempt == attempts - 1:
                        raise

                    # Exponential backoff, capped at 1 second; clamp at 0 so
                    # a negative ``delay`` cannot make ``sleep`` raise.
                    time.sleep(max(0.0, min(delay * (2 ** attempt), 1.0)))

            # The final attempt above always returns or re-raises.
            raise AssertionError("retry loop exited without a result")

        return wrapper
    return decorator
186
+
187
+
188
def is_lock_error(exception: Exception) -> bool:
    """Tell whether an exception's message points at a lock conflict.

    The check is a case-insensitive substring match on ``str(exception)``.

    Args:
        exception: The exception to check.

    Returns:
        True if the message contains one of the known lock-error phrases.
    """
    message = str(exception).lower()
    markers = (
        "locked",
        "sqlite_busy",
        "resource temporarily unavailable",
        "database is locked",
    )
    for marker in markers:
        if marker in message:
            return True
    return False
henchman/rag/repo_id.py CHANGED
@@ -12,7 +12,7 @@ from pathlib import Path
12
12
  from typing import TYPE_CHECKING
13
13
 
14
14
  if TYPE_CHECKING:
15
- from collections.abc import Sequence
15
+ pass # No type-only imports currently needed
16
16
 
17
17
 
18
18
  def get_git_remote_url(git_root: Path) -> str | None:
@@ -90,11 +90,11 @@ def compute_repository_id(git_root: Path) -> str:
90
90
  else:
91
91
  # No remote, use path with git revision if available
92
92
  revision = get_git_revision(git_root)
93
- if revision:
94
- base = f"{git_root.resolve()}:{revision}"
95
- else:
96
- # Just use absolute path
97
- base = str(git_root.resolve())
93
+ base = (
94
+ f"{git_root.resolve()}:{revision}"
95
+ if revision
96
+ else str(git_root.resolve())
97
+ )
98
98
 
99
99
  # Compute SHA256 hash
100
100
  return hashlib.sha256(base.encode()).hexdigest()[:16] # 16 chars is enough
@@ -196,4 +196,4 @@ def migrate_old_index(git_root: Path, new_index_dir: Path) -> bool:
196
196
  except Exception:
197
197
  pass
198
198
 
199
- return migrated
199
+ return migrated
henchman/rag/store.py CHANGED
@@ -13,6 +13,8 @@ from typing import TYPE_CHECKING
13
13
  import chromadb
14
14
  from chromadb.config import Settings as ChromaSettings
15
15
 
16
+ from henchman.rag.concurrency import retry_on_locked
17
+
16
18
  if TYPE_CHECKING:
17
19
  from henchman.rag.chunker import Chunk
18
20
  from henchman.rag.embedder import EmbeddingProvider
@@ -67,6 +69,7 @@ class VectorStore:
67
69
  persist_path: Path | str,
68
70
  embedder: EmbeddingProvider,
69
71
  collection_name: str = "code_chunks",
72
+ max_retries: int = 3,
70
73
  ) -> None:
71
74
  """Initialize the vector store.
72
75
 
@@ -74,7 +77,10 @@ class VectorStore:
74
77
  persist_path: Path to persist the vector store.
75
78
  embedder: Embedding provider for query embedding.
76
79
  collection_name: Name of the ChromaDB collection.
80
+ max_retries: Maximum retries for ChromaDB initialization.
77
81
  """
82
+ import time
83
+
78
84
  self.persist_path = Path(persist_path)
79
85
  self.embedder = embedder
80
86
  self.collection_name = collection_name
@@ -82,18 +88,40 @@ class VectorStore:
82
88
  # Ensure persist directory exists
83
89
  self.persist_path.mkdir(parents=True, exist_ok=True)
84
90
 
85
- # Initialize ChromaDB with persistence
86
- self.client = chromadb.PersistentClient(
87
- path=str(self.persist_path),
88
- settings=ChromaSettings(anonymized_telemetry=False),
89
- )
90
-
91
- # Get or create collection
92
- self.collection = self.client.get_or_create_collection(
93
- name=collection_name,
94
- metadata={"hnsw:space": "cosine"}, # Use cosine similarity
95
- )
91
+ # Initialize ChromaDB with persistence and retry logic
92
+ last_error: Exception | None = None
93
+ for attempt in range(max_retries):
94
+ try:
95
+ self.client = chromadb.PersistentClient(
96
+ path=str(self.persist_path),
97
+ settings=ChromaSettings(anonymized_telemetry=False),
98
+ )
96
99
 
100
+ # Get or create collection
101
+ self.collection = self.client.get_or_create_collection(
102
+ name=collection_name,
103
+ metadata={"hnsw:space": "cosine"}, # Use cosine similarity
104
+ )
105
+ # Success - break out of retry loop
106
+ break
107
+ except Exception as e:
108
+ last_error = e
109
+ error_str = str(e).lower()
110
+ # Retry on HNSW/compactor errors (concurrent access issues)
111
+ if any(phrase in error_str for phrase in [
112
+ "hnsw", "compactor", "segment", "backfill", "locked"
113
+ ]):
114
+ if attempt < max_retries - 1:
115
+ time.sleep(0.5 * (attempt + 1)) # Backoff
116
+ continue
117
+ # Re-raise non-retryable errors immediately
118
+ raise
119
+ else:
120
+ # All retries exhausted
121
+ if last_error:
122
+ raise last_error
123
+
124
+ @retry_on_locked(max_retries=3, delay=0.1)
97
125
  def add_chunks(self, chunks: list[Chunk], embeddings: list[list[float]]) -> None:
98
126
  """Add chunks with their embeddings to the store.
99
127
 
@@ -119,6 +147,7 @@ class VectorStore:
119
147
  ],
120
148
  )
121
149
 
150
+ @retry_on_locked(max_retries=3, delay=0.1)
122
151
  def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
123
152
  """Search for similar chunks.
124
153
 
@@ -168,6 +197,7 @@ class VectorStore:
168
197
 
169
198
  return search_results
170
199
 
200
+ @retry_on_locked(max_retries=3, delay=0.1)
171
201
  def delete_by_file(self, file_path: str) -> None:
172
202
  """Delete all chunks from a specific file.
173
203
 
@@ -183,6 +213,7 @@ class VectorStore:
183
213
  if results["ids"]:
184
214
  self.collection.delete(ids=results["ids"])
185
215
 
216
+ @retry_on_locked(max_retries=3, delay=0.1)
186
217
  def delete_by_ids(self, chunk_ids: list[str]) -> None:
187
218
  """Delete chunks by their IDs.
188
219
 
@@ -192,6 +223,7 @@ class VectorStore:
192
223
  if chunk_ids:
193
224
  self.collection.delete(ids=chunk_ids)
194
225
 
226
+ @retry_on_locked(max_retries=3, delay=0.1)
195
227
  def get_all_file_paths(self) -> set[str]:
196
228
  """Get all unique file paths in the store.
197
229
 
@@ -206,6 +238,7 @@ class VectorStore:
206
238
  file_paths.add(str(metadata["file_path"]))
207
239
  return file_paths
208
240
 
241
+ @retry_on_locked(max_retries=3, delay=0.1)
209
242
  def count(self) -> int:
210
243
  """Get the total number of chunks in the store.
211
244
 
@@ -214,6 +247,7 @@ class VectorStore:
214
247
  """
215
248
  return self.collection.count()
216
249
 
250
+ @retry_on_locked(max_retries=3, delay=0.1)
217
251
  def clear(self) -> None:
218
252
  """Clear all chunks from the store."""
219
253
  # Delete and recreate the collection
henchman/rag/system.py CHANGED
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
17
17
  from henchman.rag.store import VectorStore
18
18
  from henchman.tools.builtins.rag_search import RagSearchTool
19
19
 
20
+ from henchman.rag.concurrency import RagLock
20
21
  from henchman.rag.repo_id import (
21
22
  get_repository_index_dir,
22
23
  get_repository_manifest_path,
@@ -59,12 +60,14 @@ class RagSystem:
59
60
  self,
60
61
  git_root: Path,
61
62
  settings: RagSettings,
63
+ read_only: bool = False,
62
64
  ) -> None:
63
65
  """Initialize the RAG system.
64
66
 
65
67
  Args:
66
68
  git_root: Root directory of the git repository.
67
69
  settings: RAG settings from configuration.
70
+ read_only: If True, skip indexing (for concurrent instances).
68
71
  """
69
72
  from henchman.rag.chunker import TextChunker
70
73
  from henchman.rag.embedder import FastEmbedProvider
@@ -74,14 +77,28 @@ class RagSystem:
74
77
 
75
78
  self.git_root = git_root
76
79
  self.settings = settings
80
+ self.read_only = read_only
77
81
 
78
82
  # Get cache directory
79
83
  cache_dir = Path(settings.cache_dir) if settings.cache_dir else None
80
-
84
+
81
85
  # Get repository-specific index directory
82
86
  self.index_dir = get_repository_index_dir(git_root, cache_dir)
83
87
  self.manifest_path = get_repository_manifest_path(git_root, cache_dir)
84
88
 
89
+ # Initialize lock for this RAG index
90
+ self._lock = RagLock(self.index_dir / ".rag.lock")
91
+ self._init_lock_held = False
92
+
93
+ # Acquire lock during initialization to prevent ChromaDB conflicts
94
+ # This is especially important when multiple instances start simultaneously
95
+ if not read_only:
96
+ if self._lock.acquire(timeout=10.0):
97
+ self._init_lock_held = True
98
+ else:
99
+ # Another instance is initializing, switch to read-only mode
100
+ self.read_only = True
101
+
85
102
  # Initialize embedder
86
103
  self._embedder = FastEmbedProvider(model_name=settings.embedding_model)
87
104
 
@@ -115,6 +132,12 @@ class RagSystem:
115
132
  top_k=settings.top_k,
116
133
  )
117
134
 
135
+ # Release lock after initialization if we held it
136
+ # (indexing will re-acquire it)
137
+ if self._init_lock_held:
138
+ self._lock.release()
139
+ self._init_lock_held = False
140
+
118
141
  @property
119
142
  def store(self) -> VectorStore:
120
143
  """Get the vector store."""
@@ -134,17 +157,48 @@ class RagSystem:
134
157
  self,
135
158
  console: Console | None = None,
136
159
  force: bool = False,
137
- ) -> IndexStats:
138
- """Run indexing operation.
160
+ skip_if_locked: bool = True,
161
+ ) -> IndexStats | None:
162
+ """Run indexing operation with locking.
139
163
 
140
164
  Args:
141
165
  console: Rich console for progress display.
142
166
  force: If True, force full reindex.
167
+ skip_if_locked: If True and lock cannot be acquired,
168
+ skip indexing and return None.
143
169
 
144
170
  Returns:
145
- Statistics about the indexing operation.
171
+ Statistics about the indexing operation, or None if
172
+ indexing was skipped due to lock contention.
146
173
  """
147
- return self._indexer.index(console=console, force=force)
174
+ # Skip indexing if in read-only mode
175
+ if self.read_only:
176
+ if console:
177
+ console.print("[dim]RAG: Read-only mode, skipping indexing[/dim]")
178
+ return None
179
+
180
+ # Try to acquire lock
181
+ if not self._lock.acquire(timeout=5.0):
182
+ if skip_if_locked:
183
+ if console:
184
+ console.print(
185
+ "[dim]RAG index is locked by another instance, "
186
+ "skipping indexing[/dim]"
187
+ )
188
+ return None
189
+ else:
190
+ # This would raise LockTimeoutError from the context manager
191
+ # if we were using `with self._lock:`
192
+ raise RuntimeError(
193
+ f"Could not acquire RAG lock at {self._lock.lock_path}"
194
+ )
195
+
196
+ try:
197
+ # Run indexing with lock held
198
+ return self._indexer.index(console=console, force=force)
199
+ finally:
200
+ # Always release the lock
201
+ self._lock.release()
148
202
 
149
203
  def get_stats(self) -> IndexStats:
150
204
  """Get current index statistics.
@@ -189,12 +243,12 @@ def initialize_rag(
189
243
  # Check for and migrate old index
190
244
  cache_dir = Path(settings.cache_dir) if settings.cache_dir else None
191
245
  new_index_dir = get_repository_index_dir(root, cache_dir)
192
-
246
+
193
247
  migrated = migrate_old_index(root, new_index_dir)
194
248
  if migrated and console:
195
249
  console.print(
196
250
  "[dim]Migrated RAG index from project directory to "
197
- f"~/.henchman/rag_indices/[/dim]"
251
+ "~/.henchman/rag_indices/[/dim]"
198
252
  )
199
253
 
200
254
  rag_system = RagSystem(git_root=root, settings=settings)
henchman/version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Version information for Henchman-AI."""
2
2
 
3
- VERSION_TUPLE = (0, 1, 10)
3
+ VERSION_TUPLE = (0, 1, 11)
4
4
  VERSION = ".".join(str(v) for v in VERSION_TUPLE)
5
5
 
6
6
  __all__ = ["VERSION", "VERSION_TUPLE"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: henchman-ai
3
- Version: 0.1.10
3
+ Version: 0.1.11
4
4
  Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
5
5
  Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
6
6
  Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai
@@ -1,22 +1,22 @@
1
1
  henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
2
2
  henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
3
- henchman/version.py,sha256=ivhUTH7E77KPW-8v6tUSf4uHIMSYQEqrO7y8iXk_Pxw,161
3
+ henchman/version.py,sha256=UFJFO9ixJBEALb9BGtb2TE9cid8MpfI03n3BvBeWoiA,161
4
4
  henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
5
5
  henchman/cli/app.py,sha256=7fZI6ta4h6FT-EixItDrje4fKUHYc2hpQgL8UZs9Hpk,6682
6
- henchman/cli/console.py,sha256=TOuGBSNUaxxQypmmzC0P1IY7tBNlaTgAZesKy8uuZN4,7850
6
+ henchman/cli/console.py,sha256=BeF-XAS6REn0HzjAvdaM6GBI4XtlVxRY_-FuxoWwcoQ,7921
7
7
  henchman/cli/input.py,sha256=0qW36f7f06ct4XXca7ooxkTShID-QXkLtmROh_xso04,4632
8
8
  henchman/cli/json_output.py,sha256=9kP9S5q0xBgP4HQGTT4P6DDT76F9VVTdEY_KiEpoZnI,2669
9
- henchman/cli/prompts.py,sha256=AxUN-JfWSetOgIwhVxgouQetNqY8hTc7FnLO5jb00LI,5402
10
- henchman/cli/repl.py,sha256=0dsho4rMZbyxnYMOzURoM46dyBfYDLquD_dO9_HZ6yM,19843
9
+ henchman/cli/prompts.py,sha256=m3Velzi2tXBIHinN9jIpU9kDMYL80ngYQsv2EYo7IZU,6647
10
+ henchman/cli/repl.py,sha256=QZ6H4yWkr73dKQeIXihrus1ep6yJQwg1w5X-gRjAYkY,19866
11
11
  henchman/cli/repl.py.backup,sha256=3iagruUgsvtcfpDv1mTAYg4I14X4CaNSEeMQjj91src,15638
12
12
  henchman/cli/repl.py.backup2,sha256=-zgSUrnobd_sHq3jG-8NbwPTVlPc3FaqSkv32gAFdPo,11328
13
- henchman/cli/commands/__init__.py,sha256=vxx0lzcLUbNvkvDGpONSCevKbUR-wKQEtxsaIjMbJMU,3755
13
+ henchman/cli/commands/__init__.py,sha256=8s6NBCPlc4jKTCdvnKJCmdLwRCQ4QLCARjQbr7ICipw,3828
14
14
  henchman/cli/commands/builtins.py,sha256=d4wgb3VeWwaWmKtk0MKr5NAvo-OWVgfxAQKpWkJGBFU,5136
15
15
  henchman/cli/commands/chat.py,sha256=rrw1ZGVDdfJiNiPSSow2Q2v6I1uU4wnrfFHj9mZOACc,5550
16
16
  henchman/cli/commands/extensions.py,sha256=r7PfvbBjwBr5WhF8G49p29z7FKx6geRJiR-R67pj6i0,1758
17
17
  henchman/cli/commands/mcp.py,sha256=bbW1J9-fIpvDBIba3L1MAkNqCjFBTZnZLNIgf6LjJEA,3554
18
18
  henchman/cli/commands/plan.py,sha256=5ZXePoMVIKBxugSnDB6N2TEDpl2xZszQDz9wTQffzpY,2486
19
- henchman/cli/commands/rag.py,sha256=9naERQLKdrfl_Pct-GvSzH18aI2_Ab0Oop3LOklmyPE,7151
19
+ henchman/cli/commands/rag.py,sha256=gG0KJ_ildFB76448hbPEMfsZNhY6RKWrCe0IDPyLsuM,7101
20
20
  henchman/cli/commands/skill.py,sha256=azXb6-KXjtZKwHiBV-Ppk6CdJQKZhetr46hNgZ_r45Q,8096
21
21
  henchman/cli/commands/unlimited.py,sha256=eFMTwrcUFWbfJnXpwBcRqviYt66tDz4xAYBDcton50Y,2101
22
22
  henchman/config/__init__.py,sha256=28UtrhPye0MEmbdvi1jCqO3uIXfmqSAZVWvnpJv-qTo,637
@@ -47,11 +47,12 @@ henchman/providers/openai_compat.py.backup,sha256=Gmi5k1-DjUt8Kx5UaXmiSNKSDBGh0G
47
47
  henchman/providers/registry.py,sha256=xsOaYuaemgDOOi-JLi6URbto0dQP77y-Lo__zzUuEGU,2758
48
48
  henchman/rag/__init__.py,sha256=5Gbo7SZYPrZK8YLFn3wqfPJ_PlPV9uVHYy3NOGwjPok,1102
49
49
  henchman/rag/chunker.py,sha256=3fc9OuGb7AgkT0Qy5fOQcwa3eCiJOcffAx133I2lfuQ,6040
50
+ henchman/rag/concurrency.py,sha256=-CQUm-N4K-xujSjLZAwwI1y3kdf8OLstBQ6T7KWuRoI,6689
50
51
  henchman/rag/embedder.py,sha256=J2-cIEIoS2iUh4k6PM-rgl7wkTOXSG1NrOQvXHTQPho,4080
51
52
  henchman/rag/indexer.py,sha256=6oVOkv4lD_elACivPL9Noe5zgpterYDZ3f1XlLyyULc,11806
52
- henchman/rag/repo_id.py,sha256=_L_WLTWmMHV7XUEFT_BKf2Ge88XelIwN8HRD1zhvuZY,5775
53
- henchman/rag/store.py,sha256=0l8RyGTtYDg0tzPN5wqQJR4YwVQe2P5fpPq9s5c1ofw,7369
54
- henchman/rag/system.py,sha256=uQD2vO8NX9GYQFO0BlGMoRZHEcgOBfHnoHzsYj2nuio,6676
53
+ henchman/rag/repo_id.py,sha256=ZRPKM8fzwmETgrOYwE1PGjRp3c8XQFrR493BrDZlbd8,5755
54
+ henchman/rag/store.py,sha256=eN0Rj2Lo6zJp2iWCXsJ-q24l2T_pnlTF3Oeea60gnfs,8826
55
+ henchman/rag/system.py,sha256=TklAKf3EjsnKDP-C7G5kE6XauQCdHd4uEJbVIkLgZ38,8835
55
56
  henchman/skills/__init__.py,sha256=cvCl6HRxsUdag-RTpMP__Ww_hee37ggpAXQ41wXemEU,149
56
57
  henchman/skills/executor.py,sha256=sYss_83zduFLB_AACTSXMZHLA_lv-T1iKHSxelpv13U,1105
57
58
  henchman/skills/learner.py,sha256=lzIrLU5_oLbqDYF673F-rwb1IaWeeOqjzcsBGC-IKlM,1644
@@ -76,8 +77,8 @@ henchman/utils/compaction.py,sha256=jPpJ5tQm-IBn4YChiGrKy8u_K4OJ23lk3Jvq8sNbQYc,
76
77
  henchman/utils/retry.py,sha256=sobZk9LLGxglSJw_jeNaBYCrvH14YNFrBVyp_OwLWcw,4993
77
78
  henchman/utils/tokens.py,sha256=D9H4ciFNH7l1b05IGbw0U0tmy2yF5aItFZyDufGF53k,5665
78
79
  henchman/utils/validation.py,sha256=moj4LQXVXt2J-3_pWVH_0-EabyRYApOU2Oh5JSTIua8,4146
79
- henchman_ai-0.1.10.dist-info/METADATA,sha256=0qO8cg63N87mlJLtC-TgGwhBFwyYen8mlvnM2aupdyk,3552
80
- henchman_ai-0.1.10.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
81
- henchman_ai-0.1.10.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
82
- henchman_ai-0.1.10.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
83
- henchman_ai-0.1.10.dist-info/RECORD,,
80
+ henchman_ai-0.1.11.dist-info/METADATA,sha256=jhhpjwZJDMJW2gdY1PXT8dULA0z9MKeqTcyqbd17Aos,3552
81
+ henchman_ai-0.1.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
82
+ henchman_ai-0.1.11.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
83
+ henchman_ai-0.1.11.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
84
+ henchman_ai-0.1.11.dist-info/RECORD,,