henchman-ai 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- henchman/cli/app.py +131 -22
- henchman/cli/commands/__init__.py +2 -0
- henchman/cli/commands/builtins.py +6 -0
- henchman/cli/commands/chat.py +50 -36
- henchman/cli/commands/rag.py +26 -20
- henchman/cli/console.py +11 -6
- henchman/cli/input.py +65 -0
- henchman/cli/prompts.py +171 -70
- henchman/cli/repl.py +191 -33
- henchman/core/turn.py +15 -9
- henchman/rag/concurrency.py +206 -0
- henchman/rag/repo_id.py +7 -7
- henchman/rag/store.py +45 -11
- henchman/rag/system.py +93 -7
- henchman/utils/compaction.py +4 -3
- henchman/version.py +1 -1
- {henchman_ai-0.1.10.dist-info → henchman_ai-0.1.12.dist-info}/METADATA +1 -1
- {henchman_ai-0.1.10.dist-info → henchman_ai-0.1.12.dist-info}/RECORD +21 -20
- {henchman_ai-0.1.10.dist-info → henchman_ai-0.1.12.dist-info}/WHEEL +0 -0
- {henchman_ai-0.1.10.dist-info → henchman_ai-0.1.12.dist-info}/entry_points.txt +0 -0
- {henchman_ai-0.1.10.dist-info → henchman_ai-0.1.12.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Concurrency utilities for RAG system.
|
|
2
|
+
|
|
3
|
+
This module provides locking and retry mechanisms to support
|
|
4
|
+
multiple concurrent instances of henchman using the RAG system.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import fcntl
|
|
10
|
+
import time
|
|
11
|
+
from functools import wraps
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional, Callable, TypeVar, Any
|
|
14
|
+
|
|
15
|
+
T = TypeVar('T')
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LockTimeoutError(Exception):
    """Raised when a lock cannot be acquired within the allowed time.

    Attributes:
        lock_path: String form of the lock file path that was contended.
        timeout: Number of seconds that were allowed for the acquisition.
    """

    def __init__(self, lock_path: str | Path, timeout: float):
        """Record the lock details and build the human-readable message.

        Args:
            lock_path: Path of the lock file that could not be locked.
            timeout: Seconds that were allowed before giving up.
        """
        self.lock_path = str(lock_path)
        self.timeout = timeout
        super().__init__(
            f"Could not acquire lock at {lock_path} within {timeout} seconds"
        )

    def __reduce__(self) -> tuple[type, tuple[str, float]]:
        """Describe how to rebuild this exception for ``copy`` and ``pickle``.

        The default ``BaseException`` reduction re-calls the constructor with
        ``self.args``, which here holds only the formatted message; that does
        not match the two-argument ``__init__`` and fails with ``TypeError``.
        Returning the original constructor arguments keeps the exception
        copyable and picklable.
        """
        return (type(self), (self.lock_path, self.timeout))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RagLock:
    """File-based lock for RAG system operations.

    The lock is an advisory ``fcntl.flock`` exclusive lock taken on a file at
    ``lock_path``. It is used to keep multiple instances from performing RAG
    indexing at the same time.

    Attributes:
        lock_path: Path to the lock file.
        lock_file: Open file object backing the lock while it is held,
            otherwise ``None``.
        acquired: Whether the lock is currently held by this object.
    """

    # Shared by ``acquire`` and ``__enter__`` so the timeout reported in
    # ``LockTimeoutError`` always matches the timeout that was actually used.
    _DEFAULT_TIMEOUT = 5.0

    def __init__(self, lock_path: Path | str):
        """Initialize the lock without acquiring it.

        Args:
            lock_path: Path where the lock file should be created.
        """
        self.lock_path = Path(lock_path)
        self.lock_file: Optional[Any] = None
        self._acquired = False

    @property
    def acquired(self) -> bool:
        """Return True while this object holds the lock."""
        return self._acquired

    def _try_lock(self) -> bool:
        """Make a single non-blocking attempt to take the lock.

        Returns:
            True if the lock was taken; False if any ``OSError`` occurred
            (the lock is held elsewhere, or the file could not be created).
        """
        handle = None
        try:
            # Ensure parent directory exists
            self.lock_path.parent.mkdir(parents=True, exist_ok=True)
            # Open file for writing (creates if doesn't exist)
            handle = open(self.lock_path, 'w')
            # Exclusive, non-blocking: raises instead of waiting when held
            fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # Never leave a dangling descriptor behind on a failed attempt
            if handle is not None:
                handle.close()
            return False

        self.lock_file = handle
        self._acquired = True
        return True

    def acquire(self, timeout: float = _DEFAULT_TIMEOUT) -> bool:
        """Attempt to acquire the lock, polling until ``timeout`` elapses.

        At least one attempt is always made, so a zero (or negative) timeout
        behaves as a single non-blocking try.

        Args:
            timeout: Maximum time to wait for the lock (seconds).

        Returns:
            True if the lock was acquired (or already held), False if the
            timeout was reached.
        """
        if self._acquired:
            return True

        budget = max(timeout, 0.0)
        # Monotonic clock: immune to wall-clock adjustments while waiting
        deadline = time.monotonic() + budget
        poll_interval = min(0.1, budget / 10)

        while True:
            if self._try_lock():
                return True

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                # Timeout reached
                return False

            # Wait a bit before retrying, never sleeping past the deadline
            time.sleep(min(poll_interval, remaining))

    def release(self) -> None:
        """Release the lock if it is held; otherwise do nothing."""
        if self._acquired and self.lock_file:
            try:
                fcntl.flock(self.lock_file, fcntl.LOCK_UN)
            finally:
                # Closing the file drops the lock even if LOCK_UN failed
                self.lock_file.close()
                self.lock_file = None
                self._acquired = False

    def __enter__(self) -> RagLock:
        """Acquire the lock with the default timeout.

        Raises:
            LockTimeoutError: If the lock could not be acquired in time.
        """
        if not self.acquire(self._DEFAULT_TIMEOUT):
            raise LockTimeoutError(self.lock_path, self._DEFAULT_TIMEOUT)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release the lock on leaving the ``with`` block."""
        self.release()

    def __del__(self) -> None:
        """Release the lock when the object is garbage-collected."""
        # ``__init__`` may have failed before the attributes were set, so
        # guard the lookup instead of assuming a fully built instance.
        if getattr(self, "_acquired", False):
            self.release()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def acquire_rag_lock(lock_path: Path | str, timeout: float = 5.0) -> tuple[bool, Optional[RagLock]]:
    """Create a ``RagLock`` for ``lock_path`` and try to acquire it.

    Args:
        lock_path: Path to the lock file.
        timeout: Maximum time to wait for the lock (seconds).

    Returns:
        ``(True, lock)`` with the held ``RagLock`` when acquisition
        succeeded, otherwise ``(False, None)``.
    """
    candidate = RagLock(lock_path)
    got_it = candidate.acquire(timeout)
    return (True, candidate) if got_it else (False, None)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Lower-case substrings that mark an exception message as lock contention.
_LOCK_ERROR_PHRASES = (
    "locked",
    "sqlite_busy",
    "resource temporarily unavailable",
    "database is locked",
)


def retry_on_locked(max_retries: int = 3, delay: float = 0.1) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator to retry operations on database lock errors.

    The wrapped function is retried when it raises an exception whose
    message indicates lock contention (see ``is_lock_error``). Any other
    exception, or a lock error on the final attempt, propagates unchanged.

    Args:
        max_retries: Maximum number of attempts. Values below 1 are treated
            as 1 so the wrapped function is always called at least once.
        delay: Initial delay between attempts (seconds); doubled after each
            failed attempt and capped at 1 second.

    Returns:
        Decorator producing a function that retries on lock errors.
    """
    # Guarantee one real call even for max_retries <= 0; otherwise the loop
    # body would never run and there would be nothing to return or raise.
    attempts = max(1, max_retries)

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except Exception as exc:
                    out_of_attempts = attempt == attempts - 1
                    if out_of_attempts or not is_lock_error(exc):
                        raise

                    # Wait before retrying (exponential backoff), capped at
                    # 1 second and floored at 0 so a negative delay cannot
                    # make time.sleep raise.
                    wait_time = min(delay * (2 ** attempt), 1.0)
                    time.sleep(max(0.0, wait_time))

            # Every iteration either returns or raises on the last attempt.
            raise AssertionError("retry loop exited without a result")

        return wrapper
    return decorator


def is_lock_error(exception: Exception) -> bool:
    """Check if an exception indicates a database lock error.

    The check is a case-insensitive substring match of the exception's
    message against a fixed list of lock-related phrases.

    Args:
        exception: The exception to check.

    Returns:
        True if the exception message contains a lock-related phrase.
    """
    message = str(exception).lower()
    return any(phrase in message for phrase in _LOCK_ERROR_PHRASES)
|
henchman/rag/repo_id.py
CHANGED
|
@@ -12,7 +12,7 @@ from pathlib import Path
|
|
|
12
12
|
from typing import TYPE_CHECKING
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
|
-
|
|
15
|
+
pass # No type-only imports currently needed
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def get_git_remote_url(git_root: Path) -> str | None:
|
|
@@ -90,11 +90,11 @@ def compute_repository_id(git_root: Path) -> str:
|
|
|
90
90
|
else:
|
|
91
91
|
# No remote, use path with git revision if available
|
|
92
92
|
revision = get_git_revision(git_root)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
93
|
+
base = (
|
|
94
|
+
f"{git_root.resolve()}:{revision}"
|
|
95
|
+
if revision
|
|
96
|
+
else str(git_root.resolve())
|
|
97
|
+
)
|
|
98
98
|
|
|
99
99
|
# Compute SHA256 hash
|
|
100
100
|
return hashlib.sha256(base.encode()).hexdigest()[:16] # 16 chars is enough
|
|
@@ -196,4 +196,4 @@ def migrate_old_index(git_root: Path, new_index_dir: Path) -> bool:
|
|
|
196
196
|
except Exception:
|
|
197
197
|
pass
|
|
198
198
|
|
|
199
|
-
return migrated
|
|
199
|
+
return migrated
|
henchman/rag/store.py
CHANGED
|
@@ -13,6 +13,8 @@ from typing import TYPE_CHECKING
|
|
|
13
13
|
import chromadb
|
|
14
14
|
from chromadb.config import Settings as ChromaSettings
|
|
15
15
|
|
|
16
|
+
from henchman.rag.concurrency import retry_on_locked
|
|
17
|
+
|
|
16
18
|
if TYPE_CHECKING:
|
|
17
19
|
from henchman.rag.chunker import Chunk
|
|
18
20
|
from henchman.rag.embedder import EmbeddingProvider
|
|
@@ -67,6 +69,7 @@ class VectorStore:
|
|
|
67
69
|
persist_path: Path | str,
|
|
68
70
|
embedder: EmbeddingProvider,
|
|
69
71
|
collection_name: str = "code_chunks",
|
|
72
|
+
max_retries: int = 3,
|
|
70
73
|
) -> None:
|
|
71
74
|
"""Initialize the vector store.
|
|
72
75
|
|
|
@@ -74,7 +77,10 @@ class VectorStore:
|
|
|
74
77
|
persist_path: Path to persist the vector store.
|
|
75
78
|
embedder: Embedding provider for query embedding.
|
|
76
79
|
collection_name: Name of the ChromaDB collection.
|
|
80
|
+
max_retries: Maximum retries for ChromaDB initialization.
|
|
77
81
|
"""
|
|
82
|
+
import time
|
|
83
|
+
|
|
78
84
|
self.persist_path = Path(persist_path)
|
|
79
85
|
self.embedder = embedder
|
|
80
86
|
self.collection_name = collection_name
|
|
@@ -82,18 +88,40 @@ class VectorStore:
|
|
|
82
88
|
# Ensure persist directory exists
|
|
83
89
|
self.persist_path.mkdir(parents=True, exist_ok=True)
|
|
84
90
|
|
|
85
|
-
# Initialize ChromaDB with persistence
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
name=collection_name,
|
|
94
|
-
metadata={"hnsw:space": "cosine"}, # Use cosine similarity
|
|
95
|
-
)
|
|
91
|
+
# Initialize ChromaDB with persistence and retry logic
|
|
92
|
+
last_error: Exception | None = None
|
|
93
|
+
for attempt in range(max_retries):
|
|
94
|
+
try:
|
|
95
|
+
self.client = chromadb.PersistentClient(
|
|
96
|
+
path=str(self.persist_path),
|
|
97
|
+
settings=ChromaSettings(anonymized_telemetry=False),
|
|
98
|
+
)
|
|
96
99
|
|
|
100
|
+
# Get or create collection
|
|
101
|
+
self.collection = self.client.get_or_create_collection(
|
|
102
|
+
name=collection_name,
|
|
103
|
+
metadata={"hnsw:space": "cosine"}, # Use cosine similarity
|
|
104
|
+
)
|
|
105
|
+
# Success - break out of retry loop
|
|
106
|
+
break
|
|
107
|
+
except Exception as e:
|
|
108
|
+
last_error = e
|
|
109
|
+
error_str = str(e).lower()
|
|
110
|
+
# Retry on HNSW/compactor errors (concurrent access issues)
|
|
111
|
+
if any(phrase in error_str for phrase in [
|
|
112
|
+
"hnsw", "compactor", "segment", "backfill", "locked"
|
|
113
|
+
]):
|
|
114
|
+
if attempt < max_retries - 1:
|
|
115
|
+
time.sleep(0.5 * (attempt + 1)) # Backoff
|
|
116
|
+
continue
|
|
117
|
+
# Re-raise non-retryable errors immediately
|
|
118
|
+
raise
|
|
119
|
+
else:
|
|
120
|
+
# All retries exhausted
|
|
121
|
+
if last_error:
|
|
122
|
+
raise last_error
|
|
123
|
+
|
|
124
|
+
@retry_on_locked(max_retries=3, delay=0.1)
|
|
97
125
|
def add_chunks(self, chunks: list[Chunk], embeddings: list[list[float]]) -> None:
|
|
98
126
|
"""Add chunks with their embeddings to the store.
|
|
99
127
|
|
|
@@ -119,6 +147,7 @@ class VectorStore:
|
|
|
119
147
|
],
|
|
120
148
|
)
|
|
121
149
|
|
|
150
|
+
@retry_on_locked(max_retries=3, delay=0.1)
|
|
122
151
|
def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
|
|
123
152
|
"""Search for similar chunks.
|
|
124
153
|
|
|
@@ -168,6 +197,7 @@ class VectorStore:
|
|
|
168
197
|
|
|
169
198
|
return search_results
|
|
170
199
|
|
|
200
|
+
@retry_on_locked(max_retries=3, delay=0.1)
|
|
171
201
|
def delete_by_file(self, file_path: str) -> None:
|
|
172
202
|
"""Delete all chunks from a specific file.
|
|
173
203
|
|
|
@@ -183,6 +213,7 @@ class VectorStore:
|
|
|
183
213
|
if results["ids"]:
|
|
184
214
|
self.collection.delete(ids=results["ids"])
|
|
185
215
|
|
|
216
|
+
@retry_on_locked(max_retries=3, delay=0.1)
|
|
186
217
|
def delete_by_ids(self, chunk_ids: list[str]) -> None:
|
|
187
218
|
"""Delete chunks by their IDs.
|
|
188
219
|
|
|
@@ -192,6 +223,7 @@ class VectorStore:
|
|
|
192
223
|
if chunk_ids:
|
|
193
224
|
self.collection.delete(ids=chunk_ids)
|
|
194
225
|
|
|
226
|
+
@retry_on_locked(max_retries=3, delay=0.1)
|
|
195
227
|
def get_all_file_paths(self) -> set[str]:
|
|
196
228
|
"""Get all unique file paths in the store.
|
|
197
229
|
|
|
@@ -206,6 +238,7 @@ class VectorStore:
|
|
|
206
238
|
file_paths.add(str(metadata["file_path"]))
|
|
207
239
|
return file_paths
|
|
208
240
|
|
|
241
|
+
@retry_on_locked(max_retries=3, delay=0.1)
|
|
209
242
|
def count(self) -> int:
|
|
210
243
|
"""Get the total number of chunks in the store.
|
|
211
244
|
|
|
@@ -214,6 +247,7 @@ class VectorStore:
|
|
|
214
247
|
"""
|
|
215
248
|
return self.collection.count()
|
|
216
249
|
|
|
250
|
+
@retry_on_locked(max_retries=3, delay=0.1)
|
|
217
251
|
def clear(self) -> None:
|
|
218
252
|
"""Clear all chunks from the store."""
|
|
219
253
|
# Delete and recreate the collection
|
henchman/rag/system.py
CHANGED
|
@@ -6,6 +6,7 @@ the RAG system in the CLI.
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
import asyncio
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from typing import TYPE_CHECKING
|
|
11
12
|
|
|
@@ -17,6 +18,7 @@ if TYPE_CHECKING:
|
|
|
17
18
|
from henchman.rag.store import VectorStore
|
|
18
19
|
from henchman.tools.builtins.rag_search import RagSearchTool
|
|
19
20
|
|
|
21
|
+
from henchman.rag.concurrency import RagLock
|
|
20
22
|
from henchman.rag.repo_id import (
|
|
21
23
|
get_repository_index_dir,
|
|
22
24
|
get_repository_manifest_path,
|
|
@@ -59,12 +61,14 @@ class RagSystem:
|
|
|
59
61
|
self,
|
|
60
62
|
git_root: Path,
|
|
61
63
|
settings: RagSettings,
|
|
64
|
+
read_only: bool = False,
|
|
62
65
|
) -> None:
|
|
63
66
|
"""Initialize the RAG system.
|
|
64
67
|
|
|
65
68
|
Args:
|
|
66
69
|
git_root: Root directory of the git repository.
|
|
67
70
|
settings: RAG settings from configuration.
|
|
71
|
+
read_only: If True, skip indexing (for concurrent instances).
|
|
68
72
|
"""
|
|
69
73
|
from henchman.rag.chunker import TextChunker
|
|
70
74
|
from henchman.rag.embedder import FastEmbedProvider
|
|
@@ -74,14 +78,29 @@ class RagSystem:
|
|
|
74
78
|
|
|
75
79
|
self.git_root = git_root
|
|
76
80
|
self.settings = settings
|
|
81
|
+
self.read_only = read_only
|
|
82
|
+
self.is_indexing = False
|
|
77
83
|
|
|
78
84
|
# Get cache directory
|
|
79
85
|
cache_dir = Path(settings.cache_dir) if settings.cache_dir else None
|
|
80
|
-
|
|
86
|
+
|
|
81
87
|
# Get repository-specific index directory
|
|
82
88
|
self.index_dir = get_repository_index_dir(git_root, cache_dir)
|
|
83
89
|
self.manifest_path = get_repository_manifest_path(git_root, cache_dir)
|
|
84
90
|
|
|
91
|
+
# Initialize lock for this RAG index
|
|
92
|
+
self._lock = RagLock(self.index_dir / ".rag.lock")
|
|
93
|
+
self._init_lock_held = False
|
|
94
|
+
|
|
95
|
+
# Acquire lock during initialization to prevent ChromaDB conflicts
|
|
96
|
+
# This is especially important when multiple instances start simultaneously
|
|
97
|
+
if not read_only:
|
|
98
|
+
if self._lock.acquire(timeout=10.0):
|
|
99
|
+
self._init_lock_held = True
|
|
100
|
+
else:
|
|
101
|
+
# Another instance is initializing, switch to read-only mode
|
|
102
|
+
self.read_only = True
|
|
103
|
+
|
|
85
104
|
# Initialize embedder
|
|
86
105
|
self._embedder = FastEmbedProvider(model_name=settings.embedding_model)
|
|
87
106
|
|
|
@@ -115,6 +134,12 @@ class RagSystem:
|
|
|
115
134
|
top_k=settings.top_k,
|
|
116
135
|
)
|
|
117
136
|
|
|
137
|
+
# Release lock after initialization if we held it
|
|
138
|
+
# (indexing will re-acquire it)
|
|
139
|
+
if self._init_lock_held:
|
|
140
|
+
self._lock.release()
|
|
141
|
+
self._init_lock_held = False
|
|
142
|
+
|
|
118
143
|
@property
|
|
119
144
|
def store(self) -> VectorStore:
|
|
120
145
|
"""Get the vector store."""
|
|
@@ -134,17 +159,73 @@ class RagSystem:
|
|
|
134
159
|
self,
|
|
135
160
|
console: Console | None = None,
|
|
136
161
|
force: bool = False,
|
|
137
|
-
|
|
138
|
-
|
|
162
|
+
skip_if_locked: bool = True,
|
|
163
|
+
) -> IndexStats | None:
|
|
164
|
+
"""Run indexing operation with locking.
|
|
139
165
|
|
|
140
166
|
Args:
|
|
141
167
|
console: Rich console for progress display.
|
|
142
168
|
force: If True, force full reindex.
|
|
169
|
+
skip_if_locked: If True and lock cannot be acquired,
|
|
170
|
+
skip indexing and return None.
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
Statistics about the indexing operation, or None if
|
|
174
|
+
indexing was skipped due to lock contention.
|
|
175
|
+
"""
|
|
176
|
+
# Skip indexing if in read-only mode
|
|
177
|
+
if self.read_only:
|
|
178
|
+
if console:
|
|
179
|
+
console.print("[dim]RAG: Read-only mode, skipping indexing[/dim]")
|
|
180
|
+
return None
|
|
181
|
+
|
|
182
|
+
# Try to acquire lock
|
|
183
|
+
if not self._lock.acquire(timeout=5.0):
|
|
184
|
+
if skip_if_locked:
|
|
185
|
+
if console:
|
|
186
|
+
console.print(
|
|
187
|
+
"[dim]RAG index is locked by another instance, "
|
|
188
|
+
"skipping indexing[/dim]"
|
|
189
|
+
)
|
|
190
|
+
return None
|
|
191
|
+
else:
|
|
192
|
+
# This would raise LockTimeoutError from the context manager
|
|
193
|
+
# if we were using `with self._lock:`
|
|
194
|
+
raise RuntimeError(
|
|
195
|
+
f"Could not acquire RAG lock at {self._lock.lock_path}"
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
self.is_indexing = True
|
|
199
|
+
try:
|
|
200
|
+
# Run indexing with lock held
|
|
201
|
+
return self._indexer.index(console=console, force=force)
|
|
202
|
+
finally:
|
|
203
|
+
# Always release the lock
|
|
204
|
+
self._lock.release()
|
|
205
|
+
self.is_indexing = False
|
|
206
|
+
|
|
207
|
+
async def index_async(
    self,
    console: Console | None = None,
    force: bool = False,
    skip_if_locked: bool = True,
) -> IndexStats | None:
    """Run ``index`` off the event loop via the loop's default executor.

    Args:
        console: Rich console for progress display.
        force: If True, force full reindex.
        skip_if_locked: If True and lock cannot be acquired,
            skip indexing and return None.

    Returns:
        Whatever ``index`` returns for the same arguments: statistics about
        the indexing operation, or None when indexing was skipped.
    """
    def _blocking_index() -> IndexStats | None:
        # Forward the arguments unchanged to the synchronous implementation.
        return self.index(console=console, force=force, skip_if_locked=skip_if_locked)

    running_loop = asyncio.get_running_loop()
    # ``None`` selects the event loop's default executor.
    return await running_loop.run_in_executor(None, _blocking_index)
|
|
148
229
|
|
|
149
230
|
def get_stats(self) -> IndexStats:
|
|
150
231
|
"""Get current index statistics.
|
|
@@ -166,6 +247,7 @@ def initialize_rag(
|
|
|
166
247
|
settings: RagSettings,
|
|
167
248
|
console: Console | None = None,
|
|
168
249
|
git_root: Path | None = None,
|
|
250
|
+
index: bool = True,
|
|
169
251
|
) -> RagSystem | None:
|
|
170
252
|
"""Initialize the RAG system if in a git repository.
|
|
171
253
|
|
|
@@ -173,6 +255,7 @@ def initialize_rag(
|
|
|
173
255
|
settings: RAG settings from configuration.
|
|
174
256
|
console: Rich console for output.
|
|
175
257
|
git_root: Optional pre-computed git root.
|
|
258
|
+
index: Whether to run indexing immediately (blocking).
|
|
176
259
|
|
|
177
260
|
Returns:
|
|
178
261
|
RagSystem instance if successful, None if not in a git repo
|
|
@@ -189,17 +272,20 @@ def initialize_rag(
|
|
|
189
272
|
# Check for and migrate old index
|
|
190
273
|
cache_dir = Path(settings.cache_dir) if settings.cache_dir else None
|
|
191
274
|
new_index_dir = get_repository_index_dir(root, cache_dir)
|
|
192
|
-
|
|
275
|
+
|
|
193
276
|
migrated = migrate_old_index(root, new_index_dir)
|
|
194
277
|
if migrated and console:
|
|
195
278
|
console.print(
|
|
196
279
|
"[dim]Migrated RAG index from project directory to "
|
|
197
|
-
|
|
280
|
+
"~/.henchman/rag_indices/[/dim]"
|
|
198
281
|
)
|
|
199
282
|
|
|
200
283
|
rag_system = RagSystem(git_root=root, settings=settings)
|
|
201
284
|
|
|
202
|
-
|
|
285
|
+
if not index:
|
|
286
|
+
return rag_system
|
|
287
|
+
|
|
288
|
+
# Run indexing (blocking)
|
|
203
289
|
stats = rag_system.index(console=console)
|
|
204
290
|
|
|
205
291
|
# Show summary
|
henchman/utils/compaction.py
CHANGED
|
@@ -259,13 +259,14 @@ class ContextCompactor:
|
|
|
259
259
|
)
|
|
260
260
|
protected_tokens = TokenCounter.count_messages(protected_msgs)
|
|
261
261
|
|
|
262
|
-
# Group unprotected messages into atomic sequences
|
|
263
|
-
sequences = self._group_into_sequences(unprotected_msgs)
|
|
264
|
-
|
|
265
262
|
# Separate system messages (always kept)
|
|
266
263
|
system_msgs = [msg for msg in unprotected_msgs if msg.role == "system"]
|
|
267
264
|
system_tokens = TokenCounter.count_messages(system_msgs)
|
|
268
265
|
|
|
266
|
+
# Group non-system unprotected messages into atomic sequences
|
|
267
|
+
non_system_unprotected = [msg for msg in unprotected_msgs if msg.role != "system"]
|
|
268
|
+
sequences = self._group_into_sequences(non_system_unprotected)
|
|
269
|
+
|
|
269
270
|
# Calculate budget for unprotected sequences
|
|
270
271
|
# Must fit: system + kept sequences + protected zone
|
|
271
272
|
budget = self.max_tokens - system_tokens - protected_tokens
|
henchman/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: henchman-ai
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.12
|
|
4
4
|
Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
|
|
5
5
|
Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
|
|
6
6
|
Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
|
|
2
2
|
henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
|
|
3
|
-
henchman/version.py,sha256=
|
|
3
|
+
henchman/version.py,sha256=UFJFO9ixJBEALb9BGtb2TE9cid8MpfI03n3BvBeWoiA,161
|
|
4
4
|
henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
|
|
5
|
-
henchman/cli/app.py,sha256=
|
|
6
|
-
henchman/cli/console.py,sha256=
|
|
7
|
-
henchman/cli/input.py,sha256=
|
|
5
|
+
henchman/cli/app.py,sha256=ausKDDrRJ5KgiespK2P9vhX1yn-DxdJhYyBJ6tB5sb4,11507
|
|
6
|
+
henchman/cli/console.py,sha256=S4Jvq0UTmu9KtOkLNsIsvG_8X9eg1Guc6NAh8T_JeNI,8017
|
|
7
|
+
henchman/cli/input.py,sha256=oMKMF1CQCZrON5gqy8mtbYqIoGUvXcBEiDZeTxC9B6s,7129
|
|
8
8
|
henchman/cli/json_output.py,sha256=9kP9S5q0xBgP4HQGTT4P6DDT76F9VVTdEY_KiEpoZnI,2669
|
|
9
|
-
henchman/cli/prompts.py,sha256=
|
|
10
|
-
henchman/cli/repl.py,sha256=
|
|
9
|
+
henchman/cli/prompts.py,sha256=m3Velzi2tXBIHinN9jIpU9kDMYL80ngYQsv2EYo7IZU,6647
|
|
10
|
+
henchman/cli/repl.py,sha256=fkeaMEGnFaFZ-HIjBLI1DUfos9ebGkHrPUUNMWS_LLU,26535
|
|
11
11
|
henchman/cli/repl.py.backup,sha256=3iagruUgsvtcfpDv1mTAYg4I14X4CaNSEeMQjj91src,15638
|
|
12
12
|
henchman/cli/repl.py.backup2,sha256=-zgSUrnobd_sHq3jG-8NbwPTVlPc3FaqSkv32gAFdPo,11328
|
|
13
|
-
henchman/cli/commands/__init__.py,sha256=
|
|
14
|
-
henchman/cli/commands/builtins.py,sha256
|
|
15
|
-
henchman/cli/commands/chat.py,sha256=
|
|
13
|
+
henchman/cli/commands/__init__.py,sha256=8s6NBCPlc4jKTCdvnKJCmdLwRCQ4QLCARjQbr7ICipw,3828
|
|
14
|
+
henchman/cli/commands/builtins.py,sha256=-XOAY0EzvyHYnoOc6QMwVve7aMEWPYiMUUjor4OzBqg,5439
|
|
15
|
+
henchman/cli/commands/chat.py,sha256=ePPRh68ZHHS_l1Uj7fUtjBQrVKOx6WvZQsuIzXdxgjY,6204
|
|
16
16
|
henchman/cli/commands/extensions.py,sha256=r7PfvbBjwBr5WhF8G49p29z7FKx6geRJiR-R67pj6i0,1758
|
|
17
17
|
henchman/cli/commands/mcp.py,sha256=bbW1J9-fIpvDBIba3L1MAkNqCjFBTZnZLNIgf6LjJEA,3554
|
|
18
18
|
henchman/cli/commands/plan.py,sha256=5ZXePoMVIKBxugSnDB6N2TEDpl2xZszQDz9wTQffzpY,2486
|
|
19
|
-
henchman/cli/commands/rag.py,sha256=
|
|
19
|
+
henchman/cli/commands/rag.py,sha256=sXY7MCZ4UMVzNX2ALVM8wt7q82PZovwVHOSMDfot8jQ,7308
|
|
20
20
|
henchman/cli/commands/skill.py,sha256=azXb6-KXjtZKwHiBV-Ppk6CdJQKZhetr46hNgZ_r45Q,8096
|
|
21
21
|
henchman/cli/commands/unlimited.py,sha256=eFMTwrcUFWbfJnXpwBcRqviYt66tDz4xAYBDcton50Y,2101
|
|
22
22
|
henchman/config/__init__.py,sha256=28UtrhPye0MEmbdvi1jCqO3uIXfmqSAZVWvnpJv-qTo,637
|
|
@@ -28,7 +28,7 @@ henchman/core/agent.py,sha256=l9BJO8Zw4bMdUyTDjcZKG84WdZ1Kndm3Y09oUAZFYp0,13475
|
|
|
28
28
|
henchman/core/agent.py.backup,sha256=Tq0IhWAPMRQTxjETeH7WTosEmzuUVz7um0YbCnuNbLQ,7417
|
|
29
29
|
henchman/core/events.py,sha256=Uijv3NGNV8yJnQfY48u0pBBvEauAEczAbkGARJy8mfI,1423
|
|
30
30
|
henchman/core/session.py,sha256=NkwEG2ZS2uh2ZeX8_LkSN7MwQlBxwhTXjx0BoNUZLDw,12475
|
|
31
|
-
henchman/core/turn.py,sha256=
|
|
31
|
+
henchman/core/turn.py,sha256=iRaTr_8hGSGQUt0vqmUE8w5D-drvkLeZlNEHmNUfX0M,8617
|
|
32
32
|
henchman/extensions/__init__.py,sha256=C7LrK50uiHwmLlOGkQyngbFvuUYdCcZEb6ucOklY_ws,310
|
|
33
33
|
henchman/extensions/base.py,sha256=cHUzWu4OGFju9Wr1xAiGHZOOW7eQbcC1-dqEd2Oe3QM,2290
|
|
34
34
|
henchman/extensions/manager.py,sha256=xHxMo0-BxzyFfD3fZgvsiovORYpMM6sPnDPOTfULZSU,6666
|
|
@@ -47,11 +47,12 @@ henchman/providers/openai_compat.py.backup,sha256=Gmi5k1-DjUt8Kx5UaXmiSNKSDBGh0G
|
|
|
47
47
|
henchman/providers/registry.py,sha256=xsOaYuaemgDOOi-JLi6URbto0dQP77y-Lo__zzUuEGU,2758
|
|
48
48
|
henchman/rag/__init__.py,sha256=5Gbo7SZYPrZK8YLFn3wqfPJ_PlPV9uVHYy3NOGwjPok,1102
|
|
49
49
|
henchman/rag/chunker.py,sha256=3fc9OuGb7AgkT0Qy5fOQcwa3eCiJOcffAx133I2lfuQ,6040
|
|
50
|
+
henchman/rag/concurrency.py,sha256=-CQUm-N4K-xujSjLZAwwI1y3kdf8OLstBQ6T7KWuRoI,6689
|
|
50
51
|
henchman/rag/embedder.py,sha256=J2-cIEIoS2iUh4k6PM-rgl7wkTOXSG1NrOQvXHTQPho,4080
|
|
51
52
|
henchman/rag/indexer.py,sha256=6oVOkv4lD_elACivPL9Noe5zgpterYDZ3f1XlLyyULc,11806
|
|
52
|
-
henchman/rag/repo_id.py,sha256=
|
|
53
|
-
henchman/rag/store.py,sha256=
|
|
54
|
-
henchman/rag/system.py,sha256=
|
|
53
|
+
henchman/rag/repo_id.py,sha256=ZRPKM8fzwmETgrOYwE1PGjRp3c8XQFrR493BrDZlbd8,5755
|
|
54
|
+
henchman/rag/store.py,sha256=eN0Rj2Lo6zJp2iWCXsJ-q24l2T_pnlTF3Oeea60gnfs,8826
|
|
55
|
+
henchman/rag/system.py,sha256=uEftMJJCy0wlHvt09j2YP7xDdN0if8eCYD4MnS7_Xvc,9869
|
|
55
56
|
henchman/skills/__init__.py,sha256=cvCl6HRxsUdag-RTpMP__Ww_hee37ggpAXQ41wXemEU,149
|
|
56
57
|
henchman/skills/executor.py,sha256=sYss_83zduFLB_AACTSXMZHLA_lv-T1iKHSxelpv13U,1105
|
|
57
58
|
henchman/skills/learner.py,sha256=lzIrLU5_oLbqDYF673F-rwb1IaWeeOqjzcsBGC-IKlM,1644
|
|
@@ -72,12 +73,12 @@ henchman/tools/builtins/rag_search.py,sha256=yk0z0mIVRH-yl47uteNXTy76aXP8PLxBq51
|
|
|
72
73
|
henchman/tools/builtins/shell.py,sha256=Gx8x1jBq1NvERFnc-kUNMovFoWg_i4IrV_askSECfEM,4134
|
|
73
74
|
henchman/tools/builtins/web_fetch.py,sha256=uwgZm0ye3yDuS2U2DPV4D-8bjviYDTKN-cNi7mCMRpw,3370
|
|
74
75
|
henchman/utils/__init__.py,sha256=ayu2XRNx3Fw0z8vbIne63A3gBjxu779QE8sUQsjNnm4,240
|
|
75
|
-
henchman/utils/compaction.py,sha256=
|
|
76
|
+
henchman/utils/compaction.py,sha256=ARS0jUDI2adsoCTfJjygRom31N16QtWbRzNXDKzX6cA,22871
|
|
76
77
|
henchman/utils/retry.py,sha256=sobZk9LLGxglSJw_jeNaBYCrvH14YNFrBVyp_OwLWcw,4993
|
|
77
78
|
henchman/utils/tokens.py,sha256=D9H4ciFNH7l1b05IGbw0U0tmy2yF5aItFZyDufGF53k,5665
|
|
78
79
|
henchman/utils/validation.py,sha256=moj4LQXVXt2J-3_pWVH_0-EabyRYApOU2Oh5JSTIua8,4146
|
|
79
|
-
henchman_ai-0.1.
|
|
80
|
-
henchman_ai-0.1.
|
|
81
|
-
henchman_ai-0.1.
|
|
82
|
-
henchman_ai-0.1.
|
|
83
|
-
henchman_ai-0.1.
|
|
80
|
+
henchman_ai-0.1.12.dist-info/METADATA,sha256=Ht1dV7MGoqu7E2XAubPT9sZbNxFepLvA9jGQxw2KTas,3552
|
|
81
|
+
henchman_ai-0.1.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
82
|
+
henchman_ai-0.1.12.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
|
|
83
|
+
henchman_ai-0.1.12.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
|
|
84
|
+
henchman_ai-0.1.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|