henchman-ai 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- henchman/tools/builtins/glob_tool.py +4 -4
- henchman/tools/builtins/grep.py +2 -5
- henchman/tools/builtins/ls.py +6 -6
- henchman/tools/builtins/shell.py +1 -1
- henchman/utils/compaction.py +30 -12
- henchman/utils/tokens.py +24 -0
- henchman/version.py +1 -1
- {henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/METADATA +1 -1
- {henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/RECORD +12 -12
- {henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/WHEEL +0 -0
- {henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/entry_points.txt +0 -0
- {henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/licenses/LICENSE +0 -0
|
@@ -80,16 +80,16 @@ class GlobTool(Tool):
|
|
|
80
80
|
# Use a generator approach to avoid loading all files into memory if possible
|
|
81
81
|
# But glob() returns a generator anyway.
|
|
82
82
|
matches_iter = base_path.glob(pattern)
|
|
83
|
-
|
|
83
|
+
|
|
84
84
|
matches = []
|
|
85
85
|
truncated = False
|
|
86
|
-
|
|
86
|
+
|
|
87
87
|
try:
|
|
88
88
|
for _ in range(self.MAX_MATCHES + 1):
|
|
89
89
|
matches.append(next(matches_iter))
|
|
90
90
|
except StopIteration:
|
|
91
91
|
pass
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
if len(matches) > self.MAX_MATCHES:
|
|
94
94
|
truncated = True
|
|
95
95
|
matches = matches[:self.MAX_MATCHES]
|
|
@@ -108,7 +108,7 @@ class GlobTool(Tool):
|
|
|
108
108
|
results.append(str(rel_path))
|
|
109
109
|
except ValueError: # pragma: no cover
|
|
110
110
|
results.append(str(match))
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
if truncated:
|
|
113
113
|
results.append(f"... Output truncated (limit reached: {self.MAX_MATCHES} matches) ...")
|
|
114
114
|
|
henchman/tools/builtins/grep.py
CHANGED
|
@@ -132,11 +132,8 @@ class GrepTool(Tool):
|
|
|
132
132
|
for i, line in enumerate(lines, 1):
|
|
133
133
|
if regex.search(line):
|
|
134
134
|
prefix = f"{file_path}:" if len(files) > 1 else ""
|
|
135
|
-
if line_numbers
|
|
136
|
-
|
|
137
|
-
else:
|
|
138
|
-
match_str = f"{prefix}{line}"
|
|
139
|
-
|
|
135
|
+
match_str = f"{prefix}{i}:{line}" if line_numbers else f"{prefix}{line}"
|
|
136
|
+
|
|
140
137
|
results.append(match_str)
|
|
141
138
|
total_chars += len(match_str) + 1 # +1 for newline
|
|
142
139
|
|
henchman/tools/builtins/ls.py
CHANGED
|
@@ -86,24 +86,24 @@ class LsTool(Tool):
|
|
|
86
86
|
# List directory contents
|
|
87
87
|
entries = []
|
|
88
88
|
truncated = False
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
# Use iterdir() which returns an iterator
|
|
91
91
|
iterator = target.iterdir()
|
|
92
|
-
# We can't sort immediately if we want to limit processing,
|
|
92
|
+
# We can't sort immediately if we want to limit processing,
|
|
93
93
|
# but for consistent output on small dirs, sorting is better.
|
|
94
94
|
# So we collect up to limit + 1
|
|
95
|
-
|
|
95
|
+
|
|
96
96
|
all_items = []
|
|
97
97
|
try:
|
|
98
98
|
for _ in range(self.MAX_ITEMS + 1):
|
|
99
99
|
all_items.append(next(iterator))
|
|
100
100
|
except StopIteration:
|
|
101
101
|
pass
|
|
102
|
-
|
|
102
|
+
|
|
103
103
|
if len(all_items) > self.MAX_ITEMS:
|
|
104
104
|
truncated = True
|
|
105
105
|
all_items = all_items[:self.MAX_ITEMS]
|
|
106
|
-
|
|
106
|
+
|
|
107
107
|
# Sort the collected items
|
|
108
108
|
all_items.sort(key=lambda p: p.name)
|
|
109
109
|
|
|
@@ -117,7 +117,7 @@ class LsTool(Tool):
|
|
|
117
117
|
entries.append(f"{item.name}/")
|
|
118
118
|
else:
|
|
119
119
|
entries.append(item.name)
|
|
120
|
-
|
|
120
|
+
|
|
121
121
|
if truncated:
|
|
122
122
|
entries.append(f"... Output truncated (limit reached: {self.MAX_ITEMS} items) ...")
|
|
123
123
|
|
henchman/tools/builtins/shell.py
CHANGED
|
@@ -114,7 +114,7 @@ class ShellTool(Tool):
|
|
|
114
114
|
output_parts.append(stderr_text)
|
|
115
115
|
|
|
116
116
|
output = "\n".join(output_parts)
|
|
117
|
-
|
|
117
|
+
|
|
118
118
|
# Truncate if too long
|
|
119
119
|
if len(output) > self.MAX_OUTPUT_CHARS:
|
|
120
120
|
output = output[:self.MAX_OUTPUT_CHARS] + f"\n... (output truncated after {self.MAX_OUTPUT_CHARS} chars)"
|
henchman/utils/compaction.py
CHANGED
|
@@ -68,9 +68,6 @@ class ContextCompactor:
|
|
|
68
68
|
|
|
69
69
|
Preserves atomic sequences, especially tool call sequences.
|
|
70
70
|
"""
|
|
71
|
-
|
|
72
|
-
# Safety limit for individual message size
|
|
73
|
-
MAX_MESSAGE_CHARS = 100_000
|
|
74
71
|
|
|
75
72
|
def __init__(self, max_tokens: int = 8000) -> None:
|
|
76
73
|
"""Initialize compactor.
|
|
@@ -81,10 +78,10 @@ class ContextCompactor:
|
|
|
81
78
|
self.max_tokens = max_tokens
|
|
82
79
|
|
|
83
80
|
def enforce_safety_limits(self, messages: list[Message]) -> list[Message]:
|
|
84
|
-
"""Enforce
|
|
81
|
+
"""Enforce limits on individual message size using tokens.
|
|
85
82
|
|
|
86
83
|
This prevents context overflow from individual massive messages
|
|
87
|
-
|
|
84
|
+
by truncating them to fit within the context window.
|
|
88
85
|
|
|
89
86
|
Args:
|
|
90
87
|
messages: List of messages to check.
|
|
@@ -93,11 +90,33 @@ class ContextCompactor:
|
|
|
93
90
|
List of messages with content limits enforced.
|
|
94
91
|
"""
|
|
95
92
|
safe_messages = []
|
|
93
|
+
# Reserve tokens for overhead/other messages.
|
|
94
|
+
# We use 75% of max_tokens to allow for message overhead, system prompts,
|
|
95
|
+
# and the truncation suffix itself.
|
|
96
|
+
limit = int(self.max_tokens * 0.75)
|
|
97
|
+
|
|
96
98
|
for msg in messages:
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
99
|
+
if not msg.content:
|
|
100
|
+
safe_messages.append(msg)
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
# Quick character check optimization:
|
|
104
|
+
# If chars < limit, tokens are definitely < limit (1 token >= 1 char usually)
|
|
105
|
+
# Actually, 1 token ~ 4 chars. So if chars < limit, it's definitely safe?
|
|
106
|
+
# No, if chars < limit, tokens could be anything.
|
|
107
|
+
# But if chars < limit (tokens), then tokens < limit is guaranteed since token count <= char count?
|
|
108
|
+
# Tiktoken: "hello" (5 chars) -> 1 token. " " (1 char) -> 1 token.
|
|
109
|
+
# Generally token count < char count.
|
|
110
|
+
# So if len(msg.content) < limit, we are safe.
|
|
111
|
+
if len(msg.content) < limit:
|
|
112
|
+
safe_messages.append(msg)
|
|
113
|
+
continue
|
|
114
|
+
|
|
115
|
+
# Check token count
|
|
116
|
+
if TokenCounter.count_text(msg.content) > limit:
|
|
117
|
+
# Truncate
|
|
118
|
+
truncated_content = TokenCounter.truncate_text(msg.content, limit)
|
|
119
|
+
new_content = truncated_content + f"\n... (truncated by safety limit: > {limit} tokens)"
|
|
101
120
|
|
|
102
121
|
# Create copy with modified content
|
|
103
122
|
safe_msg = Message(
|
|
@@ -111,7 +130,6 @@ class ContextCompactor:
|
|
|
111
130
|
safe_messages.append(msg)
|
|
112
131
|
|
|
113
132
|
return safe_messages
|
|
114
|
-
|
|
115
133
|
def _group_into_sequences(self, messages: list[Message]) -> list[MessageSequence]:
|
|
116
134
|
"""Group messages into atomic sequences that must be kept together.
|
|
117
135
|
|
|
@@ -183,7 +201,7 @@ class ContextCompactor:
|
|
|
183
201
|
"""
|
|
184
202
|
if not messages: # pragma: no cover
|
|
185
203
|
return []
|
|
186
|
-
|
|
204
|
+
|
|
187
205
|
# First, enforce safety limits on individual messages
|
|
188
206
|
# This prevents massive messages from breaking the token counter or API
|
|
189
207
|
messages = self.enforce_safety_limits(messages)
|
|
@@ -461,7 +479,7 @@ async def compact_with_summarization(
|
|
|
461
479
|
return result
|
|
462
480
|
|
|
463
481
|
# Identify dropped messages for summarization
|
|
464
|
-
kept_set =
|
|
482
|
+
kept_set = {id(m) for m in result.messages}
|
|
465
483
|
dropped_messages = [m for m in messages if id(m) not in kept_set]
|
|
466
484
|
|
|
467
485
|
# Attempt summarization if enabled and we have a provider
|
henchman/utils/tokens.py
CHANGED
|
@@ -110,6 +110,30 @@ class TokenCounter:
|
|
|
110
110
|
encoding = cls._get_encoding(model)
|
|
111
111
|
return len(encoding.encode(text))
|
|
112
112
|
|
|
113
|
+
@classmethod
|
|
114
|
+
def truncate_text(cls, text: str, max_tokens: int, model: str | None = None) -> str:
|
|
115
|
+
"""Truncate text to a maximum number of tokens.
|
|
116
|
+
|
|
117
|
+
Args:
|
|
118
|
+
text: The text to truncate.
|
|
119
|
+
max_tokens: Maximum number of tokens allowed.
|
|
120
|
+
model: Optional model name.
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
The truncated text.
|
|
124
|
+
"""
|
|
125
|
+
if not text:
|
|
126
|
+
return ""
|
|
127
|
+
|
|
128
|
+
encoding = cls._get_encoding(model)
|
|
129
|
+
tokens = encoding.encode(text)
|
|
130
|
+
if len(tokens) <= max_tokens:
|
|
131
|
+
return text
|
|
132
|
+
|
|
133
|
+
# Decode the truncated tokens
|
|
134
|
+
# Note: We don't handle partial unicode bytes here as tiktoken handles text -> tokens -> text
|
|
135
|
+
return encoding.decode(tokens[:max_tokens])
|
|
136
|
+
|
|
113
137
|
@classmethod
|
|
114
138
|
def count_messages(cls, messages: list[Message], model: str | None = None) -> int:
|
|
115
139
|
"""Count tokens in a list of messages.
|
henchman/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: henchman-ai
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
|
|
5
5
|
Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
|
|
6
6
|
Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
|
|
2
2
|
henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
|
|
3
|
-
henchman/version.py,sha256=
|
|
3
|
+
henchman/version.py,sha256=__LbucVLec_Xjo5kM2xlVJLc9NovQpC_LE82ONoodKg,160
|
|
4
4
|
henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
|
|
5
5
|
henchman/cli/app.py,sha256=AFiMOfqYdwJrzcp5LRqwgwic2A6yhAUr_01w6BQwPq8,6097
|
|
6
6
|
henchman/cli/console.py,sha256=TOuGBSNUaxxQypmmzC0P1IY7tBNlaTgAZesKy8uuZN4,7850
|
|
@@ -55,17 +55,17 @@ henchman/tools/builtins/ask_user.py,sha256=xPu74cB0rYahZHajVdjKgdmKU121SWyAgZSkU
|
|
|
55
55
|
henchman/tools/builtins/file_edit.py,sha256=VjfpYVZulpIBufRSIsTx9eD5gYGnSybksyo5vGCL4wo,3709
|
|
56
56
|
henchman/tools/builtins/file_read.py,sha256=RJCsK9Y-M2bd4IB8hnGaMjdzl62WSq7wOS9apcA3thA,4173
|
|
57
57
|
henchman/tools/builtins/file_write.py,sha256=0vDAe6JAZHDdGIhSpf2q4ApxQ_DKL0L49_jfqogsiXo,2584
|
|
58
|
-
henchman/tools/builtins/glob_tool.py,sha256=
|
|
59
|
-
henchman/tools/builtins/grep.py,sha256=
|
|
60
|
-
henchman/tools/builtins/ls.py,sha256=
|
|
61
|
-
henchman/tools/builtins/shell.py,sha256=
|
|
58
|
+
henchman/tools/builtins/glob_tool.py,sha256=7NAlan5A6v-RWAIUj8ID78aYRSvXe9Jtt2I6ICzEcus,3651
|
|
59
|
+
henchman/tools/builtins/grep.py,sha256=PV8X2ydnAutrWCS5VR9lABFpfSv0Olzsqa1Ktb5X4z0,5321
|
|
60
|
+
henchman/tools/builtins/ls.py,sha256=5iSqHilrEiZ8ziOG4nKwC90fuLEx01V_0BzfS2PNAro,4167
|
|
61
|
+
henchman/tools/builtins/shell.py,sha256=Gx8x1jBq1NvERFnc-kUNMovFoWg_i4IrV_askSECfEM,4134
|
|
62
62
|
henchman/tools/builtins/web_fetch.py,sha256=uwgZm0ye3yDuS2U2DPV4D-8bjviYDTKN-cNi7mCMRpw,3370
|
|
63
63
|
henchman/utils/__init__.py,sha256=tqyNdgGqZrcISSg2vBtMlVxsOvwaLo3zjqIk5f3QkhM,37
|
|
64
|
-
henchman/utils/compaction.py,sha256=
|
|
65
|
-
henchman/utils/tokens.py,sha256=
|
|
64
|
+
henchman/utils/compaction.py,sha256=Urj7z8Q8p-S8Euk4Hf_6Q7Q4h_jw-KMiNco1ioOqoNo,17547
|
|
65
|
+
henchman/utils/tokens.py,sha256=vzJTUT4qrwYqH46yW6bd4vaCPelmaua_TpEgLF7FamY,5673
|
|
66
66
|
henchman/utils/validation.py,sha256=UNt2CQ3b1SOGyhJu8PrdMbxQ80MnykmlQJd7ANJnBKQ,3852
|
|
67
|
-
henchman_ai-0.1.
|
|
68
|
-
henchman_ai-0.1.
|
|
69
|
-
henchman_ai-0.1.
|
|
70
|
-
henchman_ai-0.1.
|
|
71
|
-
henchman_ai-0.1.
|
|
67
|
+
henchman_ai-0.1.7.dist-info/METADATA,sha256=kJmH9DyuFUU4kbb-K7WZ3prI9N8nPP4ALLBqwCknxGU,3492
|
|
68
|
+
henchman_ai-0.1.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
69
|
+
henchman_ai-0.1.7.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
|
|
70
|
+
henchman_ai-0.1.7.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
|
|
71
|
+
henchman_ai-0.1.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|