henchman-ai 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,16 +80,16 @@ class GlobTool(Tool):
80
80
  # Use a generator approach to avoid loading all files into memory if possible
81
81
  # But glob() returns a generator anyway.
82
82
  matches_iter = base_path.glob(pattern)
83
-
83
+
84
84
  matches = []
85
85
  truncated = False
86
-
86
+
87
87
  try:
88
88
  for _ in range(self.MAX_MATCHES + 1):
89
89
  matches.append(next(matches_iter))
90
90
  except StopIteration:
91
91
  pass
92
-
92
+
93
93
  if len(matches) > self.MAX_MATCHES:
94
94
  truncated = True
95
95
  matches = matches[:self.MAX_MATCHES]
@@ -108,7 +108,7 @@ class GlobTool(Tool):
108
108
  results.append(str(rel_path))
109
109
  except ValueError: # pragma: no cover
110
110
  results.append(str(match))
111
-
111
+
112
112
  if truncated:
113
113
  results.append(f"... Output truncated (limit reached: {self.MAX_MATCHES} matches) ...")
114
114
 
@@ -132,11 +132,8 @@ class GrepTool(Tool):
132
132
  for i, line in enumerate(lines, 1):
133
133
  if regex.search(line):
134
134
  prefix = f"{file_path}:" if len(files) > 1 else ""
135
- if line_numbers:
136
- match_str = f"{prefix}{i}:{line}"
137
- else:
138
- match_str = f"{prefix}{line}"
139
-
135
+ match_str = f"{prefix}{i}:{line}" if line_numbers else f"{prefix}{line}"
136
+
140
137
  results.append(match_str)
141
138
  total_chars += len(match_str) + 1 # +1 for newline
142
139
 
@@ -86,24 +86,24 @@ class LsTool(Tool):
86
86
  # List directory contents
87
87
  entries = []
88
88
  truncated = False
89
-
89
+
90
90
  # Use iterdir() which returns an iterator
91
91
  iterator = target.iterdir()
92
- # We can't sort immediately if we want to limit processing,
92
+ # We can't sort immediately if we want to limit processing,
93
93
  # but for consistent output on small dirs, sorting is better.
94
94
  # So we collect up to limit + 1
95
-
95
+
96
96
  all_items = []
97
97
  try:
98
98
  for _ in range(self.MAX_ITEMS + 1):
99
99
  all_items.append(next(iterator))
100
100
  except StopIteration:
101
101
  pass
102
-
102
+
103
103
  if len(all_items) > self.MAX_ITEMS:
104
104
  truncated = True
105
105
  all_items = all_items[:self.MAX_ITEMS]
106
-
106
+
107
107
  # Sort the collected items
108
108
  all_items.sort(key=lambda p: p.name)
109
109
 
@@ -117,7 +117,7 @@ class LsTool(Tool):
117
117
  entries.append(f"{item.name}/")
118
118
  else:
119
119
  entries.append(item.name)
120
-
120
+
121
121
  if truncated:
122
122
  entries.append(f"... Output truncated (limit reached: {self.MAX_ITEMS} items) ...")
123
123
 
@@ -114,7 +114,7 @@ class ShellTool(Tool):
114
114
  output_parts.append(stderr_text)
115
115
 
116
116
  output = "\n".join(output_parts)
117
-
117
+
118
118
  # Truncate if too long
119
119
  if len(output) > self.MAX_OUTPUT_CHARS:
120
120
  output = output[:self.MAX_OUTPUT_CHARS] + f"\n... (output truncated after {self.MAX_OUTPUT_CHARS} chars)"
@@ -68,9 +68,6 @@ class ContextCompactor:
68
68
 
69
69
  Preserves atomic sequences, especially tool call sequences.
70
70
  """
71
-
72
- # Safety limit for individual message size
73
- MAX_MESSAGE_CHARS = 100_000
74
71
 
75
72
  def __init__(self, max_tokens: int = 8000) -> None:
76
73
  """Initialize compactor.
@@ -81,10 +78,10 @@ class ContextCompactor:
81
78
  self.max_tokens = max_tokens
82
79
 
83
80
  def enforce_safety_limits(self, messages: list[Message]) -> list[Message]:
84
- """Enforce hard safety limits on individual message content size.
81
+ """Enforce limits on individual message size using tokens.
85
82
 
86
83
  This prevents context overflow from individual massive messages
87
- that might slip through tool-specific limits.
84
+ by truncating them to fit within the context window.
88
85
 
89
86
  Args:
90
87
  messages: List of messages to check.
@@ -93,11 +90,33 @@ class ContextCompactor:
93
90
  List of messages with content limits enforced.
94
91
  """
95
92
  safe_messages = []
93
+ # Reserve tokens for overhead/other messages.
94
+ # We use 75% of max_tokens to allow for message overhead, system prompts,
95
+ # and the truncation suffix itself.
96
+ limit = int(self.max_tokens * 0.75)
97
+
96
98
  for msg in messages:
97
- # Check content length
98
- if msg.content and len(msg.content) > self.MAX_MESSAGE_CHARS:
99
- # Create a new message with truncated content
100
- new_content = msg.content[:self.MAX_MESSAGE_CHARS] + f"\n... (truncated by safety limit: > {self.MAX_MESSAGE_CHARS} chars)"
99
+ if not msg.content:
100
+ safe_messages.append(msg)
101
+ continue
102
+
103
+ # Quick character check optimization:
104
+ # Counting tokens is comparatively expensive, so compare the character
105
+ # count against the token limit first.
106
+ # A token usually covers one or more characters (roughly 4 on average).
107
+ # Tiktoken: "hello" (5 chars) -> 1 token. " " (1 char) -> 1 token.
108
+ # So the token count is generally <= the character count.
109
+ # NOTE: heuristic only; some non-ASCII characters can take several tokens.
110
+ # So if len(msg.content) < limit, we treat the message as safe.
111
+ if len(msg.content) < limit:
112
+ safe_messages.append(msg)
113
+ continue
114
+
115
+ # Check token count
116
+ if TokenCounter.count_text(msg.content) > limit:
117
+ # Truncate
118
+ truncated_content = TokenCounter.truncate_text(msg.content, limit)
119
+ new_content = truncated_content + f"\n... (truncated by safety limit: > {limit} tokens)"
101
120
 
102
121
  # Create copy with modified content
103
122
  safe_msg = Message(
@@ -111,7 +130,6 @@ class ContextCompactor:
111
130
  safe_messages.append(msg)
112
131
 
113
132
  return safe_messages
114
-
115
133
  def _group_into_sequences(self, messages: list[Message]) -> list[MessageSequence]:
116
134
  """Group messages into atomic sequences that must be kept together.
117
135
 
@@ -183,7 +201,7 @@ class ContextCompactor:
183
201
  """
184
202
  if not messages: # pragma: no cover
185
203
  return []
186
-
204
+
187
205
  # First, enforce safety limits on individual messages
188
206
  # This prevents massive messages from breaking the token counter or API
189
207
  messages = self.enforce_safety_limits(messages)
@@ -461,7 +479,7 @@ async def compact_with_summarization(
461
479
  return result
462
480
 
463
481
  # Identify dropped messages for summarization
464
- kept_set = set(id(m) for m in result.messages)
482
+ kept_set = {id(m) for m in result.messages}
465
483
  dropped_messages = [m for m in messages if id(m) not in kept_set]
466
484
 
467
485
  # Attempt summarization if enabled and we have a provider
henchman/utils/tokens.py CHANGED
@@ -110,6 +110,30 @@ class TokenCounter:
110
110
  encoding = cls._get_encoding(model)
111
111
  return len(encoding.encode(text))
112
112
 
113
+ @classmethod
114
+ def truncate_text(cls, text: str, max_tokens: int, model: str | None = None) -> str:
115
+ """Truncate text to a maximum number of tokens.
116
+
117
+ Args:
118
+ text: The text to truncate.
119
+ max_tokens: Maximum number of tokens allowed.
120
+ model: Optional model name.
121
+
122
+ Returns:
123
+ The truncated text.
124
+ """
125
+ if not text:
126
+ return ""
127
+
128
+ encoding = cls._get_encoding(model)
129
+ tokens = encoding.encode(text)
130
+ if len(tokens) <= max_tokens:
131
+ return text
132
+
133
+ # Decode the truncated tokens
134
+ # Note: We don't handle partial unicode bytes here as tiktoken handles text -> tokens -> text
135
+ return encoding.decode(tokens[:max_tokens])
136
+
113
137
  @classmethod
114
138
  def count_messages(cls, messages: list[Message], model: str | None = None) -> int:
115
139
  """Count tokens in a list of messages.
henchman/version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Version information for Henchman-AI."""
2
2
 
3
- VERSION_TUPLE = (0, 1, 6)
3
+ VERSION_TUPLE = (0, 1, 7)
4
4
  VERSION = ".".join(str(v) for v in VERSION_TUPLE)
5
5
 
6
6
  __all__ = ["VERSION", "VERSION_TUPLE"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: henchman-ai
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
5
5
  Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
6
6
  Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai
@@ -1,6 +1,6 @@
1
1
  henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
2
2
  henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
3
- henchman/version.py,sha256=5Z3z01BWpsFAAqfIHjPWW4YpDhynfz8POxb252U_T2U,160
3
+ henchman/version.py,sha256=__LbucVLec_Xjo5kM2xlVJLc9NovQpC_LE82ONoodKg,160
4
4
  henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
5
5
  henchman/cli/app.py,sha256=AFiMOfqYdwJrzcp5LRqwgwic2A6yhAUr_01w6BQwPq8,6097
6
6
  henchman/cli/console.py,sha256=TOuGBSNUaxxQypmmzC0P1IY7tBNlaTgAZesKy8uuZN4,7850
@@ -55,17 +55,17 @@ henchman/tools/builtins/ask_user.py,sha256=xPu74cB0rYahZHajVdjKgdmKU121SWyAgZSkU
55
55
  henchman/tools/builtins/file_edit.py,sha256=VjfpYVZulpIBufRSIsTx9eD5gYGnSybksyo5vGCL4wo,3709
56
56
  henchman/tools/builtins/file_read.py,sha256=RJCsK9Y-M2bd4IB8hnGaMjdzl62WSq7wOS9apcA3thA,4173
57
57
  henchman/tools/builtins/file_write.py,sha256=0vDAe6JAZHDdGIhSpf2q4ApxQ_DKL0L49_jfqogsiXo,2584
58
- henchman/tools/builtins/glob_tool.py,sha256=4zlPov-FONFHRFoe9Q49rDJNe_9E1jO-62IlEOZzHvU,3703
59
- henchman/tools/builtins/grep.py,sha256=r68Pm9wHwF2jqNuD5DcpPeIwM8a0YB2uz_ejm2KgksM,5455
60
- henchman/tools/builtins/ls.py,sha256=aSg_5D8zddLfio4I3p5EAS8QleRVaCa-laYWY8T1r2A,4232
61
- henchman/tools/builtins/shell.py,sha256=noDimK35cIc5PhXcSq5DV9h8D41c5DzFQfBzHlRII2M,4146
58
+ henchman/tools/builtins/glob_tool.py,sha256=7NAlan5A6v-RWAIUj8ID78aYRSvXe9Jtt2I6ICzEcus,3651
59
+ henchman/tools/builtins/grep.py,sha256=PV8X2ydnAutrWCS5VR9lABFpfSv0Olzsqa1Ktb5X4z0,5321
60
+ henchman/tools/builtins/ls.py,sha256=5iSqHilrEiZ8ziOG4nKwC90fuLEx01V_0BzfS2PNAro,4167
61
+ henchman/tools/builtins/shell.py,sha256=Gx8x1jBq1NvERFnc-kUNMovFoWg_i4IrV_askSECfEM,4134
62
62
  henchman/tools/builtins/web_fetch.py,sha256=uwgZm0ye3yDuS2U2DPV4D-8bjviYDTKN-cNi7mCMRpw,3370
63
63
  henchman/utils/__init__.py,sha256=tqyNdgGqZrcISSg2vBtMlVxsOvwaLo3zjqIk5f3QkhM,37
64
- henchman/utils/compaction.py,sha256=3lw-plxTX_YYdUs3PTGSlUaFAOrBbELCxfyc0q2HGLQ,16618
65
- henchman/utils/tokens.py,sha256=ortHhy6btO0JTCubeADIJXEfQK0X30mmssoTroYpsXs,4905
64
+ henchman/utils/compaction.py,sha256=Urj7z8Q8p-S8Euk4Hf_6Q7Q4h_jw-KMiNco1ioOqoNo,17547
65
+ henchman/utils/tokens.py,sha256=vzJTUT4qrwYqH46yW6bd4vaCPelmaua_TpEgLF7FamY,5673
66
66
  henchman/utils/validation.py,sha256=UNt2CQ3b1SOGyhJu8PrdMbxQ80MnykmlQJd7ANJnBKQ,3852
67
- henchman_ai-0.1.6.dist-info/METADATA,sha256=qShP4hvGxFvGdBHyclIvA1XCyz3cfM3d--wPbi7Oj_M,3492
68
- henchman_ai-0.1.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
69
- henchman_ai-0.1.6.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
70
- henchman_ai-0.1.6.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
71
- henchman_ai-0.1.6.dist-info/RECORD,,
67
+ henchman_ai-0.1.7.dist-info/METADATA,sha256=kJmH9DyuFUU4kbb-K7WZ3prI9N8nPP4ALLBqwCknxGU,3492
68
+ henchman_ai-0.1.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
69
+ henchman_ai-0.1.7.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
70
+ henchman_ai-0.1.7.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
71
+ henchman_ai-0.1.7.dist-info/RECORD,,