PyPI - henchman-ai - Versions diffs - 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl - Mend

henchman-ai 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

henchman/tools/builtins/glob_tool.py +4 -4
henchman/tools/builtins/grep.py +2 -5
henchman/tools/builtins/ls.py +6 -6
henchman/tools/builtins/shell.py +1 -1
henchman/utils/compaction.py +30 -12
henchman/utils/tokens.py +24 -0
henchman/version.py +1 -1
{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/METADATA +1 -1
{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/RECORD +12 -12
{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/WHEEL +0 -0
{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/entry_points.txt +0 -0
{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/licenses/LICENSE +0 -0

henchman/tools/builtins/glob_tool.py CHANGED Viewed

@@ -80,16 +80,16 @@ class GlobTool(Tool):
             # Use a generator approach to avoid loading all files into memory if possible
             # But glob() returns a generator anyway.
             matches_iter = base_path.glob(pattern)
             matches = []
             truncated = False
             try:
                 for _ in range(self.MAX_MATCHES + 1):
                     matches.append(next(matches_iter))
             except StopIteration:
                 pass
             if len(matches) > self.MAX_MATCHES:
                 truncated = True
                 matches = matches[:self.MAX_MATCHES]
@@ -108,7 +108,7 @@ class GlobTool(Tool):
                     results.append(str(rel_path))
                 except ValueError:  # pragma: no cover
                     results.append(str(match))
             if truncated:
                 results.append(f"... Output truncated (limit reached: {self.MAX_MATCHES} matches) ...")

henchman/tools/builtins/grep.py CHANGED Viewed

@@ -132,11 +132,8 @@ class GrepTool(Tool):
                     for i, line in enumerate(lines, 1):
                         if regex.search(line):
                             prefix = f"{file_path}:" if len(files) > 1 else ""
-                            if line_numbers:
-                                match_str = f"{prefix}{i}:{line}"
-                            else:
-                                match_str = f"{prefix}{line}"
+                            match_str = f"{prefix}{i}:{line}" if line_numbers else f"{prefix}{line}"
                             results.append(match_str)
                             total_chars += len(match_str) + 1  # +1 for newline

henchman/tools/builtins/ls.py CHANGED Viewed

@@ -86,24 +86,24 @@ class LsTool(Tool):
             # List directory contents
             entries = []
             truncated = False
             # Use iterdir() which returns an iterator
             iterator = target.iterdir()
-            # We can't sort immediately if we want to limit processing,
+            # We can't sort immediately if we want to limit processing,
             # but for consistent output on small dirs, sorting is better.
             # So we collect up to limit + 1
             all_items = []
             try:
                 for _ in range(self.MAX_ITEMS + 1):
                     all_items.append(next(iterator))
             except StopIteration:
                 pass
             if len(all_items) > self.MAX_ITEMS:
                 truncated = True
                 all_items = all_items[:self.MAX_ITEMS]
             # Sort the collected items
             all_items.sort(key=lambda p: p.name)
@@ -117,7 +117,7 @@ class LsTool(Tool):
                     entries.append(f"{item.name}/")
                 else:
                     entries.append(item.name)
             if truncated:
                 entries.append(f"... Output truncated (limit reached: {self.MAX_ITEMS} items) ...")

henchman/tools/builtins/shell.py CHANGED Viewed

@@ -114,7 +114,7 @@ class ShellTool(Tool):
                 output_parts.append(stderr_text)
             output = "\n".join(output_parts)
             # Truncate if too long
             if len(output) > self.MAX_OUTPUT_CHARS:
                 output = output[:self.MAX_OUTPUT_CHARS] + f"\n... (output truncated after {self.MAX_OUTPUT_CHARS} chars)"

henchman/utils/compaction.py CHANGED Viewed

@@ -68,9 +68,6 @@ class ContextCompactor:
     Preserves atomic sequences, especially tool call sequences.
     """
-    # Safety limit for individual message size
-    MAX_MESSAGE_CHARS = 100_000
     def __init__(self, max_tokens: int = 8000) -> None:
         """Initialize compactor.
@@ -81,10 +78,10 @@ class ContextCompactor:
         self.max_tokens = max_tokens
     def enforce_safety_limits(self, messages: list[Message]) -> list[Message]:
-        """Enforce hard safety limits on individual message content size.
+        """Enforce limits on individual message size using tokens.
         This prevents context overflow from individual massive messages
-        that might slip through tool-specific limits.
+        by truncating them to fit within the context window.
         Args:
             messages: List of messages to check.
@@ -93,11 +90,33 @@ class ContextCompactor:
             List of messages with content limits enforced.
         """
         safe_messages = []
+        # Reserve tokens for overhead/other messages.
+        # We use 75% of max_tokens to allow for message overhead, system prompts,
+        # and the truncation suffix itself.
+        limit = int(self.max_tokens * 0.75)
         for msg in messages:
-            # Check content length
-            if msg.content and len(msg.content) > self.MAX_MESSAGE_CHARS:
-                # Create a new message with truncated content
-                new_content = msg.content[:self.MAX_MESSAGE_CHARS] + f"\n... (truncated by safety limit: > {self.MAX_MESSAGE_CHARS} chars)"
+            if not msg.content:
+                safe_messages.append(msg)
+                continue
+            # Quick character check optimization:
+            # If chars < limit, tokens are definitely < limit (1 token >= 1 char usually)
+            # Actually, 1 token ~ 4 chars. So if chars < limit, it's definitely safe?
+            # No, if chars < limit, tokens could be anything.
+            # But if chars < limit (tokens), then tokens < limit is guaranteed since token count <= char count?
+            # Tiktoken: "hello" (5 chars) -> 1 token. " " (1 char) -> 1 token.
+            # Generally token count < char count.
+            # So if len(msg.content) < limit, we are safe.
+            if len(msg.content) < limit:
+                safe_messages.append(msg)
+                continue
+            # Check token count
+            if TokenCounter.count_text(msg.content) > limit:
+                # Truncate
+                truncated_content = TokenCounter.truncate_text(msg.content, limit)
+                new_content = truncated_content + f"\n... (truncated by safety limit: > {limit} tokens)"
                 # Create copy with modified content
                 safe_msg = Message(
@@ -111,7 +130,6 @@ class ContextCompactor:
                 safe_messages.append(msg)
         return safe_messages
     def _group_into_sequences(self, messages: list[Message]) -> list[MessageSequence]:
         """Group messages into atomic sequences that must be kept together.
@@ -183,7 +201,7 @@ class ContextCompactor:
         """
         if not messages:  # pragma: no cover
             return []
         # First, enforce safety limits on individual messages
         # This prevents massive messages from breaking the token counter or API
         messages = self.enforce_safety_limits(messages)
@@ -461,7 +479,7 @@ async def compact_with_summarization(
         return result
     # Identify dropped messages for summarization
-    kept_set = set(id(m) for m in result.messages)
+    kept_set = {id(m) for m in result.messages}
     dropped_messages = [m for m in messages if id(m) not in kept_set]
     # Attempt summarization if enabled and we have a provider

henchman/utils/tokens.py CHANGED Viewed

@@ -110,6 +110,30 @@ class TokenCounter:
         encoding = cls._get_encoding(model)
         return len(encoding.encode(text))
+    @classmethod
+    def truncate_text(cls, text: str, max_tokens: int, model: str | None = None) -> str:
+        """Truncate text to a maximum number of tokens.
+        Args:
+            text: The text to truncate.
+            max_tokens: Maximum number of tokens allowed.
+            model: Optional model name.
+        Returns:
+            The truncated text.
+        """
+        if not text:
+            return ""
+        encoding = cls._get_encoding(model)
+        tokens = encoding.encode(text)
+        if len(tokens) <= max_tokens:
+            return text
+        # Decode the truncated tokens
+        # Note: We don't handle partial unicode bytes here as tiktoken handles text -> tokens -> text
+        return encoding.decode(tokens[:max_tokens])
     @classmethod
     def count_messages(cls, messages: list[Message], model: str | None = None) -> int:
         """Count tokens in a list of messages.

henchman/version.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Version information for Henchman-AI."""
-VERSION_TUPLE = (0, 1, 6)
+VERSION_TUPLE = (0, 1, 7)
 VERSION = ".".join(str(v) for v in VERSION_TUPLE)
 __all__ = ["VERSION", "VERSION_TUPLE"]

{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: henchman-ai
-Version: 0.1.6
+Version: 0.1.7
 Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
 Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
 Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai

{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
 henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
-henchman/version.py,sha256=5Z3z01BWpsFAAqfIHjPWW4YpDhynfz8POxb252U_T2U,160
+henchman/version.py,sha256=__LbucVLec_Xjo5kM2xlVJLc9NovQpC_LE82ONoodKg,160
 henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
 henchman/cli/app.py,sha256=AFiMOfqYdwJrzcp5LRqwgwic2A6yhAUr_01w6BQwPq8,6097
 henchman/cli/console.py,sha256=TOuGBSNUaxxQypmmzC0P1IY7tBNlaTgAZesKy8uuZN4,7850
@@ -55,17 +55,17 @@ henchman/tools/builtins/ask_user.py,sha256=xPu74cB0rYahZHajVdjKgdmKU121SWyAgZSkU
 henchman/tools/builtins/file_edit.py,sha256=VjfpYVZulpIBufRSIsTx9eD5gYGnSybksyo5vGCL4wo,3709
 henchman/tools/builtins/file_read.py,sha256=RJCsK9Y-M2bd4IB8hnGaMjdzl62WSq7wOS9apcA3thA,4173
 henchman/tools/builtins/file_write.py,sha256=0vDAe6JAZHDdGIhSpf2q4ApxQ_DKL0L49_jfqogsiXo,2584
-henchman/tools/builtins/glob_tool.py,sha256=4zlPov-FONFHRFoe9Q49rDJNe_9E1jO-62IlEOZzHvU,3703
-henchman/tools/builtins/grep.py,sha256=r68Pm9wHwF2jqNuD5DcpPeIwM8a0YB2uz_ejm2KgksM,5455
-henchman/tools/builtins/ls.py,sha256=aSg_5D8zddLfio4I3p5EAS8QleRVaCa-laYWY8T1r2A,4232
-henchman/tools/builtins/shell.py,sha256=noDimK35cIc5PhXcSq5DV9h8D41c5DzFQfBzHlRII2M,4146
+henchman/tools/builtins/glob_tool.py,sha256=7NAlan5A6v-RWAIUj8ID78aYRSvXe9Jtt2I6ICzEcus,3651
+henchman/tools/builtins/grep.py,sha256=PV8X2ydnAutrWCS5VR9lABFpfSv0Olzsqa1Ktb5X4z0,5321
+henchman/tools/builtins/ls.py,sha256=5iSqHilrEiZ8ziOG4nKwC90fuLEx01V_0BzfS2PNAro,4167
+henchman/tools/builtins/shell.py,sha256=Gx8x1jBq1NvERFnc-kUNMovFoWg_i4IrV_askSECfEM,4134
 henchman/tools/builtins/web_fetch.py,sha256=uwgZm0ye3yDuS2U2DPV4D-8bjviYDTKN-cNi7mCMRpw,3370
 henchman/utils/__init__.py,sha256=tqyNdgGqZrcISSg2vBtMlVxsOvwaLo3zjqIk5f3QkhM,37
-henchman/utils/compaction.py,sha256=3lw-plxTX_YYdUs3PTGSlUaFAOrBbELCxfyc0q2HGLQ,16618
-henchman/utils/tokens.py,sha256=ortHhy6btO0JTCubeADIJXEfQK0X30mmssoTroYpsXs,4905
+henchman/utils/compaction.py,sha256=Urj7z8Q8p-S8Euk4Hf_6Q7Q4h_jw-KMiNco1ioOqoNo,17547
+henchman/utils/tokens.py,sha256=vzJTUT4qrwYqH46yW6bd4vaCPelmaua_TpEgLF7FamY,5673
 henchman/utils/validation.py,sha256=UNt2CQ3b1SOGyhJu8PrdMbxQ80MnykmlQJd7ANJnBKQ,3852
-henchman_ai-0.1.6.dist-info/METADATA,sha256=qShP4hvGxFvGdBHyclIvA1XCyz3cfM3d--wPbi7Oj_M,3492
-henchman_ai-0.1.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-henchman_ai-0.1.6.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
-henchman_ai-0.1.6.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
-henchman_ai-0.1.6.dist-info/RECORD,,
+henchman_ai-0.1.7.dist-info/METADATA,sha256=kJmH9DyuFUU4kbb-K7WZ3prI9N8nPP4ALLBqwCknxGU,3492
+henchman_ai-0.1.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+henchman_ai-0.1.7.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
+henchman_ai-0.1.7.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
+henchman_ai-0.1.7.dist-info/RECORD,,

{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{henchman_ai-0.1.6.dist-info → henchman_ai-0.1.7.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

henchman-ai 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

henchman-ai 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl