PyPI - pythonclaw - Versions diffs - 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl - Mend

pythonclaw 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

pythonclaw/__init__.py +1 -1
pythonclaw/channels/discord_bot.py +79 -1
pythonclaw/channels/telegram_bot.py +99 -11
pythonclaw/channels/whatsapp_bot.py +69 -0
pythonclaw/config.py +25 -0
pythonclaw/core/agent.py +6 -3
pythonclaw/core/skillhub.py +206 -78
pythonclaw/core/stt.py +156 -0
pythonclaw/core/tools.py +56 -1
pythonclaw/init.py +2 -1
pythonclaw/scheduler/cron.py +3 -1
pythonclaw/templates/skills/data/pdf_convert/SKILL.md +65 -0
pythonclaw/templates/skills/data/pdf_convert/convert_pdf.py +187 -0
pythonclaw/templates/skills/data/pdf_merge/SKILL.md +52 -0
pythonclaw/templates/skills/data/pdf_merge/merge_pdf.py +115 -0
pythonclaw/templates/skills/data/pdf_protect/SKILL.md +65 -0
pythonclaw/templates/skills/data/pdf_protect/protect_pdf.py +140 -0
pythonclaw/templates/skills/data/pdf_split/SKILL.md +55 -0
pythonclaw/templates/skills/data/pdf_split/split_pdf.py +109 -0
pythonclaw/templates/skills/data/pdf_writer/SKILL.md +61 -0
pythonclaw/templates/skills/data/pdf_writer/write_pdf.py +138 -0
pythonclaw/web/app.py +68 -42
{pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/METADATA +2 -1
{pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/RECORD +28 -17
{pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/WHEEL +0 -0
{pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/entry_points.txt +0 -0
{pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/licenses/LICENSE +0 -0
{pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/top_level.txt +0 -0

pythonclaw/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@ from .core.llm.base import LLMProvider
 from .core.llm.openai_compatible import OpenAICompatibleProvider
 from .init import init
-__version__ = "0.5.0"
+__version__ = "0.6.6"
 __all__ = [
     "Agent",
     "LLMProvider",

pythonclaw/channels/discord_bot.py CHANGED Viewed

@@ -31,6 +31,7 @@ whitelisted channels).
 from __future__ import annotations
+import asyncio
 import base64
 import logging
 from typing import TYPE_CHECKING
@@ -142,6 +143,16 @@ class DiscordBot:
                 a.content_type and a.content_type.startswith("image/")
                 for a in message.attachments
             )
+            has_audio = any(
+                a.content_type and a.content_type.startswith("audio/")
+                for a in message.attachments
+            )
+            if has_audio and not content:
+                transcript = await self._transcribe_audio(message)
+                if transcript is None:
+                    return
+                content = transcript
             if not content and not has_image:
                 return
@@ -157,6 +168,9 @@ class DiscordBot:
                 hint = content[len("!compact"):].strip() or None
                 await self._cmd_compact(message, is_dm, hint)
                 return
+            if content.startswith("!clear_files"):
+                await self._cmd_clear_files(message)
+                return
             chat_input = content or ""
             if has_image:
@@ -188,6 +202,40 @@ class DiscordBot:
                     logger.warning("[Discord] Failed to download attachment %s", att.filename)
         return parts
+    # ── Voice / audio handling ─────────────────────────────────────────────────
+    @staticmethod
+    async def _transcribe_audio(message: discord.Message) -> str | None:
+        """Download the first audio attachment and transcribe via Deepgram."""
+        from ..core.stt import no_key_message, transcribe_bytes_async
+        for att in message.attachments:
+            if att.content_type and att.content_type.startswith("audio/"):
+                try:
+                    data = await att.read()
+                except Exception:
+                    logger.warning("[Discord] Failed to download audio %s", att.filename)
+                    return None
+                mime = att.content_type.split(";")[0]
+                try:
+                    transcript = await transcribe_bytes_async(data, mime)
+                except Exception as exc:
+                    logger.warning("[Discord] Deepgram failed: %s", exc)
+                    await message.reply(f"Voice transcription failed: {exc}")
+                    return None
+                if transcript is None:
+                    await message.reply(no_key_message())
+                    return None
+                if not transcript.strip():
+                    await message.reply("Could not recognise any speech in the audio.")
+                    return None
+                logger.info("[Discord] Audio transcribed: %s", transcript[:80])
+                return transcript
+        return None
     # ── Command implementations ───────────────────────────────────────────────
     async def _cmd_reset(self, message: discord.Message, is_dm: bool) -> None:
@@ -213,6 +261,11 @@ class DiscordBot:
         )
         await message.reply(status)
+    async def _cmd_clear_files(self, message: discord.Message) -> None:
+        from .. import config as _cfg
+        count = _cfg.clear_files()
+        await message.reply(f"Cleared {count} file(s) from the downloads folder.")
     async def _cmd_compact(self, message: discord.Message, is_dm: bool, hint: str | None) -> None:
         sid = self._session_id(message.author.id if is_dm else message.channel.id, is_dm)
         agent = self._sm.get_or_create(sid)
@@ -240,8 +293,8 @@ class DiscordBot:
         async with message.channel.typing():
             try:
                 async with self._sm.acquire(sid):
-                    import asyncio
                     loop = asyncio.get_event_loop()
+                    self._register_file_sender(loop, message.channel)
                     response = await loop.run_in_executor(None, agent.chat, content)
             except Exception as exc:
                 logger.exception("[Discord] Agent.chat() raised an exception")
@@ -249,6 +302,31 @@ class DiscordBot:
         for chunk in self._split_message(response or "(no response)"):
             await message.reply(chunk)
+    # ── File sending ──────────────────────────────────────────────────────────
+    def _register_file_sender(
+        self,
+        loop: asyncio.AbstractEventLoop,
+        channel: discord.abc.Messageable,
+    ) -> None:
+        """Register a sync callback so the Agent can send files via Discord."""
+        from ..core.tools import set_file_sender
+        def _sender(path: str, caption: str = "") -> None:
+            async def _do_send():
+                try:
+                    await channel.send(
+                        content=caption[:2000] if caption else None,
+                        file=discord.File(path),
+                    )
+                except Exception as exc:
+                    logger.warning("[Discord] send_file failed: %s", exc)
+            future = asyncio.run_coroutine_threadsafe(_do_send(), loop)
+            future.result(timeout=60)
+        set_file_sender(_sender)
     # ── Lifecycle ─────────────────────────────────────────────────────────────
     async def start_async(self) -> None:

pythonclaw/channels/telegram_bot.py CHANGED Viewed

@@ -144,7 +144,8 @@ class TelegramBot:
             "  /start          \u2014 show this message\n"
             "  /reset          \u2014 start a fresh session\n"
             "  /status         \u2014 show session info\n"
-            "  /compact [hint] \u2014 compact conversation history"
+            "  /compact [hint] \u2014 compact conversation history\n"
+            "  /clear_files    \u2014 delete all downloaded files"
         )
     async def _cmd_reset(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -186,6 +187,13 @@ class TelegramBot:
         for chunk in _split_message(result):
             await update.message.reply_text(chunk)
+    async def _cmd_clear_files(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+        if not await self._check_access(update, context):
+            return
+        from .. import config as _cfg
+        count = _cfg.clear_files()
+        await update.message.reply_text(f"Cleared {count} file(s) from the downloads folder.")
     # ── Message handler (text + photos) ───────────────────────────────────────
     async def _handle_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -200,6 +208,13 @@ class TelegramBot:
         user_text = self._strip_mention(user_text)
         has_photo = bool(update.message.photo)
+        has_voice = bool(update.message.voice or update.message.audio)
+        if has_voice:
+            transcript = await self._transcribe_voice(update)
+            if transcript is None:
+                return
+            user_text = transcript
         if not user_text and not has_photo:
             return
@@ -229,6 +244,8 @@ class TelegramBot:
         try:
             async with self._sm.acquire(sid):
                 loop = asyncio.get_event_loop()
+                chat_id = update.effective_chat.id
+                self._register_file_sender(loop, chat_id)
                 future = loop.run_in_executor(
                     None, agent.chat_stream, chat_input, token_queue.put,
                 )
@@ -293,21 +310,27 @@ class TelegramBot:
             text = _clean_response(raw)
             now = time.monotonic()
-            if text and text != live_text and (now - last_edit) >= THROTTLE:
+            # Only show up to the last paragraph break while streaming;
+            # the trailing incomplete line is held back to avoid flashing
+            # progress narration that will be stripped later.
+            last_break = text.rfind("\n\n")
+            display = text[:last_break].rstrip() if last_break > 0 else ""
+            if display and display != live_text and (now - last_edit) >= THROTTLE:
                 try:
                     if live_msg is None:
                         live_msg = await update.message.reply_text(
-                            text[:4096],
+                            display[:4096],
                         )
-                        live_text = text[:4096]
-                    elif len(text) <= 4096:
-                        await live_msg.edit_text(text)
-                        live_text = text
+                        live_text = display[:4096]
+                    elif len(display) <= 4096:
+                        await live_msg.edit_text(display)
+                        live_text = display
                     else:
-                        await live_msg.edit_text(live_text)
+                        await live_msg.edit_text(display[:4096])
                         live_msg = None
                         live_text = ""
-                        buf = [text[len(live_text):] if live_text else text]
+                        buf = [display[4096:] + text[last_break:]]
                     sent_any = True
                 except Exception:
                     pass
@@ -345,6 +368,29 @@ class TelegramBot:
             for chunk in _split_message(text):
                 await update.message.reply_text(chunk)
+    def _register_file_sender(self, loop: asyncio.AbstractEventLoop, chat_id: int) -> None:
+        """Register a sync callback so the Agent can send files via Telegram."""
+        from ..core.tools import set_file_sender
+        bot_app = self._app
+        def _sender(path: str, caption: str = "") -> None:
+            async def _do_send():
+                try:
+                    with open(path, "rb") as f:
+                        await bot_app.bot.send_document(
+                            chat_id=chat_id,
+                            document=f,
+                            caption=caption[:1024] if caption else None,
+                        )
+                except Exception as exc:
+                    logger.warning("[Telegram] send_file failed: %s", exc)
+            future = asyncio.run_coroutine_threadsafe(_do_send(), loop)
+            future.result(timeout=60)
+        set_file_sender(_sender)
     async def _build_image_input(self, update: Update, caption: str) -> list:
         """Download photo and build a multimodal content array."""
         photo = update.message.photo[-1]  # highest resolution
@@ -362,6 +408,37 @@ class TelegramBot:
             },
         ]
+    async def _transcribe_voice(self, update: Update) -> str | None:
+        """Download a voice/audio message and transcribe via Deepgram.
+        Returns the transcript text, or sends a hint to the user and
+        returns ``None`` if Deepgram is not configured.
+        """
+        from ..core.stt import no_key_message, transcribe_bytes_async
+        voice = update.message.voice or update.message.audio
+        tg_file = await voice.get_file()
+        audio_bytes = bytes(await tg_file.download_as_bytearray())
+        mime = voice.mime_type or "audio/ogg"
+        try:
+            transcript = await transcribe_bytes_async(audio_bytes, mime)
+        except Exception as exc:
+            logger.warning("[Telegram] Deepgram transcription failed: %s", exc)
+            await update.message.reply_text(f"Voice transcription failed: {exc}")
+            return None
+        if transcript is None:
+            await update.message.reply_text(no_key_message())
+            return None
+        if not transcript.strip():
+            await update.message.reply_text("Could not recognise any speech in the audio.")
+            return None
+        logger.info("[Telegram] Voice transcribed: %s", transcript[:80])
+        return transcript
     async def _keep_typing(self, chat_id: int) -> None:
         """Re-send the 'typing' chat action every 4 s until cancelled."""
         try:
@@ -380,6 +457,7 @@ class TelegramBot:
         BotCommand("reset", "Start a fresh session"),
         BotCommand("status", "Show session info"),
         BotCommand("compact", "Compact conversation history"),
+        BotCommand("clear_files", "Delete all downloaded files"),
     ]
     def build_application(self) -> Application:
@@ -388,8 +466,10 @@ class TelegramBot:
         app.add_handler(CommandHandler("reset", self._cmd_reset))
         app.add_handler(CommandHandler("status", self._cmd_status))
         app.add_handler(CommandHandler("compact", self._cmd_compact))
+        app.add_handler(CommandHandler("clear_files", self._cmd_clear_files))
         app.add_handler(MessageHandler(
-            (filters.TEXT | filters.PHOTO) & ~filters.COMMAND,
+            (filters.TEXT | filters.PHOTO | filters.VOICE | filters.AUDIO)
+            & ~filters.COMMAND,
             self._handle_message,
         ))
         self._app = app
@@ -443,10 +523,18 @@ _LEAKED_TOOL_RE = re.compile(
 )
+_PROGRESS_LINE_RE = re.compile(r'\n\n.{0,60}[：:]\s*\n\n')
 def _clean_response(text: str) -> str:
-    """Strip leaked tool-call XML/DSML markup from LLM output."""
+    """Strip leaked tool-call XML/DSML markup and excess whitespace."""
     text = _LEAKED_TOOL_RE.sub('', text)
     text = re.sub(r'\n{3,}', '\n\n', text)
+    for _ in range(10):
+        cleaned = _PROGRESS_LINE_RE.sub('\n\n', text)
+        if cleaned == text:
+            break
+        text = cleaned
     return text.strip()

pythonclaw/channels/whatsapp_bot.py CHANGED Viewed

@@ -98,6 +98,24 @@ class WhatsAppBot:
             self._locks[session_id] = threading.Lock()
         return self._locks[session_id]
+    # ── File sending ──────────────────────────────────────────────────────────
+    def _register_file_sender(self, client, wa_id: str) -> None:
+        """Register a sync callback so the Agent can send files via WhatsApp."""
+        from ..core.tools import set_file_sender
+        def _sender(path: str, caption: str = "") -> None:
+            try:
+                client.send_document(
+                    to=wa_id,
+                    document=path,
+                    caption=caption[:1024] if caption else None,
+                )
+            except Exception as exc:
+                logger.warning("[WhatsApp] send_file failed: %s", exc)
+        set_file_sender(_sender)
     # ── Mount on FastAPI ──────────────────────────────────────────────────────
     def mount(self, app: "FastAPI") -> None:
@@ -133,6 +151,10 @@ class WhatsAppBot:
             text = (msg.text or "").strip()
             has_image = msg.has_media and getattr(msg, "image", None) is not None
+            has_audio = msg.has_media and (
+                getattr(msg, "audio", None) is not None
+                or getattr(msg, "voice", None) is not None
+            )
             # Group mention check
             is_group = getattr(msg, "is_group", False)
@@ -145,6 +167,12 @@ class WhatsAppBot:
                 if not mentioned:
                     return
+            if has_audio and not text:
+                transcript = _transcribe_wa_audio(client, msg)
+                if transcript is None:
+                    return
+                text = transcript
             if not text and not has_image:
                 return
@@ -180,6 +208,12 @@ class WhatsAppBot:
                     msg.reply(chunk)
                 return
+            if text.lower() == "!clear_files":
+                from .. import config as _cfg
+                count = _cfg.clear_files()
+                msg.reply(f"Cleared {count} file(s) from the downloads folder.")
+                return
             # Build input (text or multimodal)
             chat_input = text or "What's in this image?"
             if has_image:
@@ -191,6 +225,8 @@ class WhatsAppBot:
             if lock.locked():
                 msg.reply("Processing previous message...")
+            bot._register_file_sender(client, wa_id)
             try:
                 with lock:
                     response = agent.chat(chat_input)
@@ -248,6 +284,39 @@ def _build_wa_image_input(client, msg, caption: str) -> list:
         return caption
+def _transcribe_wa_audio(client, msg) -> str | None:
+    """Download WhatsApp voice/audio and transcribe via Deepgram."""
+    from ..core.stt import no_key_message, transcribe_bytes
+    media = getattr(msg, "voice", None) or getattr(msg, "audio", None)
+    if media is None:
+        return None
+    try:
+        data = media.download(in_memory=True)
+    except Exception:
+        logger.warning("[WhatsApp] Failed to download audio")
+        return None
+    mime = getattr(media, "mime_type", "audio/ogg")
+    try:
+        transcript = transcribe_bytes(data, mime)
+    except Exception as exc:
+        logger.warning("[WhatsApp] Deepgram failed: %s", exc)
+        msg.reply(f"Voice transcription failed: {exc}")
+        return None
+    if transcript is None:
+        msg.reply(no_key_message())
+        return None
+    if not transcript.strip():
+        msg.reply("Could not recognise any speech in the audio.")
+        return None
+    logger.info("[WhatsApp] Audio transcribed: %s", transcript[:80])
+    return transcript
 def create_bot(session_manager: "SessionManager") -> WhatsAppBot:
     """Create a WhatsAppBot from pythonclaw.json / env vars."""
     phone_id = config.get_str(

pythonclaw/config.py CHANGED Viewed

@@ -209,6 +209,31 @@ def group_context_dir(session_id: str) -> Path:
     return PYTHONCLAW_HOME / "context" / "groups" / safe
+def files_dir() -> Path:
+    """Return the shared files directory (``~/.pythonclaw/context/files/``)."""
+    d = PYTHONCLAW_HOME / "context" / "files"
+    d.mkdir(parents=True, exist_ok=True)
+    return d
+def clear_files() -> int:
+    """Delete all files in the shared files directory. Returns count removed."""
+    d = files_dir()
+    count = 0
+    for entry in d.iterdir():
+        try:
+            if entry.is_file():
+                entry.unlink()
+                count += 1
+            elif entry.is_dir():
+                import shutil
+                shutil.rmtree(entry)
+                count += 1
+        except OSError:
+            pass
+    return count
 def reset() -> None:
     """Clear the cached config (mainly for testing)."""
     global _config, _config_path

pythonclaw/core/agent.py CHANGED Viewed

@@ -207,6 +207,7 @@ class Agent:
         self.MAX_PARALLEL_SKILLS = config.get_int(
             "agent", "maxParallelSkills", default=5,
         )
+        self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="agent-bg")
         # Memory — with optional global fallback for per-group isolation
         mem_dir = memory_dir or config.get("memory", "dir", env="PYTHONCLAW_MEMORY_DIR")
@@ -342,9 +343,11 @@ You decide which mode fits. Don't announce the mode name.
 - Proactively `remember` user preferences, decisions, key facts.
 - Use `recall` when user references past context.
 - Memory auto-loaded at session start. INDEX.md = curated system info.
+- All downloaded/generated files go in the shared files directory (`~/.pythonclaw/context/files/`). The `run_command` tool uses this as its working directory.
 - NEVER output tool calls as XML or text. Always use the function calling API.
 ### Response Guidelines
+- **Language matching**: ALWAYS reply in the SAME language the user used in their message. If the user writes in Chinese, reply in Chinese. If in English, reply in English. Mirror the user's language exactly.
 - Answer the user's question directly and concisely.
 - Keep responses focused — under 300 words when possible. Break long answers into short paragraphs.
 - Do NOT mention what skills or tools you have available, unless explicitly asked.
@@ -798,7 +801,7 @@ Don't repeat this if `bot_name` already exists in memory.
         soft_threshold = int(self.compaction_threshold * 0.8)
         if not self._memory_flushed_this_cycle and tokens >= soft_threshold:
-            self._proactive_memory_flush()
+            self._bg_executor.submit(self._proactive_memory_flush)
             self._memory_flushed_this_cycle = True
         if tokens < self.compaction_threshold:
@@ -955,7 +958,7 @@ Don't repeat this if `bot_name` already exists in memory.
                 t0 = time.monotonic()
                 results: dict[str, str] = {}
-                with ThreadPoolExecutor(max_workers=min(len(tool_calls), 8)) as pool:
+                with ThreadPoolExecutor(max_workers=min(len(tool_calls), 16)) as pool:
                     futures = {
                         pool.submit(self._execute_tool_call, tc): tc
                         for tc in tool_calls
@@ -1101,7 +1104,7 @@ Don't repeat this if `bot_name` already exists in memory.
                 results: dict[str, str] = {}
                 with ThreadPoolExecutor(
-                    max_workers=min(len(tool_calls), 8)
+                    max_workers=min(len(tool_calls), 16),
                 ) as pool:
                     futures = {
                         pool.submit(self._execute_tool_call, tc): tc

pythonclaw 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

pythonclaw 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl