pythonclaw 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. pythonclaw/__init__.py +1 -1
  2. pythonclaw/channels/discord_bot.py +79 -1
  3. pythonclaw/channels/telegram_bot.py +99 -11
  4. pythonclaw/channels/whatsapp_bot.py +69 -0
  5. pythonclaw/config.py +25 -0
  6. pythonclaw/core/agent.py +6 -3
  7. pythonclaw/core/skillhub.py +206 -78
  8. pythonclaw/core/stt.py +156 -0
  9. pythonclaw/core/tools.py +56 -1
  10. pythonclaw/init.py +2 -1
  11. pythonclaw/scheduler/cron.py +3 -1
  12. pythonclaw/templates/skills/data/pdf_convert/SKILL.md +65 -0
  13. pythonclaw/templates/skills/data/pdf_convert/convert_pdf.py +187 -0
  14. pythonclaw/templates/skills/data/pdf_merge/SKILL.md +52 -0
  15. pythonclaw/templates/skills/data/pdf_merge/merge_pdf.py +115 -0
  16. pythonclaw/templates/skills/data/pdf_protect/SKILL.md +65 -0
  17. pythonclaw/templates/skills/data/pdf_protect/protect_pdf.py +140 -0
  18. pythonclaw/templates/skills/data/pdf_split/SKILL.md +55 -0
  19. pythonclaw/templates/skills/data/pdf_split/split_pdf.py +109 -0
  20. pythonclaw/templates/skills/data/pdf_writer/SKILL.md +61 -0
  21. pythonclaw/templates/skills/data/pdf_writer/write_pdf.py +138 -0
  22. pythonclaw/web/app.py +68 -42
  23. {pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/METADATA +2 -1
  24. {pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/RECORD +28 -17
  25. {pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/WHEEL +0 -0
  26. {pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/entry_points.txt +0 -0
  27. {pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/licenses/LICENSE +0 -0
  28. {pythonclaw-0.6.4.dist-info → pythonclaw-0.6.6.dist-info}/top_level.txt +0 -0
pythonclaw/__init__.py CHANGED
@@ -6,7 +6,7 @@ from .core.llm.base import LLMProvider
6
6
  from .core.llm.openai_compatible import OpenAICompatibleProvider
7
7
  from .init import init
8
8
 
9
- __version__ = "0.5.0"
9
+ __version__ = "0.6.6"
10
10
  __all__ = [
11
11
  "Agent",
12
12
  "LLMProvider",
pythonclaw/channels/discord_bot.py CHANGED
@@ -31,6 +31,7 @@ whitelisted channels).
31
31
 
32
32
  from __future__ import annotations
33
33
 
34
+ import asyncio
34
35
  import base64
35
36
  import logging
36
37
  from typing import TYPE_CHECKING
@@ -142,6 +143,16 @@ class DiscordBot:
142
143
  a.content_type and a.content_type.startswith("image/")
143
144
  for a in message.attachments
144
145
  )
146
+ has_audio = any(
147
+ a.content_type and a.content_type.startswith("audio/")
148
+ for a in message.attachments
149
+ )
150
+
151
+ if has_audio and not content:
152
+ transcript = await self._transcribe_audio(message)
153
+ if transcript is None:
154
+ return
155
+ content = transcript
145
156
 
146
157
  if not content and not has_image:
147
158
  return
@@ -157,6 +168,9 @@ class DiscordBot:
157
168
  hint = content[len("!compact"):].strip() or None
158
169
  await self._cmd_compact(message, is_dm, hint)
159
170
  return
171
+ if content.startswith("!clear_files"):
172
+ await self._cmd_clear_files(message)
173
+ return
160
174
 
161
175
  chat_input = content or ""
162
176
  if has_image:
@@ -188,6 +202,40 @@ class DiscordBot:
188
202
  logger.warning("[Discord] Failed to download attachment %s", att.filename)
189
203
  return parts
190
204
 
205
+ # ── Voice / audio handling ─────────────────────────────────────────────────
206
+
207
+ @staticmethod
208
+ async def _transcribe_audio(message: discord.Message) -> str | None:
209
+ """Download the first audio attachment and transcribe via Deepgram."""
210
+ from ..core.stt import no_key_message, transcribe_bytes_async
211
+
212
+ for att in message.attachments:
213
+ if att.content_type and att.content_type.startswith("audio/"):
214
+ try:
215
+ data = await att.read()
216
+ except Exception:
217
+ logger.warning("[Discord] Failed to download audio %s", att.filename)
218
+ return None
219
+
220
+ mime = att.content_type.split(";")[0]
221
+ try:
222
+ transcript = await transcribe_bytes_async(data, mime)
223
+ except Exception as exc:
224
+ logger.warning("[Discord] Deepgram failed: %s", exc)
225
+ await message.reply(f"Voice transcription failed: {exc}")
226
+ return None
227
+
228
+ if transcript is None:
229
+ await message.reply(no_key_message())
230
+ return None
231
+ if not transcript.strip():
232
+ await message.reply("Could not recognise any speech in the audio.")
233
+ return None
234
+
235
+ logger.info("[Discord] Audio transcribed: %s", transcript[:80])
236
+ return transcript
237
+ return None
238
+
191
239
  # ── Command implementations ───────────────────────────────────────────────
192
240
 
193
241
  async def _cmd_reset(self, message: discord.Message, is_dm: bool) -> None:
@@ -213,6 +261,11 @@ class DiscordBot:
213
261
  )
214
262
  await message.reply(status)
215
263
 
264
+ async def _cmd_clear_files(self, message: discord.Message) -> None:
265
+ from .. import config as _cfg
266
+ count = _cfg.clear_files()
267
+ await message.reply(f"Cleared {count} file(s) from the downloads folder.")
268
+
216
269
  async def _cmd_compact(self, message: discord.Message, is_dm: bool, hint: str | None) -> None:
217
270
  sid = self._session_id(message.author.id if is_dm else message.channel.id, is_dm)
218
271
  agent = self._sm.get_or_create(sid)
@@ -240,8 +293,8 @@ class DiscordBot:
240
293
  async with message.channel.typing():
241
294
  try:
242
295
  async with self._sm.acquire(sid):
243
- import asyncio
244
296
  loop = asyncio.get_event_loop()
297
+ self._register_file_sender(loop, message.channel)
245
298
  response = await loop.run_in_executor(None, agent.chat, content)
246
299
  except Exception as exc:
247
300
  logger.exception("[Discord] Agent.chat() raised an exception")
@@ -249,6 +302,31 @@ class DiscordBot:
249
302
  for chunk in self._split_message(response or "(no response)"):
250
303
  await message.reply(chunk)
251
304
 
305
+ # ── File sending ──────────────────────────────────────────────────────────
306
+
307
+ def _register_file_sender(
308
+ self,
309
+ loop: asyncio.AbstractEventLoop,
310
+ channel: discord.abc.Messageable,
311
+ ) -> None:
312
+ """Register a sync callback so the Agent can send files via Discord."""
313
+ from ..core.tools import set_file_sender
314
+
315
+ def _sender(path: str, caption: str = "") -> None:
316
+ async def _do_send():
317
+ try:
318
+ await channel.send(
319
+ content=caption[:2000] if caption else None,
320
+ file=discord.File(path),
321
+ )
322
+ except Exception as exc:
323
+ logger.warning("[Discord] send_file failed: %s", exc)
324
+
325
+ future = asyncio.run_coroutine_threadsafe(_do_send(), loop)
326
+ future.result(timeout=60)
327
+
328
+ set_file_sender(_sender)
329
+
252
330
  # ── Lifecycle ─────────────────────────────────────────────────────────────
253
331
 
254
332
  async def start_async(self) -> None:
pythonclaw/channels/telegram_bot.py CHANGED
@@ -144,7 +144,8 @@ class TelegramBot:
144
144
  " /start \u2014 show this message\n"
145
145
  " /reset \u2014 start a fresh session\n"
146
146
  " /status \u2014 show session info\n"
147
- " /compact [hint] \u2014 compact conversation history"
147
+ " /compact [hint] \u2014 compact conversation history\n"
148
+ " /clear_files \u2014 delete all downloaded files"
148
149
  )
149
150
 
150
151
  async def _cmd_reset(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -186,6 +187,13 @@ class TelegramBot:
186
187
  for chunk in _split_message(result):
187
188
  await update.message.reply_text(chunk)
188
189
 
190
+ async def _cmd_clear_files(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
191
+ if not await self._check_access(update, context):
192
+ return
193
+ from .. import config as _cfg
194
+ count = _cfg.clear_files()
195
+ await update.message.reply_text(f"Cleared {count} file(s) from the downloads folder.")
196
+
189
197
  # ── Message handler (text + photos) ───────────────────────────────────────
190
198
 
191
199
  async def _handle_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -200,6 +208,13 @@ class TelegramBot:
200
208
  user_text = self._strip_mention(user_text)
201
209
 
202
210
  has_photo = bool(update.message.photo)
211
+ has_voice = bool(update.message.voice or update.message.audio)
212
+
213
+ if has_voice:
214
+ transcript = await self._transcribe_voice(update)
215
+ if transcript is None:
216
+ return
217
+ user_text = transcript
203
218
 
204
219
  if not user_text and not has_photo:
205
220
  return
@@ -229,6 +244,8 @@ class TelegramBot:
229
244
  try:
230
245
  async with self._sm.acquire(sid):
231
246
  loop = asyncio.get_event_loop()
247
+ chat_id = update.effective_chat.id
248
+ self._register_file_sender(loop, chat_id)
232
249
  future = loop.run_in_executor(
233
250
  None, agent.chat_stream, chat_input, token_queue.put,
234
251
  )
@@ -293,21 +310,27 @@ class TelegramBot:
293
310
  text = _clean_response(raw)
294
311
  now = time.monotonic()
295
312
 
296
- if text and text != live_text and (now - last_edit) >= THROTTLE:
313
+ # Only show up to the last paragraph break while streaming;
314
+ # the trailing incomplete line is held back to avoid flashing
315
+ # progress narration that will be stripped later.
316
+ last_break = text.rfind("\n\n")
317
+ display = text[:last_break].rstrip() if last_break > 0 else ""
318
+
319
+ if display and display != live_text and (now - last_edit) >= THROTTLE:
297
320
  try:
298
321
  if live_msg is None:
299
322
  live_msg = await update.message.reply_text(
300
- text[:4096],
323
+ display[:4096],
301
324
  )
302
- live_text = text[:4096]
303
- elif len(text) <= 4096:
304
- await live_msg.edit_text(text)
305
- live_text = text
325
+ live_text = display[:4096]
326
+ elif len(display) <= 4096:
327
+ await live_msg.edit_text(display)
328
+ live_text = display
306
329
  else:
307
- await live_msg.edit_text(live_text)
330
+ await live_msg.edit_text(display[:4096])
308
331
  live_msg = None
309
332
  live_text = ""
310
- buf = [text[len(live_text):] if live_text else text]
333
+ buf = [display[4096:] + text[last_break:]]
311
334
  sent_any = True
312
335
  except Exception:
313
336
  pass
@@ -345,6 +368,29 @@ class TelegramBot:
345
368
  for chunk in _split_message(text):
346
369
  await update.message.reply_text(chunk)
347
370
 
371
+ def _register_file_sender(self, loop: asyncio.AbstractEventLoop, chat_id: int) -> None:
372
+ """Register a sync callback so the Agent can send files via Telegram."""
373
+ from ..core.tools import set_file_sender
374
+
375
+ bot_app = self._app
376
+
377
+ def _sender(path: str, caption: str = "") -> None:
378
+ async def _do_send():
379
+ try:
380
+ with open(path, "rb") as f:
381
+ await bot_app.bot.send_document(
382
+ chat_id=chat_id,
383
+ document=f,
384
+ caption=caption[:1024] if caption else None,
385
+ )
386
+ except Exception as exc:
387
+ logger.warning("[Telegram] send_file failed: %s", exc)
388
+
389
+ future = asyncio.run_coroutine_threadsafe(_do_send(), loop)
390
+ future.result(timeout=60)
391
+
392
+ set_file_sender(_sender)
393
+
348
394
  async def _build_image_input(self, update: Update, caption: str) -> list:
349
395
  """Download photo and build a multimodal content array."""
350
396
  photo = update.message.photo[-1] # highest resolution
@@ -362,6 +408,37 @@ class TelegramBot:
362
408
  },
363
409
  ]
364
410
 
411
+ async def _transcribe_voice(self, update: Update) -> str | None:
412
+ """Download a voice/audio message and transcribe via Deepgram.
413
+
414
+ Returns the transcript text, or sends a hint to the user and
415
+ returns ``None`` if Deepgram is not configured.
416
+ """
417
+ from ..core.stt import no_key_message, transcribe_bytes_async
418
+
419
+ voice = update.message.voice or update.message.audio
420
+ tg_file = await voice.get_file()
421
+ audio_bytes = bytes(await tg_file.download_as_bytearray())
422
+ mime = voice.mime_type or "audio/ogg"
423
+
424
+ try:
425
+ transcript = await transcribe_bytes_async(audio_bytes, mime)
426
+ except Exception as exc:
427
+ logger.warning("[Telegram] Deepgram transcription failed: %s", exc)
428
+ await update.message.reply_text(f"Voice transcription failed: {exc}")
429
+ return None
430
+
431
+ if transcript is None:
432
+ await update.message.reply_text(no_key_message())
433
+ return None
434
+
435
+ if not transcript.strip():
436
+ await update.message.reply_text("Could not recognise any speech in the audio.")
437
+ return None
438
+
439
+ logger.info("[Telegram] Voice transcribed: %s", transcript[:80])
440
+ return transcript
441
+
365
442
  async def _keep_typing(self, chat_id: int) -> None:
366
443
  """Re-send the 'typing' chat action every 4 s until cancelled."""
367
444
  try:
@@ -380,6 +457,7 @@ class TelegramBot:
380
457
  BotCommand("reset", "Start a fresh session"),
381
458
  BotCommand("status", "Show session info"),
382
459
  BotCommand("compact", "Compact conversation history"),
460
+ BotCommand("clear_files", "Delete all downloaded files"),
383
461
  ]
384
462
 
385
463
  def build_application(self) -> Application:
@@ -388,8 +466,10 @@ class TelegramBot:
388
466
  app.add_handler(CommandHandler("reset", self._cmd_reset))
389
467
  app.add_handler(CommandHandler("status", self._cmd_status))
390
468
  app.add_handler(CommandHandler("compact", self._cmd_compact))
469
+ app.add_handler(CommandHandler("clear_files", self._cmd_clear_files))
391
470
  app.add_handler(MessageHandler(
392
- (filters.TEXT | filters.PHOTO) & ~filters.COMMAND,
471
+ (filters.TEXT | filters.PHOTO | filters.VOICE | filters.AUDIO)
472
+ & ~filters.COMMAND,
393
473
  self._handle_message,
394
474
  ))
395
475
  self._app = app
@@ -443,10 +523,18 @@ _LEAKED_TOOL_RE = re.compile(
443
523
  )
444
524
 
445
525
 
526
+ _PROGRESS_LINE_RE = re.compile(r'\n\n.{0,60}[::]\s*\n\n')
527
+
528
+
446
529
  def _clean_response(text: str) -> str:
447
- """Strip leaked tool-call XML/DSML markup from LLM output."""
530
+ """Strip leaked tool-call XML/DSML markup and excess whitespace."""
448
531
  text = _LEAKED_TOOL_RE.sub('', text)
449
532
  text = re.sub(r'\n{3,}', '\n\n', text)
533
+ for _ in range(10):
534
+ cleaned = _PROGRESS_LINE_RE.sub('\n\n', text)
535
+ if cleaned == text:
536
+ break
537
+ text = cleaned
450
538
  return text.strip()
451
539
 
452
540
 
pythonclaw/channels/whatsapp_bot.py CHANGED
@@ -98,6 +98,24 @@ class WhatsAppBot:
98
98
  self._locks[session_id] = threading.Lock()
99
99
  return self._locks[session_id]
100
100
 
101
+ # ── File sending ──────────────────────────────────────────────────────────
102
+
103
+ def _register_file_sender(self, client, wa_id: str) -> None:
104
+ """Register a sync callback so the Agent can send files via WhatsApp."""
105
+ from ..core.tools import set_file_sender
106
+
107
+ def _sender(path: str, caption: str = "") -> None:
108
+ try:
109
+ client.send_document(
110
+ to=wa_id,
111
+ document=path,
112
+ caption=caption[:1024] if caption else None,
113
+ )
114
+ except Exception as exc:
115
+ logger.warning("[WhatsApp] send_file failed: %s", exc)
116
+
117
+ set_file_sender(_sender)
118
+
101
119
  # ── Mount on FastAPI ──────────────────────────────────────────────────────
102
120
 
103
121
  def mount(self, app: "FastAPI") -> None:
@@ -133,6 +151,10 @@ class WhatsAppBot:
133
151
 
134
152
  text = (msg.text or "").strip()
135
153
  has_image = msg.has_media and getattr(msg, "image", None) is not None
154
+ has_audio = msg.has_media and (
155
+ getattr(msg, "audio", None) is not None
156
+ or getattr(msg, "voice", None) is not None
157
+ )
136
158
 
137
159
  # Group mention check
138
160
  is_group = getattr(msg, "is_group", False)
@@ -145,6 +167,12 @@ class WhatsAppBot:
145
167
  if not mentioned:
146
168
  return
147
169
 
170
+ if has_audio and not text:
171
+ transcript = _transcribe_wa_audio(client, msg)
172
+ if transcript is None:
173
+ return
174
+ text = transcript
175
+
148
176
  if not text and not has_image:
149
177
  return
150
178
 
@@ -180,6 +208,12 @@ class WhatsAppBot:
180
208
  msg.reply(chunk)
181
209
  return
182
210
 
211
+ if text.lower() == "!clear_files":
212
+ from .. import config as _cfg
213
+ count = _cfg.clear_files()
214
+ msg.reply(f"Cleared {count} file(s) from the downloads folder.")
215
+ return
216
+
183
217
  # Build input (text or multimodal)
184
218
  chat_input = text or "What's in this image?"
185
219
  if has_image:
@@ -191,6 +225,8 @@ class WhatsAppBot:
191
225
  if lock.locked():
192
226
  msg.reply("Processing previous message...")
193
227
 
228
+ bot._register_file_sender(client, wa_id)
229
+
194
230
  try:
195
231
  with lock:
196
232
  response = agent.chat(chat_input)
@@ -248,6 +284,39 @@ def _build_wa_image_input(client, msg, caption: str) -> list:
248
284
  return caption
249
285
 
250
286
 
287
+ def _transcribe_wa_audio(client, msg) -> str | None:
288
+ """Download WhatsApp voice/audio and transcribe via Deepgram."""
289
+ from ..core.stt import no_key_message, transcribe_bytes
290
+
291
+ media = getattr(msg, "voice", None) or getattr(msg, "audio", None)
292
+ if media is None:
293
+ return None
294
+
295
+ try:
296
+ data = media.download(in_memory=True)
297
+ except Exception:
298
+ logger.warning("[WhatsApp] Failed to download audio")
299
+ return None
300
+
301
+ mime = getattr(media, "mime_type", "audio/ogg")
302
+ try:
303
+ transcript = transcribe_bytes(data, mime)
304
+ except Exception as exc:
305
+ logger.warning("[WhatsApp] Deepgram failed: %s", exc)
306
+ msg.reply(f"Voice transcription failed: {exc}")
307
+ return None
308
+
309
+ if transcript is None:
310
+ msg.reply(no_key_message())
311
+ return None
312
+ if not transcript.strip():
313
+ msg.reply("Could not recognise any speech in the audio.")
314
+ return None
315
+
316
+ logger.info("[WhatsApp] Audio transcribed: %s", transcript[:80])
317
+ return transcript
318
+
319
+
251
320
  def create_bot(session_manager: "SessionManager") -> WhatsAppBot:
252
321
  """Create a WhatsAppBot from pythonclaw.json / env vars."""
253
322
  phone_id = config.get_str(
pythonclaw/config.py CHANGED
@@ -209,6 +209,31 @@ def group_context_dir(session_id: str) -> Path:
209
209
  return PYTHONCLAW_HOME / "context" / "groups" / safe
210
210
 
211
211
 
212
+ def files_dir() -> Path:
213
+ """Return the shared files directory (``~/.pythonclaw/context/files/``)."""
214
+ d = PYTHONCLAW_HOME / "context" / "files"
215
+ d.mkdir(parents=True, exist_ok=True)
216
+ return d
217
+
218
+
219
+ def clear_files() -> int:
220
+ """Delete all files in the shared files directory. Returns count removed."""
221
+ d = files_dir()
222
+ count = 0
223
+ for entry in d.iterdir():
224
+ try:
225
+ if entry.is_file():
226
+ entry.unlink()
227
+ count += 1
228
+ elif entry.is_dir():
229
+ import shutil
230
+ shutil.rmtree(entry)
231
+ count += 1
232
+ except OSError:
233
+ pass
234
+ return count
235
+
236
+
212
237
  def reset() -> None:
213
238
  """Clear the cached config (mainly for testing)."""
214
239
  global _config, _config_path
pythonclaw/core/agent.py CHANGED
@@ -207,6 +207,7 @@ class Agent:
207
207
  self.MAX_PARALLEL_SKILLS = config.get_int(
208
208
  "agent", "maxParallelSkills", default=5,
209
209
  )
210
+ self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="agent-bg")
210
211
 
211
212
  # Memory — with optional global fallback for per-group isolation
212
213
  mem_dir = memory_dir or config.get("memory", "dir", env="PYTHONCLAW_MEMORY_DIR")
@@ -342,9 +343,11 @@ You decide which mode fits. Don't announce the mode name.
342
343
  - Proactively `remember` user preferences, decisions, key facts.
343
344
  - Use `recall` when user references past context.
344
345
  - Memory auto-loaded at session start. INDEX.md = curated system info.
346
+ - All downloaded/generated files go in the shared files directory (`~/.pythonclaw/context/files/`). The `run_command` tool uses this as its working directory.
345
347
  - NEVER output tool calls as XML or text. Always use the function calling API.
346
348
 
347
349
  ### Response Guidelines
350
+ - **Language matching**: ALWAYS reply in the SAME language the user used in their message. If the user writes in Chinese, reply in Chinese. If in English, reply in English. Mirror the user's language exactly.
348
351
  - Answer the user's question directly and concisely.
349
352
  - Keep responses focused — under 300 words when possible. Break long answers into short paragraphs.
350
353
  - Do NOT mention what skills or tools you have available, unless explicitly asked.
@@ -798,7 +801,7 @@ Don't repeat this if `bot_name` already exists in memory.
798
801
  soft_threshold = int(self.compaction_threshold * 0.8)
799
802
 
800
803
  if not self._memory_flushed_this_cycle and tokens >= soft_threshold:
801
- self._proactive_memory_flush()
804
+ self._bg_executor.submit(self._proactive_memory_flush)
802
805
  self._memory_flushed_this_cycle = True
803
806
 
804
807
  if tokens < self.compaction_threshold:
@@ -955,7 +958,7 @@ Don't repeat this if `bot_name` already exists in memory.
955
958
 
956
959
  t0 = time.monotonic()
957
960
  results: dict[str, str] = {}
958
- with ThreadPoolExecutor(max_workers=min(len(tool_calls), 8)) as pool:
961
+ with ThreadPoolExecutor(max_workers=min(len(tool_calls), 16)) as pool:
959
962
  futures = {
960
963
  pool.submit(self._execute_tool_call, tc): tc
961
964
  for tc in tool_calls
@@ -1101,7 +1104,7 @@ Don't repeat this if `bot_name` already exists in memory.
1101
1104
 
1102
1105
  results: dict[str, str] = {}
1103
1106
  with ThreadPoolExecutor(
1104
- max_workers=min(len(tool_calls), 8)
1107
+ max_workers=min(len(tool_calls), 16),
1105
1108
  ) as pool:
1106
1109
  futures = {
1107
1110
  pool.submit(self._execute_tool_call, tc): tc