devduck 1.1.0__tar.gz → 1.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of devduck might be problematic. Click here for more details.

Files changed (53)
  1. {devduck-1.1.0 → devduck-1.1.3}/.github/workflows/agent.yml +2 -2
  2. {devduck-1.1.0 → devduck-1.1.3}/.gitignore +4 -1
  3. {devduck-1.1.0 → devduck-1.1.3}/PKG-INFO +2 -1
  4. {devduck-1.1.0 → devduck-1.1.3}/devduck/__init__.py +99 -15
  5. {devduck-1.1.0 → devduck-1.1.3}/devduck/_version.py +3 -3
  6. devduck-1.1.3/devduck/tools/__init__.py +47 -0
  7. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/speech_to_speech.py +109 -9
  8. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/system_prompt.py +276 -153
  9. {devduck-1.1.0 → devduck-1.1.3}/devduck.egg-info/PKG-INFO +2 -1
  10. {devduck-1.1.0 → devduck-1.1.3}/devduck.egg-info/SOURCES.txt +3 -2
  11. {devduck-1.1.0 → devduck-1.1.3}/devduck.egg-info/requires.txt +1 -0
  12. {devduck-1.1.0 → devduck-1.1.3}/pyproject.toml +1 -0
  13. devduck-1.1.3/tools/fetch_github_tool.py +201 -0
  14. devduck-1.1.0/devduck/tools/__init__.py +0 -55
  15. {devduck-1.1.0 → devduck-1.1.3}/LICENSE +0 -0
  16. {devduck-1.1.0 → devduck-1.1.3}/MANIFEST.in +0 -0
  17. {devduck-1.1.0 → devduck-1.1.3}/README.md +0 -0
  18. {devduck-1.1.0 → devduck-1.1.3}/action.yml +0 -0
  19. {devduck-1.1.0 → devduck-1.1.3}/agent_runner.py +0 -0
  20. {devduck-1.1.0 → devduck-1.1.3}/devduck/__main__.py +0 -0
  21. {devduck-1.1.0 → devduck-1.1.3}/devduck/agentcore_handler.py +0 -0
  22. {devduck-1.1.0 → devduck-1.1.3}/devduck/test_redduck.py +0 -0
  23. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/_ambient_input.py +0 -0
  24. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/_tray_app.py +0 -0
  25. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/agentcore_agents.py +0 -0
  26. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/agentcore_config.py +0 -0
  27. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/agentcore_invoke.py +0 -0
  28. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/agentcore_logs.py +0 -0
  29. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/ambient.py +0 -0
  30. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/create_subagent.py +0 -0
  31. {devduck-1.1.0 → devduck-1.1.3/devduck}/tools/fetch_github_tool.py +0 -0
  32. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/install_tools.py +0 -0
  33. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/ipc.py +0 -0
  34. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/mcp_server.py +0 -0
  35. {devduck-1.1.0 → devduck-1.1.3/devduck}/tools/scraper.py +0 -0
  36. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/state_manager.py +0 -0
  37. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/store_in_kb.py +0 -0
  38. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/tcp.py +0 -0
  39. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/tray.py +0 -0
  40. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/use_github.py +0 -0
  41. {devduck-1.1.0 → devduck-1.1.3}/devduck/tools/websocket.py +0 -0
  42. {devduck-1.1.0 → devduck-1.1.3}/devduck.egg-info/dependency_links.txt +0 -0
  43. {devduck-1.1.0 → devduck-1.1.3}/devduck.egg-info/entry_points.txt +0 -0
  44. {devduck-1.1.0 → devduck-1.1.3}/devduck.egg-info/top_level.txt +0 -0
  45. {devduck-1.1.0 → devduck-1.1.3}/docs/index.html +0 -0
  46. {devduck-1.1.0 → devduck-1.1.3}/docs/mac-os-tray.jpg +0 -0
  47. {devduck-1.1.0 → devduck-1.1.3}/requirements.txt +0 -0
  48. {devduck-1.1.0 → devduck-1.1.3}/setup-aws-oidc.sh +0 -0
  49. {devduck-1.1.0 → devduck-1.1.3}/setup.cfg +0 -0
  50. {devduck-1.1.0 → devduck-1.1.3}/test.py +0 -0
  51. {devduck-1.1.0 → devduck-1.1.3}/tools/__init__.py +0 -0
  52. {devduck-1.1.0 → devduck-1.1.3}/tools/gist.py +0 -0
  53. {devduck-1.1.0 → devduck-1.1.3}/tools/github_tools.py +0 -0
@@ -1,8 +1,8 @@
1
1
  name: DevDuck
2
2
 
3
3
  on:
4
- schedule:
5
- - cron: '0 8 * * *'
4
+ # schedule:
5
+ # - cron: '0 8 * * *'
6
6
  issues:
7
7
  types: [opened, edited, closed, reopened, assigned, unassigned, labeled, unlabeled]
8
8
  issue_comment:
@@ -7,4 +7,7 @@ dist/
7
7
  build
8
8
  _version.py
9
9
  .bedrock_agentcore.yaml
10
- .bedrock_agentcore
10
+ .bedrock_agentcore
11
+ google_*.json
12
+ gmail_*.json
13
+ .DS_Store
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: devduck
3
- Version: 1.1.0
3
+ Version: 1.1.3
4
4
  Summary: 🦆 Extreme minimalist self-adapting AI agent - one file, self-healing, runtime dependencies
5
5
  Author-email: Cagatay Cali <cagataycali@icloud.com>
6
6
  License: Apache-2.0
@@ -40,6 +40,7 @@ Requires-Dist: beautifulsoup4
40
40
  Requires-Dist: colorama
41
41
  Requires-Dist: websockets
42
42
  Requires-Dist: strands-mcp-server
43
+ Requires-Dist: strands-google
43
44
  Requires-Dist: bedrock-agentcore-starter-toolkit
44
45
  Requires-Dist: bedrock-agentcore
45
46
  Requires-Dist: rumps; sys_platform == "darwin"
@@ -3,19 +3,31 @@
3
3
  🦆 devduck - extreme minimalist self-adapting agent
4
4
  one file. self-healing. runtime dependencies. adaptive.
5
5
  """
6
+ import os
6
7
  import sys
7
8
  import subprocess
8
- import os
9
+ import threading
9
10
  import platform
10
11
  import socket
11
12
  import logging
12
13
  import tempfile
13
14
  import time
14
15
  import warnings
16
+ import json
15
17
  from pathlib import Path
16
18
  from datetime import datetime
17
19
  from typing import Dict, Any
18
20
  from logging.handlers import RotatingFileHandler
21
+ from strands import Agent, tool
22
+
23
+ # Import system prompt helper for loading prompts from files
24
+ try:
25
+ from devduck.tools.system_prompt import _get_system_prompt
26
+ except ImportError:
27
+ # Fallback if tools module not available yet
28
+ def _get_system_prompt(repository=None, variable_name="SYSTEM_PROMPT"):
29
+ return os.getenv(variable_name, "")
30
+
19
31
 
20
32
  warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
21
33
  warnings.filterwarnings("ignore", message=".*cache_prompt is deprecated.*")
@@ -594,7 +606,7 @@ class DevDuck:
594
606
  servers = {
595
607
  "tcp": {
596
608
  "port": int(os.getenv("DEVDUCK_TCP_PORT", "9999")),
597
- "enabled": os.getenv("DEVDUCK_ENABLE_TCP", "true").lower()
609
+ "enabled": os.getenv("DEVDUCK_ENABLE_TCP", "false").lower()
598
610
  == "true",
599
611
  },
600
612
  "ws": {
@@ -604,25 +616,93 @@ class DevDuck:
604
616
  },
605
617
  "mcp": {
606
618
  "port": int(os.getenv("DEVDUCK_MCP_PORT", "8000")),
607
- "enabled": os.getenv("DEVDUCK_ENABLE_MCP", "true").lower()
619
+ "enabled": os.getenv("DEVDUCK_ENABLE_MCP", "false").lower()
608
620
  == "true",
609
621
  },
610
622
  "ipc": {
611
623
  "socket_path": os.getenv(
612
624
  "DEVDUCK_IPC_SOCKET", "/tmp/devduck_main.sock"
613
625
  ),
614
- "enabled": os.getenv("DEVDUCK_ENABLE_IPC", "true").lower()
626
+ "enabled": os.getenv("DEVDUCK_ENABLE_IPC", "false").lower()
615
627
  == "true",
616
628
  },
617
629
  }
618
630
 
619
- self.servers = servers
631
+ # Show server configuration status
632
+ enabled_servers = []
633
+ disabled_servers = []
634
+ for server_name, config in servers.items():
635
+ if config.get("enabled", False):
636
+ if "port" in config:
637
+ enabled_servers.append(
638
+ f"{server_name.upper()}:{config['port']}"
639
+ )
640
+ else:
641
+ enabled_servers.append(server_name.upper())
642
+ else:
643
+ disabled_servers.append(server_name.upper())
620
644
 
621
- from strands import Agent, tool
645
+ logger.debug(
646
+ f"🦆 Server config: {', '.join(enabled_servers) if enabled_servers else 'none enabled'}"
647
+ )
648
+ if disabled_servers:
649
+ logger.debug(f"🦆 Disabled: {', '.join(disabled_servers)}")
650
+
651
+ self.servers = servers
622
652
 
623
653
  # Load tools with flexible configuration
624
- # Default tool config - user can override with DEVDUCK_TOOLS env var
625
- default_tools = "devduck.tools:system_prompt,store_in_kb,ipc,tcp,websocket,mcp_server,state_manager,tray,ambient,agentcore_config,agentcore_invoke,agentcore_logs,agentcore_agents,install_tools,create_subagent,use_github;strands_tools:shell,editor,file_read,file_write,image_reader,load_tool,retrieve,calculator,use_agent,environment,mcp_client,speak,slack;strands_fun_tools:listen,cursor,clipboard,screen_reader,bluetooth,yolo_vision"
654
+ # Default tool config
655
+ # Agent can load additional tools on-demand via fetch_github_tool
656
+
657
+ # 🔧 Available DevDuck Tools (load on-demand):
658
+ # - system_prompt: https://github.com/cagataycali/devduck/blob/main/devduck/tools/system_prompt.py
659
+ # - store_in_kb: https://github.com/cagataycali/devduck/blob/main/devduck/tools/store_in_kb.py
660
+ # - ipc: https://github.com/cagataycali/devduck/blob/main/devduck/tools/ipc.py
661
+ # - tcp: https://github.com/cagataycali/devduck/blob/main/devduck/tools/tcp.py
662
+ # - websocket: https://github.com/cagataycali/devduck/blob/main/devduck/tools/websocket.py
663
+ # - mcp_server: https://github.com/cagataycali/devduck/blob/main/devduck/tools/mcp_server.py
664
+ # - scraper: https://github.com/cagataycali/devduck/blob/main/devduck/tools/scraper.py
665
+ # - tray: https://github.com/cagataycali/devduck/blob/main/devduck/tools/tray.py
666
+ # - ambient: https://github.com/cagataycali/devduck/blob/main/devduck/tools/ambient.py
667
+ # - agentcore_config: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_config.py
668
+ # - agentcore_invoke: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_invoke.py
669
+ # - agentcore_logs: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_logs.py
670
+ # - agentcore_agents: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_agents.py
671
+ # - create_subagent: https://github.com/cagataycali/devduck/blob/main/devduck/tools/create_subagent.py
672
+ # - use_github: https://github.com/cagataycali/devduck/blob/main/devduck/tools/use_github.py
673
+ # - speech_to_speech: https://github.com/cagataycali/devduck/blob/main/devduck/tools/speech_to_speech.py
674
+ # - state_manager: https://github.com/cagataycali/devduck/blob/main/devduck/tools/state_manager.py
675
+
676
+ # 📦 Strands Tools
677
+ # - editor, file_read, file_write, image_reader, load_tool, retrieve
678
+ # - calculator, use_agent, environment, mcp_client, speak, slack
679
+
680
+ # 🎮 Strands Fun Tools
681
+ # - listen, cursor, clipboard, screen_reader, bluetooth, yolo_vision
682
+
683
+ # 🔍 Strands Google
684
+ # - use_google, google_auth
685
+
686
+ # 🔧 Auto-append server tools based on enabled servers
687
+ server_tools_needed = []
688
+ if servers.get("tcp", {}).get("enabled", False):
689
+ server_tools_needed.append("tcp")
690
+ if servers.get("ws", {}).get("enabled", False):
691
+ server_tools_needed.append("websocket")
692
+ if servers.get("mcp", {}).get("enabled", False):
693
+ server_tools_needed.append("mcp_server")
694
+ if servers.get("ipc", {}).get("enabled", False):
695
+ server_tools_needed.append("ipc")
696
+
697
+ # Append to default tools if any server tools are needed
698
+ if server_tools_needed:
699
+ server_tools_str = ",".join(server_tools_needed)
700
+ default_tools = f"devduck.tools:system_prompt,fetch_github_tool,{server_tools_str};strands_tools:shell"
701
+ logger.info(f"Auto-added server tools: {server_tools_str}")
702
+ else:
703
+ default_tools = (
704
+ "devduck.tools:system_prompt,fetch_github_tool;strands_tools:shell"
705
+ )
626
706
 
627
707
  tools_config = os.getenv("DEVDUCK_TOOLS", default_tools)
628
708
  logger.info(f"Loading tools from config: {tools_config}")
@@ -776,8 +856,6 @@ class DevDuck:
776
856
  Returns:
777
857
  List of MCPClient instances ready for direct use in Agent
778
858
  """
779
- import json
780
-
781
859
  mcp_servers_json = os.getenv("MCP_SERVERS")
782
860
  if not mcp_servers_json:
783
861
  logger.debug("No MCP_SERVERS environment variable found")
@@ -1145,7 +1223,7 @@ When you learn something valuable during conversations:
1145
1223
  - Communication: **MINIMAL WORDS**
1146
1224
  - Efficiency: **Speed is paramount**
1147
1225
 
1148
- {os.getenv('SYSTEM_PROMPT', '')}"""
1226
+ {_get_system_prompt()}"""
1149
1227
 
1150
1228
  def _self_heal(self, error):
1151
1229
  """Attempt self-healing when errors occur"""
@@ -1192,7 +1270,6 @@ When you learn something valuable during conversations:
1192
1270
 
1193
1271
  def _is_socket_available(self, socket_path):
1194
1272
  """Check if a Unix socket is available"""
1195
- import os
1196
1273
 
1197
1274
  # If socket file doesn't exist, it's available
1198
1275
  if not os.path.exists(socket_path):
@@ -1430,11 +1507,11 @@ When you learn something valuable during conversations:
1430
1507
  def restart(self):
1431
1508
  """Restart the agent"""
1432
1509
  print("\n🦆 Restarting...")
1510
+ logger.debug("\n🦆 Restarting...")
1433
1511
  self.__init__()
1434
1512
 
1435
1513
  def _start_file_watcher(self):
1436
1514
  """Start background file watcher for auto hot-reload"""
1437
- import threading
1438
1515
 
1439
1516
  logger.info("Starting file watcher for hot-reload")
1440
1517
  # Get the path to this file
@@ -1534,6 +1611,7 @@ When you learn something valuable during conversations:
1534
1611
  self._watcher_running = False
1535
1612
 
1536
1613
  print("\n🦆 Restarting process with fresh code...")
1614
+ logger.debug("\n🦆 Restarting process with fresh code...")
1537
1615
 
1538
1616
  # Restart the entire Python process
1539
1617
  # This ensures all code is freshly loaded
@@ -1677,7 +1755,7 @@ def interactive():
1677
1755
  print(f"📝 Logs: {LOG_DIR}")
1678
1756
  print("Type 'exit', 'quit', or 'q' to quit.")
1679
1757
  print("Prefix with ! to run shell commands (e.g., ! ls -la)")
1680
- print("-" * 50)
1758
+ print("\n\n")
1681
1759
  logger.info("Interactive mode started")
1682
1760
 
1683
1761
  # Set up prompt_toolkit with history
@@ -1705,7 +1783,6 @@ def interactive():
1705
1783
  auto_suggest=AutoSuggestFromHistory(),
1706
1784
  completer=completer,
1707
1785
  complete_while_typing=True,
1708
- mouse_support=False, # breaks scrolling when enabled
1709
1786
  )
1710
1787
 
1711
1788
  # Reset interrupt count on successful prompt
@@ -1733,6 +1810,10 @@ def interactive():
1733
1810
  )
1734
1811
  devduck._agent_executing = False
1735
1812
 
1813
+ # Reset terminal to fix rendering issues after command output
1814
+ print("\r", end="", flush=True)
1815
+ sys.stdout.flush()
1816
+
1736
1817
  # Append shell command to history
1737
1818
  append_to_shell_history(q, result["content"][0]["text"])
1738
1819
 
@@ -1747,6 +1828,9 @@ def interactive():
1747
1828
  except Exception as e:
1748
1829
  devduck._agent_executing = False # Reset on error
1749
1830
  print(f"🦆 Shell command error: {e}")
1831
+ # Reset terminal on error too
1832
+ print("\r", end="", flush=True)
1833
+ sys.stdout.flush()
1750
1834
  continue
1751
1835
 
1752
1836
  # Execute the agent with user input
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '1.1.0'
32
- __version_tuple__ = version_tuple = (1, 1, 0)
31
+ __version__ = version = '1.1.3'
32
+ __version_tuple__ = version_tuple = (1, 1, 3)
33
33
 
34
- __commit_id__ = commit_id = 'g6a4263e8c'
34
+ __commit_id__ = commit_id = 'gacfc41193'
@@ -0,0 +1,47 @@
1
+ """
2
+ DevDuck Tools Package
3
+
4
+ This module exports all available tools for devduck.
5
+ """
6
+
7
+ from .agentcore_agents import agentcore_agents
8
+ from .agentcore_config import agentcore_config
9
+ from .agentcore_invoke import agentcore_invoke
10
+ from .agentcore_logs import agentcore_logs
11
+ from .ambient import ambient
12
+ from .create_subagent import create_subagent
13
+ from .fetch_github_tool import fetch_github_tool
14
+ from .install_tools import install_tools
15
+ from .ipc import ipc
16
+ from .mcp_server import mcp_server
17
+ from .scraper import scraper
18
+ from .speech_to_speech import speech_to_speech
19
+ from .state_manager import state_manager
20
+ from .store_in_kb import store_in_kb
21
+ from .system_prompt import system_prompt
22
+ from .tcp import tcp
23
+ from .tray import tray
24
+ from .use_github import use_github
25
+ from .websocket import websocket
26
+
27
+ __all__ = [
28
+ "agentcore_agents",
29
+ "agentcore_config",
30
+ "agentcore_invoke",
31
+ "agentcore_logs",
32
+ "ambient",
33
+ "create_subagent",
34
+ "fetch_github_tool",
35
+ "install_tools",
36
+ "ipc",
37
+ "mcp_server",
38
+ "scraper",
39
+ "speech_to_speech",
40
+ "state_manager",
41
+ "store_in_kb",
42
+ "system_prompt",
43
+ "tcp",
44
+ "tray",
45
+ "use_github",
46
+ "websocket",
47
+ ]
@@ -102,15 +102,21 @@ class SpeechSession:
102
102
  self,
103
103
  session_id: str,
104
104
  agent: BidiAgent,
105
+ input_device_index: Optional[int] = None,
106
+ output_device_index: Optional[int] = None,
105
107
  ):
106
108
  """Initialize speech session.
107
109
 
108
110
  Args:
109
111
  session_id: Unique session identifier
110
112
  agent: BidiAgent instance
113
+ input_device_index: PyAudio input device index
114
+ output_device_index: PyAudio output device index
111
115
  """
112
116
  self.session_id = session_id
113
117
  self.agent = agent
118
+ self.input_device_index = input_device_index
119
+ self.output_device_index = output_device_index
114
120
  self.active = False
115
121
  self.thread = None
116
122
  self.loop = None
@@ -187,8 +193,11 @@ class SpeechSession:
187
193
  async def _async_session(self) -> None:
188
194
  """Async session management using BidiAudioIO."""
189
195
  try:
190
- # Create audio I/O
191
- audio_io = BidiAudioIO()
196
+ # Create audio I/O with device indices
197
+ audio_io = BidiAudioIO(
198
+ input_device_index=self.input_device_index,
199
+ output_device_index=self.output_device_index,
200
+ )
192
201
 
193
202
  # Run agent with audio I/O
194
203
  await self.agent.run(inputs=[audio_io.input()], outputs=[audio_io.output()])
@@ -207,6 +216,9 @@ def speech_to_speech(
207
216
  tools: Optional[List[str]] = None,
208
217
  agent: Optional[Any] = None,
209
218
  load_history_from: Optional[str] = None,
219
+ inherit_system_prompt: bool = False,
220
+ input_device_index: Optional[int] = None,
221
+ output_device_index: Optional[int] = None,
210
222
  ) -> str:
211
223
  """Start, stop, or manage speech-to-speech conversations.
212
224
 
@@ -221,18 +233,22 @@ def speech_to_speech(
221
233
  - "status": Get session status
222
234
  - "list_history": List saved conversation histories
223
235
  - "read_history": Read a specific conversation history
236
+ - "list_audio_devices": List all available audio input/output devices
224
237
  provider: Model provider to use:
225
238
  - "novasonic": AWS Bedrock Nova Sonic
226
239
  - "openai": OpenAI Realtime API
227
240
  - "gemini_live": Google Gemini Live
228
241
  system_prompt: Custom system prompt for the agent. This will be appended
229
- to the parent agent's system prompt (if available). If not provided,
230
- uses default prompt that encourages tool usage.
242
+ to the parent agent's system prompt (if inherit_system_prompt=True).
243
+ If not provided, uses default prompt that encourages tool usage.
231
244
  session_id: Session identifier:
232
245
  - For "start": Custom ID (auto-generated if not provided)
233
246
  - For "stop": Specific session to stop (stops all if not provided)
234
247
  - For "read_history": Session ID to read history from
235
248
  - For "status": Not used
249
+ inherit_system_prompt: Whether to inherit parent agent's system prompt.
250
+ Set to False to use only the custom system_prompt (useful for OpenAI
251
+ which has 16K token limit). Default: False
236
252
  model_settings: Provider-specific configuration dictionary. Structure:
237
253
  {
238
254
  "model_id": "model-name",
@@ -260,6 +276,10 @@ def speech_to_speech(
260
276
  agent: Parent agent (automatically passed by Strands framework)
261
277
  load_history_from: Optional session ID to load conversation history from
262
278
  when starting a new session (provides context continuity)
279
+ input_device_index: Optional PyAudio input device index. If not specified,
280
+ uses system default. Use action="list_audio_devices" to see available devices.
281
+ output_device_index: Optional PyAudio output device index. If not specified,
282
+ uses system default. Use action="list_audio_devices" to see available devices.
263
283
 
264
284
  Returns:
265
285
  str: Status message with session details or error information
@@ -287,6 +307,9 @@ def speech_to_speech(
287
307
  tools,
288
308
  agent,
289
309
  load_history_from,
310
+ inherit_system_prompt,
311
+ input_device_index,
312
+ output_device_index,
290
313
  )
291
314
  elif action == "stop":
292
315
  return _stop_speech_session(session_id)
@@ -296,6 +319,8 @@ def speech_to_speech(
296
319
  return _list_conversation_histories()
297
320
  elif action == "read_history":
298
321
  return _read_conversation_history(session_id)
322
+ elif action == "list_audio_devices":
323
+ return _list_audio_devices()
299
324
  else:
300
325
  return f"Unknown action: {action}"
301
326
 
@@ -364,6 +389,9 @@ def _start_speech_session(
364
389
  tool_names: Optional[List[str]],
365
390
  parent_agent: Optional[Any],
366
391
  load_history_from: Optional[str],
392
+ inherit_system_prompt: bool,
393
+ input_device_index: Optional[int],
394
+ output_device_index: Optional[int],
367
395
  ) -> str:
368
396
  """Start a speech-to-speech session with full configuration support."""
369
397
  try:
@@ -384,7 +412,7 @@ def _start_speech_session(
384
412
  if provider == "novasonic":
385
413
  # Nova Sonic only available in: us-east-1, eu-north-1, ap-northeast-1
386
414
  default_settings = {
387
- "model_id": "amazon.nova-2-sonic-v1:0",
415
+ "model_id": os.getenv("BIDI_MODEL_ID", "amazon.nova-2-sonic-v1:0"),
388
416
  "provider_config": {
389
417
  "audio": {
390
418
  "voice": "tiffany",
@@ -421,7 +449,7 @@ def _start_speech_session(
421
449
  elif provider == "openai":
422
450
  # Read API key from environment if not provided in model_settings
423
451
  default_settings = {
424
- "model_id": "gpt-realtime",
452
+ "model_id": os.getenv("BIDI_MODEL_ID", "gpt-realtime"),
425
453
  "client_config": {
426
454
  "api_key": os.getenv("OPENAI_API_KEY"),
427
455
  },
@@ -457,7 +485,9 @@ def _start_speech_session(
457
485
  api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
458
486
 
459
487
  default_settings = {
460
- "model_id": "gemini-2.5-flash-native-audio-preview-09-2025",
488
+ "model_id": os.getenv(
489
+ "BIDI_MODEL_ID", "gemini-2.5-flash-native-audio-preview-09-2025"
490
+ ),
461
491
  "client_config": {
462
492
  "api_key": api_key,
463
493
  },
@@ -545,8 +575,12 @@ def _start_speech_session(
545
575
  # Build system prompt: parent prompt + custom prompt
546
576
  final_system_prompt = ""
547
577
 
548
- # Get parent agent's system prompt if available
549
- if parent_agent and hasattr(parent_agent, "system_prompt"):
578
+ # Get parent agent's system prompt if available and inheritance enabled
579
+ if (
580
+ inherit_system_prompt
581
+ and parent_agent
582
+ and hasattr(parent_agent, "system_prompt")
583
+ ):
550
584
  parent_prompt = parent_agent.system_prompt or ""
551
585
  if parent_prompt:
552
586
  final_system_prompt = parent_prompt
@@ -581,6 +615,8 @@ Keep your voice responses brief and natural."""
581
615
  session = SpeechSession(
582
616
  session_id=session_id,
583
617
  agent=bidi_agent,
618
+ input_device_index=input_device_index,
619
+ output_device_index=output_device_index,
584
620
  )
585
621
 
586
622
  session.start()
@@ -748,3 +784,67 @@ def _read_conversation_history(session_id: Optional[str]) -> str:
748
784
 
749
785
  except Exception as e:
750
786
  return f"❌ Error reading history: {e}"
787
+
788
+
789
def _list_audio_devices() -> str:
    """List all available audio input and output devices.

    Asks PyAudio for every device it knows about and renders a
    Markdown-style summary: device index, name, whether it can capture
    (INPUT) and/or play back (OUTPUT), its channel counts, and whether it
    is the system default for either direction.

    A missing default input or output device does not abort the listing;
    the corresponding "default" marker is simply omitted. The PyAudio
    instance is always terminated, even when enumeration fails.

    Returns:
        str: The formatted device list followed by a usage hint, or a
        message starting with "❌" when PyAudio is not installed or the
        devices could not be enumerated.
    """

    def _default_index(lookup):
        # PyAudio raises IOError (an alias of OSError) when the host has no
        # default device for that direction, e.g. a machine without a
        # microphone. Treat that as "no default" instead of failing.
        try:
            return lookup()["index"]
        except (OSError, KeyError):
            return None

    try:
        import pyaudio

        p = pyaudio.PyAudio()
        try:
            lines = ["**Available Audio Devices:**\n"]

            device_count = p.get_device_count()
            default_input = _default_index(p.get_default_input_device_info)
            default_output = _default_index(p.get_default_output_device_info)

            lines.append(f"Total devices: {device_count}\n")

            for i in range(device_count):
                # Best effort per device: one unreadable entry must not hide
                # the remaining devices from the caller.
                try:
                    info = p.get_device_info_by_index(i)
                    name = info["name"]
                    max_input_channels = info["maxInputChannels"]
                    max_output_channels = info["maxOutputChannels"]

                    device_type = []
                    is_default = []

                    if max_input_channels > 0:
                        device_type.append("INPUT")
                        if i == default_input:
                            is_default.append("default input")

                    if max_output_channels > 0:
                        device_type.append("OUTPUT")
                        if i == default_output:
                            is_default.append("default output")

                    type_str = "/".join(device_type) if device_type else "NONE"
                    default_str = f" [{', '.join(is_default)}]" if is_default else ""

                    lines.append(
                        f"- **Index {i}:** {name}\n"
                        f" Type: {type_str}{default_str}\n"
                        f" Input Channels: {max_input_channels}, Output Channels: {max_output_channels}"
                    )

                except Exception as e:
                    lines.append(f"- **Index {i}:** Error reading device info - {e}")

            lines.append(
                "\n**Usage:**\n"
                "To use a specific device, pass the index:\n"
                ' speech_to_speech(action="start", input_device_index=2, output_device_index=5)'
            )

            return "\n".join(lines)
        finally:
            # Release the PortAudio resources on every path, not only on
            # the success path.
            p.terminate()

    except ImportError:
        return "❌ PyAudio not installed. Install with: pip install pyaudio"
    except Exception as e:
        return f"❌ Error listing audio devices: {e}"