devduck 1.1.0__tar.gz → 1.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of devduck might be problematic. Click here for more details.
- {devduck-1.1.0 → devduck-1.1.4}/.github/workflows/agent.yml +2 -2
- {devduck-1.1.0 → devduck-1.1.4}/.gitignore +4 -1
- {devduck-1.1.0 → devduck-1.1.4}/PKG-INFO +2 -1
- {devduck-1.1.0 → devduck-1.1.4}/devduck/__init__.py +99 -15
- {devduck-1.1.0 → devduck-1.1.4}/devduck/_version.py +3 -3
- devduck-1.1.4/devduck/tools/__init__.py +47 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/speech_to_speech.py +109 -9
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/system_prompt.py +276 -153
- {devduck-1.1.0 → devduck-1.1.4}/devduck.egg-info/PKG-INFO +2 -1
- {devduck-1.1.0 → devduck-1.1.4}/devduck.egg-info/SOURCES.txt +5 -2
- {devduck-1.1.0 → devduck-1.1.4}/devduck.egg-info/requires.txt +1 -0
- devduck-1.1.4/docs/android.html +1425 -0
- devduck-1.1.4/docs/devduck_tools_config.json +671 -0
- {devduck-1.1.0 → devduck-1.1.4}/pyproject.toml +1 -0
- devduck-1.1.4/tools/fetch_github_tool.py +201 -0
- devduck-1.1.0/devduck/tools/__init__.py +0 -55
- {devduck-1.1.0 → devduck-1.1.4}/LICENSE +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/MANIFEST.in +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/README.md +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/action.yml +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/agent_runner.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/__main__.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/agentcore_handler.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/test_redduck.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/_ambient_input.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/_tray_app.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/agentcore_agents.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/agentcore_config.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/agentcore_invoke.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/agentcore_logs.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/ambient.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/create_subagent.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4/devduck}/tools/fetch_github_tool.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/install_tools.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/ipc.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/mcp_server.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4/devduck}/tools/scraper.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/state_manager.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/store_in_kb.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/tcp.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/tray.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/use_github.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck/tools/websocket.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck.egg-info/dependency_links.txt +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck.egg-info/entry_points.txt +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/devduck.egg-info/top_level.txt +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/docs/index.html +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/docs/mac-os-tray.jpg +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/requirements.txt +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/setup-aws-oidc.sh +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/setup.cfg +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/test.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/tools/__init__.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/tools/gist.py +0 -0
- {devduck-1.1.0 → devduck-1.1.4}/tools/github_tools.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: devduck
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.4
|
|
4
4
|
Summary: 🦆 Extreme minimalist self-adapting AI agent - one file, self-healing, runtime dependencies
|
|
5
5
|
Author-email: Cagatay Cali <cagataycali@icloud.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -40,6 +40,7 @@ Requires-Dist: beautifulsoup4
|
|
|
40
40
|
Requires-Dist: colorama
|
|
41
41
|
Requires-Dist: websockets
|
|
42
42
|
Requires-Dist: strands-mcp-server
|
|
43
|
+
Requires-Dist: strands-google
|
|
43
44
|
Requires-Dist: bedrock-agentcore-starter-toolkit
|
|
44
45
|
Requires-Dist: bedrock-agentcore
|
|
45
46
|
Requires-Dist: rumps; sys_platform == "darwin"
|
|
@@ -3,19 +3,31 @@
|
|
|
3
3
|
🦆 devduck - extreme minimalist self-adapting agent
|
|
4
4
|
one file. self-healing. runtime dependencies. adaptive.
|
|
5
5
|
"""
|
|
6
|
+
import os
|
|
6
7
|
import sys
|
|
7
8
|
import subprocess
|
|
8
|
-
import
|
|
9
|
+
import threading
|
|
9
10
|
import platform
|
|
10
11
|
import socket
|
|
11
12
|
import logging
|
|
12
13
|
import tempfile
|
|
13
14
|
import time
|
|
14
15
|
import warnings
|
|
16
|
+
import json
|
|
15
17
|
from pathlib import Path
|
|
16
18
|
from datetime import datetime
|
|
17
19
|
from typing import Dict, Any
|
|
18
20
|
from logging.handlers import RotatingFileHandler
|
|
21
|
+
from strands import Agent, tool
|
|
22
|
+
|
|
23
|
+
# Import system prompt helper for loading prompts from files
|
|
24
|
+
try:
|
|
25
|
+
from devduck.tools.system_prompt import _get_system_prompt
|
|
26
|
+
except ImportError:
|
|
27
|
+
# Fallback if tools module not available yet
|
|
28
|
+
def _get_system_prompt(repository=None, variable_name="SYSTEM_PROMPT"):
|
|
29
|
+
return os.getenv(variable_name, "")
|
|
30
|
+
|
|
19
31
|
|
|
20
32
|
warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
|
|
21
33
|
warnings.filterwarnings("ignore", message=".*cache_prompt is deprecated.*")
|
|
@@ -594,7 +606,7 @@ class DevDuck:
|
|
|
594
606
|
servers = {
|
|
595
607
|
"tcp": {
|
|
596
608
|
"port": int(os.getenv("DEVDUCK_TCP_PORT", "9999")),
|
|
597
|
-
"enabled": os.getenv("DEVDUCK_ENABLE_TCP", "
|
|
609
|
+
"enabled": os.getenv("DEVDUCK_ENABLE_TCP", "false").lower()
|
|
598
610
|
== "true",
|
|
599
611
|
},
|
|
600
612
|
"ws": {
|
|
@@ -604,25 +616,93 @@ class DevDuck:
|
|
|
604
616
|
},
|
|
605
617
|
"mcp": {
|
|
606
618
|
"port": int(os.getenv("DEVDUCK_MCP_PORT", "8000")),
|
|
607
|
-
"enabled": os.getenv("DEVDUCK_ENABLE_MCP", "
|
|
619
|
+
"enabled": os.getenv("DEVDUCK_ENABLE_MCP", "false").lower()
|
|
608
620
|
== "true",
|
|
609
621
|
},
|
|
610
622
|
"ipc": {
|
|
611
623
|
"socket_path": os.getenv(
|
|
612
624
|
"DEVDUCK_IPC_SOCKET", "/tmp/devduck_main.sock"
|
|
613
625
|
),
|
|
614
|
-
"enabled": os.getenv("DEVDUCK_ENABLE_IPC", "
|
|
626
|
+
"enabled": os.getenv("DEVDUCK_ENABLE_IPC", "false").lower()
|
|
615
627
|
== "true",
|
|
616
628
|
},
|
|
617
629
|
}
|
|
618
630
|
|
|
619
|
-
|
|
631
|
+
# Show server configuration status
|
|
632
|
+
enabled_servers = []
|
|
633
|
+
disabled_servers = []
|
|
634
|
+
for server_name, config in servers.items():
|
|
635
|
+
if config.get("enabled", False):
|
|
636
|
+
if "port" in config:
|
|
637
|
+
enabled_servers.append(
|
|
638
|
+
f"{server_name.upper()}:{config['port']}"
|
|
639
|
+
)
|
|
640
|
+
else:
|
|
641
|
+
enabled_servers.append(server_name.upper())
|
|
642
|
+
else:
|
|
643
|
+
disabled_servers.append(server_name.upper())
|
|
620
644
|
|
|
621
|
-
|
|
645
|
+
logger.debug(
|
|
646
|
+
f"🦆 Server config: {', '.join(enabled_servers) if enabled_servers else 'none enabled'}"
|
|
647
|
+
)
|
|
648
|
+
if disabled_servers:
|
|
649
|
+
logger.debug(f"🦆 Disabled: {', '.join(disabled_servers)}")
|
|
650
|
+
|
|
651
|
+
self.servers = servers
|
|
622
652
|
|
|
623
653
|
# Load tools with flexible configuration
|
|
624
|
-
# Default tool config
|
|
625
|
-
|
|
654
|
+
# Default tool config
|
|
655
|
+
# Agent can load additional tools on-demand via fetch_github_tool
|
|
656
|
+
|
|
657
|
+
# 🔧 Available DevDuck Tools (load on-demand):
|
|
658
|
+
# - system_prompt: https://github.com/cagataycali/devduck/blob/main/devduck/tools/system_prompt.py
|
|
659
|
+
# - store_in_kb: https://github.com/cagataycali/devduck/blob/main/devduck/tools/store_in_kb.py
|
|
660
|
+
# - ipc: https://github.com/cagataycali/devduck/blob/main/devduck/tools/ipc.py
|
|
661
|
+
# - tcp: https://github.com/cagataycali/devduck/blob/main/devduck/tools/tcp.py
|
|
662
|
+
# - websocket: https://github.com/cagataycali/devduck/blob/main/devduck/tools/websocket.py
|
|
663
|
+
# - mcp_server: https://github.com/cagataycali/devduck/blob/main/devduck/tools/mcp_server.py
|
|
664
|
+
# - scraper: https://github.com/cagataycali/devduck/blob/main/devduck/tools/scraper.py
|
|
665
|
+
# - tray: https://github.com/cagataycali/devduck/blob/main/devduck/tools/tray.py
|
|
666
|
+
# - ambient: https://github.com/cagataycali/devduck/blob/main/devduck/tools/ambient.py
|
|
667
|
+
# - agentcore_config: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_config.py
|
|
668
|
+
# - agentcore_invoke: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_invoke.py
|
|
669
|
+
# - agentcore_logs: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_logs.py
|
|
670
|
+
# - agentcore_agents: https://github.com/cagataycali/devduck/blob/main/devduck/tools/agentcore_agents.py
|
|
671
|
+
# - create_subagent: https://github.com/cagataycali/devduck/blob/main/devduck/tools/create_subagent.py
|
|
672
|
+
# - use_github: https://github.com/cagataycali/devduck/blob/main/devduck/tools/use_github.py
|
|
673
|
+
# - speech_to_speech: https://github.com/cagataycali/devduck/blob/main/devduck/tools/speech_to_speech.py
|
|
674
|
+
# - state_manager: https://github.com/cagataycali/devduck/blob/main/devduck/tools/state_manager.py
|
|
675
|
+
|
|
676
|
+
# 📦 Strands Tools
|
|
677
|
+
# - editor, file_read, file_write, image_reader, load_tool, retrieve
|
|
678
|
+
# - calculator, use_agent, environment, mcp_client, speak, slack
|
|
679
|
+
|
|
680
|
+
# 🎮 Strands Fun Tools
|
|
681
|
+
# - listen, cursor, clipboard, screen_reader, bluetooth, yolo_vision
|
|
682
|
+
|
|
683
|
+
# 🔍 Strands Google
|
|
684
|
+
# - use_google, google_auth
|
|
685
|
+
|
|
686
|
+
# 🔧 Auto-append server tools based on enabled servers
|
|
687
|
+
server_tools_needed = []
|
|
688
|
+
if servers.get("tcp", {}).get("enabled", False):
|
|
689
|
+
server_tools_needed.append("tcp")
|
|
690
|
+
if servers.get("ws", {}).get("enabled", True):
|
|
691
|
+
server_tools_needed.append("websocket")
|
|
692
|
+
if servers.get("mcp", {}).get("enabled", False):
|
|
693
|
+
server_tools_needed.append("mcp_server")
|
|
694
|
+
if servers.get("ipc", {}).get("enabled", False):
|
|
695
|
+
server_tools_needed.append("ipc")
|
|
696
|
+
|
|
697
|
+
# Append to default tools if any server tools are needed
|
|
698
|
+
if server_tools_needed:
|
|
699
|
+
server_tools_str = ",".join(server_tools_needed)
|
|
700
|
+
default_tools = f"devduck.tools:system_prompt,fetch_github_tool,{server_tools_str};strands_tools:shell"
|
|
701
|
+
logger.info(f"Auto-added server tools: {server_tools_str}")
|
|
702
|
+
else:
|
|
703
|
+
default_tools = (
|
|
704
|
+
"devduck.tools:system_prompt,fetch_github_tool;strands_tools:shell"
|
|
705
|
+
)
|
|
626
706
|
|
|
627
707
|
tools_config = os.getenv("DEVDUCK_TOOLS", default_tools)
|
|
628
708
|
logger.info(f"Loading tools from config: {tools_config}")
|
|
@@ -776,8 +856,6 @@ class DevDuck:
|
|
|
776
856
|
Returns:
|
|
777
857
|
List of MCPClient instances ready for direct use in Agent
|
|
778
858
|
"""
|
|
779
|
-
import json
|
|
780
|
-
|
|
781
859
|
mcp_servers_json = os.getenv("MCP_SERVERS")
|
|
782
860
|
if not mcp_servers_json:
|
|
783
861
|
logger.debug("No MCP_SERVERS environment variable found")
|
|
@@ -1145,7 +1223,7 @@ When you learn something valuable during conversations:
|
|
|
1145
1223
|
- Communication: **MINIMAL WORDS**
|
|
1146
1224
|
- Efficiency: **Speed is paramount**
|
|
1147
1225
|
|
|
1148
|
-
{
|
|
1226
|
+
{_get_system_prompt()}"""
|
|
1149
1227
|
|
|
1150
1228
|
def _self_heal(self, error):
|
|
1151
1229
|
"""Attempt self-healing when errors occur"""
|
|
@@ -1192,7 +1270,6 @@ When you learn something valuable during conversations:
|
|
|
1192
1270
|
|
|
1193
1271
|
def _is_socket_available(self, socket_path):
|
|
1194
1272
|
"""Check if a Unix socket is available"""
|
|
1195
|
-
import os
|
|
1196
1273
|
|
|
1197
1274
|
# If socket file doesn't exist, it's available
|
|
1198
1275
|
if not os.path.exists(socket_path):
|
|
@@ -1430,11 +1507,11 @@ When you learn something valuable during conversations:
|
|
|
1430
1507
|
def restart(self):
|
|
1431
1508
|
"""Restart the agent"""
|
|
1432
1509
|
print("\n🦆 Restarting...")
|
|
1510
|
+
logger.debug("\n🦆 Restarting...")
|
|
1433
1511
|
self.__init__()
|
|
1434
1512
|
|
|
1435
1513
|
def _start_file_watcher(self):
|
|
1436
1514
|
"""Start background file watcher for auto hot-reload"""
|
|
1437
|
-
import threading
|
|
1438
1515
|
|
|
1439
1516
|
logger.info("Starting file watcher for hot-reload")
|
|
1440
1517
|
# Get the path to this file
|
|
@@ -1534,6 +1611,7 @@ When you learn something valuable during conversations:
|
|
|
1534
1611
|
self._watcher_running = False
|
|
1535
1612
|
|
|
1536
1613
|
print("\n🦆 Restarting process with fresh code...")
|
|
1614
|
+
logger.debug("\n🦆 Restarting process with fresh code...")
|
|
1537
1615
|
|
|
1538
1616
|
# Restart the entire Python process
|
|
1539
1617
|
# This ensures all code is freshly loaded
|
|
@@ -1677,7 +1755,7 @@ def interactive():
|
|
|
1677
1755
|
print(f"📝 Logs: {LOG_DIR}")
|
|
1678
1756
|
print("Type 'exit', 'quit', or 'q' to quit.")
|
|
1679
1757
|
print("Prefix with ! to run shell commands (e.g., ! ls -la)")
|
|
1680
|
-
print("
|
|
1758
|
+
print("\n\n")
|
|
1681
1759
|
logger.info("Interactive mode started")
|
|
1682
1760
|
|
|
1683
1761
|
# Set up prompt_toolkit with history
|
|
@@ -1705,7 +1783,6 @@ def interactive():
|
|
|
1705
1783
|
auto_suggest=AutoSuggestFromHistory(),
|
|
1706
1784
|
completer=completer,
|
|
1707
1785
|
complete_while_typing=True,
|
|
1708
|
-
mouse_support=False, # breaks scrolling when enabled
|
|
1709
1786
|
)
|
|
1710
1787
|
|
|
1711
1788
|
# Reset interrupt count on successful prompt
|
|
@@ -1733,6 +1810,10 @@ def interactive():
|
|
|
1733
1810
|
)
|
|
1734
1811
|
devduck._agent_executing = False
|
|
1735
1812
|
|
|
1813
|
+
# Reset terminal to fix rendering issues after command output
|
|
1814
|
+
print("\r", end="", flush=True)
|
|
1815
|
+
sys.stdout.flush()
|
|
1816
|
+
|
|
1736
1817
|
# Append shell command to history
|
|
1737
1818
|
append_to_shell_history(q, result["content"][0]["text"])
|
|
1738
1819
|
|
|
@@ -1747,6 +1828,9 @@ def interactive():
|
|
|
1747
1828
|
except Exception as e:
|
|
1748
1829
|
devduck._agent_executing = False # Reset on error
|
|
1749
1830
|
print(f"🦆 Shell command error: {e}")
|
|
1831
|
+
# Reset terminal on error too
|
|
1832
|
+
print("\r", end="", flush=True)
|
|
1833
|
+
sys.stdout.flush()
|
|
1750
1834
|
continue
|
|
1751
1835
|
|
|
1752
1836
|
# Execute the agent with user input
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '1.1.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (1, 1, 0)
|
|
31
|
+
__version__ = version = '1.1.4'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 1, 4)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g9882068fd'
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DevDuck Tools Package
|
|
3
|
+
|
|
4
|
+
This module exports all available tools for devduck.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .agentcore_agents import agentcore_agents
|
|
8
|
+
from .agentcore_config import agentcore_config
|
|
9
|
+
from .agentcore_invoke import agentcore_invoke
|
|
10
|
+
from .agentcore_logs import agentcore_logs
|
|
11
|
+
from .ambient import ambient
|
|
12
|
+
from .create_subagent import create_subagent
|
|
13
|
+
from .fetch_github_tool import fetch_github_tool
|
|
14
|
+
from .install_tools import install_tools
|
|
15
|
+
from .ipc import ipc
|
|
16
|
+
from .mcp_server import mcp_server
|
|
17
|
+
from .scraper import scraper
|
|
18
|
+
from .speech_to_speech import speech_to_speech
|
|
19
|
+
from .state_manager import state_manager
|
|
20
|
+
from .store_in_kb import store_in_kb
|
|
21
|
+
from .system_prompt import system_prompt
|
|
22
|
+
from .tcp import tcp
|
|
23
|
+
from .tray import tray
|
|
24
|
+
from .use_github import use_github
|
|
25
|
+
from .websocket import websocket
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"agentcore_agents",
|
|
29
|
+
"agentcore_config",
|
|
30
|
+
"agentcore_invoke",
|
|
31
|
+
"agentcore_logs",
|
|
32
|
+
"ambient",
|
|
33
|
+
"create_subagent",
|
|
34
|
+
"fetch_github_tool",
|
|
35
|
+
"install_tools",
|
|
36
|
+
"ipc",
|
|
37
|
+
"mcp_server",
|
|
38
|
+
"scraper",
|
|
39
|
+
"speech_to_speech",
|
|
40
|
+
"state_manager",
|
|
41
|
+
"store_in_kb",
|
|
42
|
+
"system_prompt",
|
|
43
|
+
"tcp",
|
|
44
|
+
"tray",
|
|
45
|
+
"use_github",
|
|
46
|
+
"websocket",
|
|
47
|
+
]
|
|
@@ -102,15 +102,21 @@ class SpeechSession:
|
|
|
102
102
|
self,
|
|
103
103
|
session_id: str,
|
|
104
104
|
agent: BidiAgent,
|
|
105
|
+
input_device_index: Optional[int] = None,
|
|
106
|
+
output_device_index: Optional[int] = None,
|
|
105
107
|
):
|
|
106
108
|
"""Initialize speech session.
|
|
107
109
|
|
|
108
110
|
Args:
|
|
109
111
|
session_id: Unique session identifier
|
|
110
112
|
agent: BidiAgent instance
|
|
113
|
+
input_device_index: PyAudio input device index
|
|
114
|
+
output_device_index: PyAudio output device index
|
|
111
115
|
"""
|
|
112
116
|
self.session_id = session_id
|
|
113
117
|
self.agent = agent
|
|
118
|
+
self.input_device_index = input_device_index
|
|
119
|
+
self.output_device_index = output_device_index
|
|
114
120
|
self.active = False
|
|
115
121
|
self.thread = None
|
|
116
122
|
self.loop = None
|
|
@@ -187,8 +193,11 @@ class SpeechSession:
|
|
|
187
193
|
async def _async_session(self) -> None:
|
|
188
194
|
"""Async session management using BidiAudioIO."""
|
|
189
195
|
try:
|
|
190
|
-
# Create audio I/O
|
|
191
|
-
audio_io = BidiAudioIO(
|
|
196
|
+
# Create audio I/O with device indices
|
|
197
|
+
audio_io = BidiAudioIO(
|
|
198
|
+
input_device_index=self.input_device_index,
|
|
199
|
+
output_device_index=self.output_device_index,
|
|
200
|
+
)
|
|
192
201
|
|
|
193
202
|
# Run agent with audio I/O
|
|
194
203
|
await self.agent.run(inputs=[audio_io.input()], outputs=[audio_io.output()])
|
|
@@ -207,6 +216,9 @@ def speech_to_speech(
|
|
|
207
216
|
tools: Optional[List[str]] = None,
|
|
208
217
|
agent: Optional[Any] = None,
|
|
209
218
|
load_history_from: Optional[str] = None,
|
|
219
|
+
inherit_system_prompt: bool = False,
|
|
220
|
+
input_device_index: Optional[int] = None,
|
|
221
|
+
output_device_index: Optional[int] = None,
|
|
210
222
|
) -> str:
|
|
211
223
|
"""Start, stop, or manage speech-to-speech conversations.
|
|
212
224
|
|
|
@@ -221,18 +233,22 @@ def speech_to_speech(
|
|
|
221
233
|
- "status": Get session status
|
|
222
234
|
- "list_history": List saved conversation histories
|
|
223
235
|
- "read_history": Read a specific conversation history
|
|
236
|
+
- "list_audio_devices": List all available audio input/output devices
|
|
224
237
|
provider: Model provider to use:
|
|
225
238
|
- "novasonic": AWS Bedrock Nova Sonic
|
|
226
239
|
- "openai": OpenAI Realtime API
|
|
227
240
|
- "gemini_live": Google Gemini Live
|
|
228
241
|
system_prompt: Custom system prompt for the agent. This will be appended
|
|
229
|
-
to the parent agent's system prompt (if
|
|
230
|
-
uses default prompt that encourages tool usage.
|
|
242
|
+
to the parent agent's system prompt (if inherit_system_prompt=True).
|
|
243
|
+
If not provided, uses default prompt that encourages tool usage.
|
|
231
244
|
session_id: Session identifier:
|
|
232
245
|
- For "start": Custom ID (auto-generated if not provided)
|
|
233
246
|
- For "stop": Specific session to stop (stops all if not provided)
|
|
234
247
|
- For "read_history": Session ID to read history from
|
|
235
248
|
- For "status": Not used
|
|
249
|
+
inherit_system_prompt: Whether to inherit parent agent's system prompt.
|
|
250
|
+
Set to False to use only the custom system_prompt (useful for OpenAI
|
|
251
|
+
which has 16K token limit). Default: False
|
|
236
252
|
model_settings: Provider-specific configuration dictionary. Structure:
|
|
237
253
|
{
|
|
238
254
|
"model_id": "model-name",
|
|
@@ -260,6 +276,10 @@ def speech_to_speech(
|
|
|
260
276
|
agent: Parent agent (automatically passed by Strands framework)
|
|
261
277
|
load_history_from: Optional session ID to load conversation history from
|
|
262
278
|
when starting a new session (provides context continuity)
|
|
279
|
+
input_device_index: Optional PyAudio input device index. If not specified,
|
|
280
|
+
uses system default. Use action="list_audio_devices" to see available devices.
|
|
281
|
+
output_device_index: Optional PyAudio output device index. If not specified,
|
|
282
|
+
uses system default. Use action="list_audio_devices" to see available devices.
|
|
263
283
|
|
|
264
284
|
Returns:
|
|
265
285
|
str: Status message with session details or error information
|
|
@@ -287,6 +307,9 @@ def speech_to_speech(
|
|
|
287
307
|
tools,
|
|
288
308
|
agent,
|
|
289
309
|
load_history_from,
|
|
310
|
+
inherit_system_prompt,
|
|
311
|
+
input_device_index,
|
|
312
|
+
output_device_index,
|
|
290
313
|
)
|
|
291
314
|
elif action == "stop":
|
|
292
315
|
return _stop_speech_session(session_id)
|
|
@@ -296,6 +319,8 @@ def speech_to_speech(
|
|
|
296
319
|
return _list_conversation_histories()
|
|
297
320
|
elif action == "read_history":
|
|
298
321
|
return _read_conversation_history(session_id)
|
|
322
|
+
elif action == "list_audio_devices":
|
|
323
|
+
return _list_audio_devices()
|
|
299
324
|
else:
|
|
300
325
|
return f"Unknown action: {action}"
|
|
301
326
|
|
|
@@ -364,6 +389,9 @@ def _start_speech_session(
|
|
|
364
389
|
tool_names: Optional[List[str]],
|
|
365
390
|
parent_agent: Optional[Any],
|
|
366
391
|
load_history_from: Optional[str],
|
|
392
|
+
inherit_system_prompt: bool,
|
|
393
|
+
input_device_index: Optional[int],
|
|
394
|
+
output_device_index: Optional[int],
|
|
367
395
|
) -> str:
|
|
368
396
|
"""Start a speech-to-speech session with full configuration support."""
|
|
369
397
|
try:
|
|
@@ -384,7 +412,7 @@ def _start_speech_session(
|
|
|
384
412
|
if provider == "novasonic":
|
|
385
413
|
# Nova Sonic only available in: us-east-1, eu-north-1, ap-northeast-1
|
|
386
414
|
default_settings = {
|
|
387
|
-
"model_id": "amazon.nova-2-sonic-v1:0",
|
|
415
|
+
"model_id": os.getenv("BIDI_MODEL_ID", "amazon.nova-2-sonic-v1:0"),
|
|
388
416
|
"provider_config": {
|
|
389
417
|
"audio": {
|
|
390
418
|
"voice": "tiffany",
|
|
@@ -421,7 +449,7 @@ def _start_speech_session(
|
|
|
421
449
|
elif provider == "openai":
|
|
422
450
|
# Read API key from environment if not provided in model_settings
|
|
423
451
|
default_settings = {
|
|
424
|
-
"model_id": "gpt-realtime",
|
|
452
|
+
"model_id": os.getenv("BIDI_MODEL_ID", "gpt-realtime"),
|
|
425
453
|
"client_config": {
|
|
426
454
|
"api_key": os.getenv("OPENAI_API_KEY"),
|
|
427
455
|
},
|
|
@@ -457,7 +485,9 @@ def _start_speech_session(
|
|
|
457
485
|
api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
|
|
458
486
|
|
|
459
487
|
default_settings = {
|
|
460
|
-
"model_id":
|
|
488
|
+
"model_id": os.getenv(
|
|
489
|
+
"BIDI_MODEL_ID", "gemini-2.5-flash-native-audio-preview-09-2025"
|
|
490
|
+
),
|
|
461
491
|
"client_config": {
|
|
462
492
|
"api_key": api_key,
|
|
463
493
|
},
|
|
@@ -545,8 +575,12 @@ def _start_speech_session(
|
|
|
545
575
|
# Build system prompt: parent prompt + custom prompt
|
|
546
576
|
final_system_prompt = ""
|
|
547
577
|
|
|
548
|
-
# Get parent agent's system prompt if available
|
|
549
|
-
if
|
|
578
|
+
# Get parent agent's system prompt if available and inheritance enabled
|
|
579
|
+
if (
|
|
580
|
+
inherit_system_prompt
|
|
581
|
+
and parent_agent
|
|
582
|
+
and hasattr(parent_agent, "system_prompt")
|
|
583
|
+
):
|
|
550
584
|
parent_prompt = parent_agent.system_prompt or ""
|
|
551
585
|
if parent_prompt:
|
|
552
586
|
final_system_prompt = parent_prompt
|
|
@@ -581,6 +615,8 @@ Keep your voice responses brief and natural."""
|
|
|
581
615
|
session = SpeechSession(
|
|
582
616
|
session_id=session_id,
|
|
583
617
|
agent=bidi_agent,
|
|
618
|
+
input_device_index=input_device_index,
|
|
619
|
+
output_device_index=output_device_index,
|
|
584
620
|
)
|
|
585
621
|
|
|
586
622
|
session.start()
|
|
@@ -748,3 +784,67 @@ def _read_conversation_history(session_id: Optional[str]) -> str:
|
|
|
748
784
|
|
|
749
785
|
except Exception as e:
|
|
750
786
|
return f"❌ Error reading history: {e}"
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def _list_audio_devices() -> str:
|
|
790
|
+
"""List all available audio input and output devices."""
|
|
791
|
+
try:
|
|
792
|
+
import pyaudio
|
|
793
|
+
|
|
794
|
+
p = pyaudio.PyAudio()
|
|
795
|
+
|
|
796
|
+
lines = ["**Available Audio Devices:**\n"]
|
|
797
|
+
|
|
798
|
+
# List all devices
|
|
799
|
+
device_count = p.get_device_count()
|
|
800
|
+
default_input = p.get_default_input_device_info()["index"]
|
|
801
|
+
default_output = p.get_default_output_device_info()["index"]
|
|
802
|
+
|
|
803
|
+
lines.append(f"Total devices: {device_count}\n")
|
|
804
|
+
|
|
805
|
+
for i in range(device_count):
|
|
806
|
+
try:
|
|
807
|
+
info = p.get_device_info_by_index(i)
|
|
808
|
+
name = info["name"]
|
|
809
|
+
max_input_channels = info["maxInputChannels"]
|
|
810
|
+
max_output_channels = info["maxOutputChannels"]
|
|
811
|
+
|
|
812
|
+
device_type = []
|
|
813
|
+
is_default = []
|
|
814
|
+
|
|
815
|
+
if max_input_channels > 0:
|
|
816
|
+
device_type.append("INPUT")
|
|
817
|
+
if i == default_input:
|
|
818
|
+
is_default.append("default input")
|
|
819
|
+
|
|
820
|
+
if max_output_channels > 0:
|
|
821
|
+
device_type.append("OUTPUT")
|
|
822
|
+
if i == default_output:
|
|
823
|
+
is_default.append("default output")
|
|
824
|
+
|
|
825
|
+
type_str = "/".join(device_type) if device_type else "NONE"
|
|
826
|
+
default_str = f" [{', '.join(is_default)}]" if is_default else ""
|
|
827
|
+
|
|
828
|
+
lines.append(
|
|
829
|
+
f"- **Index {i}:** {name}\n"
|
|
830
|
+
f" Type: {type_str}{default_str}\n"
|
|
831
|
+
f" Input Channels: {max_input_channels}, Output Channels: {max_output_channels}"
|
|
832
|
+
)
|
|
833
|
+
|
|
834
|
+
except Exception as e:
|
|
835
|
+
lines.append(f"- **Index {i}:** Error reading device info - {e}")
|
|
836
|
+
|
|
837
|
+
p.terminate()
|
|
838
|
+
|
|
839
|
+
lines.append(
|
|
840
|
+
"\n**Usage:**\n"
|
|
841
|
+
"To use a specific device, pass the index:\n"
|
|
842
|
+
' speech_to_speech(action="start", input_device_index=2, output_device_index=5)'
|
|
843
|
+
)
|
|
844
|
+
|
|
845
|
+
return "\n".join(lines)
|
|
846
|
+
|
|
847
|
+
except ImportError:
|
|
848
|
+
return "❌ PyAudio not installed. Install with: pip install pyaudio"
|
|
849
|
+
except Exception as e:
|
|
850
|
+
return f"❌ Error listing audio devices: {e}"
|