devduck 0.7.2__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

devduck/__init__.py CHANGED
@@ -189,18 +189,7 @@ def manage_tools_func(
     tool_names: str = None,
     tool_path: str = None,
 ) -> Dict[str, Any]:
-    """
-    Manage the agent's tool set at runtime using ToolRegistry.
-
-    Args:
-        action: Action to perform - "list", "add", "remove", "reload"
-        package: Package name to load tools from (e.g., "strands_tools", "strands_fun_tools")
-        tool_names: Comma-separated tool names (e.g., "shell,editor,calculator")
-        tool_path: Path to a .py file to load as a tool
-
-    Returns:
-        Dict with status and content
-    """
+    """Manage the agent's tool set at runtime - add, remove, list, reload tools on the fly."""
     try:
         if not hasattr(devduck, "agent") or not devduck.agent:
             return {"status": "error", "content": [{"text": "Agent not initialized"}]}
@@ -633,7 +622,7 @@ class DevDuck:
 
         # Load tools with flexible configuration
        # Default tool config - user can override with DEVDUCK_TOOLS env var
-        default_tools = "devduck.tools:system_prompt,store_in_kb,ipc,tcp,websocket,mcp_server,state_manager,tray,ambient,agentcore_config,agentcore_invoke,agentcore_logs,agentcore_agents,install_tools,create_subagent,use_github:strands_tools:shell,editor,file_read,file_write,image_reader,load_tool,retrieve,calculator,use_agent,environment,mcp_client,speak,slack:strands_fun_tools:listen,cursor,clipboard,screen_reader,bluetooth,yolo_vision"
+        default_tools = "devduck.tools:system_prompt,store_in_kb,ipc,tcp,websocket,mcp_server,state_manager,tray,ambient,agentcore_config,agentcore_invoke,agentcore_logs,agentcore_agents,install_tools,create_subagent,use_github;strands_tools:shell,editor,file_read,file_write,image_reader,load_tool,retrieve,calculator,use_agent,environment,mcp_client,speak,slack;strands_fun_tools:listen,cursor,clipboard,screen_reader,bluetooth,yolo_vision"
 
         tools_config = os.getenv("DEVDUCK_TOOLS", default_tools)
         logger.info(f"Loading tools from config: {tools_config}")
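Because `default_tools` above is only the fallback passed to `os.getenv`, the whole set can be swapped out by defining `DEVDUCK_TOOLS` before DevDuck starts. A minimal sketch (the override value is illustrative; package and tool names are taken from the default string in this hunk):

```python
import os

# Hypothetical minimal override: load only shell and editor from strands_tools.
# When DEVDUCK_TOOLS is unset, DevDuck falls back to the long default string above.
os.environ["DEVDUCK_TOOLS"] = "strands_tools:shell,editor"
```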
@@ -657,7 +646,18 @@ class DevDuck:
            tool_names: str = None,
            tool_path: str = None,
        ) -> Dict[str, Any]:
-           """Manage the agent's tool set at runtime - add, remove, list, reload tools on the fly."""
+           """
+           Manage the agent's tool set at runtime using ToolRegistry.
+
+           Args:
+               action: Action to perform - "list", "add", "remove", "reload"
+               package: Package name to load tools from (e.g., "strands_tools", "strands_fun_tools") or "devduck.tools:speech_to_speech,system_prompt,..."
+               tool_names: Comma-separated tool names (e.g., "shell,editor,calculator")
+               tool_path: Path to a .py file to load as a tool
+
+           Returns:
+               Dict with status and content
+           """
            return manage_tools_func(action, package, tool_names, tool_path)
 
        # Add built-in tools to the toolset
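The restored docstring above spells out the runtime tool-management contract. As a rough illustration only (the call goes through `manage_tools_func` from the earlier hunk; the package, tool names, and path below are example values, not part of this release):

```python
# Hypothetical calls against the documented actions; returns a dict with
# "status" and "content" per the docstring.
result = manage_tools_func(action="list")
result = manage_tools_func(
    action="add",
    package="strands_tools",
    tool_names="shell,editor,calculator",   # comma-separated, per the docstring
)
result = manage_tools_func(action="add", tool_path="./tools/my_tool.py")  # illustrative path
result = manage_tools_func(action="remove", tool_names="calculator")
print(result["status"], result["content"])
```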
@@ -715,39 +715,39 @@ class DevDuck:
         """
         Load tools based on DEVDUCK_TOOLS configuration.
 
-        Format: package:tool1,tool2:package2:tool3
-        Example: strands_tools:shell,editor:strands_fun_tools:clipboard
+        Format: package1:tool1,tool2;package2:tool3,tool4
+        Examples:
+        - strands_tools:shell,editor;strands_action:use_github
+        - strands_action:use_github;strands_tools:shell,use_aws
 
         Note: Only loads what's specified in config - no automatic additions
         """
         tools = []
-        current_package = None
-
-        for segment in config.split(":"):
-            segment = segment.strip()
-
-            # Check if segment is a package name (contains '.' or '_' and no ',')
-            is_package = "," not in segment and ("." in segment or "_" in segment)
-
-            if is_package:
-                # This is a package name - set as current package
-                current_package = segment
-                logger.debug(f"Switched to package: {current_package}")
-            elif "," in segment:
-                # Tool list from current package
-                if current_package:
-                    for tool_name in segment.split(","):
-                        tool_name = tool_name.strip()
-                        tool = self._load_single_tool(current_package, tool_name)
-                        if tool:
-                            tools.append(tool)
-            elif current_package:
-                # Single tool from current package
-                tool = self._load_single_tool(current_package, segment)
+
+        # Split by semicolon to get package groups
+        groups = config.split(";")
+
+        for group in groups:
+            group = group.strip()
+            if not group:
+                continue
+
+            # Split by colon to get package:tools
+            parts = group.split(":", 1)
+            if len(parts) != 2:
+                logger.warning(f"Invalid format: {group}")
+                continue
+
+            package = parts[0].strip()
+            tools_str = parts[1].strip()
+
+            # Parse tools (comma-separated)
+            tool_names = [t.strip() for t in tools_str.split(",") if t.strip()]
+
+            for tool_name in tool_names:
+                tool = self._load_single_tool(package, tool_name)
                 if tool:
                     tools.append(tool)
-                else:
-                    logger.warning(f"Skipping segment '{segment}' - no package set")
 
         logger.info(f"Loaded {len(tools)} tools from configuration")
         return tools
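For reference, the new parsing above reduces a config string to (package, tool) pairs. A standalone sketch of the same split logic, without the actual tool loading (the config value is an example taken from elsewhere in this diff):

```python
# Demonstrates the semicolon/colon parsing introduced above: ';' separates
# package groups, ':' separates a package from its comma-separated tools.
config = "devduck.tools:tcp,websocket;strands_tools:shell,editor,file_read"

pairs = []
for group in config.split(";"):
    group = group.strip()
    if not group:
        continue
    parts = group.split(":", 1)
    if len(parts) != 2:
        continue
    package, tools_str = parts[0].strip(), parts[1].strip()
    pairs.extend((package, t.strip()) for t in tools_str.split(",") if t.strip())

print(pairs)
# [('devduck.tools', 'tcp'), ('devduck.tools', 'websocket'),
#  ('strands_tools', 'shell'), ('strands_tools', 'editor'), ('strands_tools', 'file_read')]
```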
@@ -1094,9 +1094,11 @@ You have full access to your own source code for self-awareness and self-modific
 
 ## Tool Configuration:
 Set DEVDUCK_TOOLS for custom tools:
-- Format: package:tool1,tool2:package2:tool3
-- Example: strands_tools:shell,editor:strands_fun_tools:clipboard
+- Format: package1:tool1,tool2;package2:tool3,tool4
+- Example: strands_tools:shell,editor;strands_fun_tools:clipboard
 - Tools are filtered - only specified tools are loaded
+- Load the speech_to_speech tool when it's needed
+- Offload the tools when you don't need
 
 ## MCP Integration:
 - **Expose as MCP Server** - Use mcp_server() to expose devduck via MCP protocol
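The new prompt lines above tell the agent to load `speech_to_speech` on demand and offload it afterwards. A hypothetical sketch of that pattern using the `manage_tools_func` contract documented earlier (the exact arguments the agent would pass are an assumption, not shown in this diff):

```python
# Hypothetical on-demand load/offload of the speech_to_speech tool.
manage_tools_func(action="add", package="devduck.tools", tool_names="speech_to_speech")
# ... run the voice session ...
manage_tools_func(action="remove", tool_names="speech_to_speech")
```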
devduck/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.7.2'
-__version_tuple__ = version_tuple = (0, 7, 2)
+__version__ = version = '1.1.0'
+__version_tuple__ = version_tuple = (1, 1, 0)
 
 __commit_id__ = commit_id = None
devduck/tools/speech_to_speech.py ADDED
@@ -0,0 +1,750 @@
+"""Real-time speech-to-speech bidirectional streaming tool for DevDuck.
+
+Provides background speech-to-speech conversation capability using Strands
+experimental bidirectional streaming with full model provider support, tool
+inheritance, and comprehensive configuration options.
+
+This tool creates isolated bidirectional agent sessions that run in background
+threads, enabling real-time voice conversations with AI models while the parent
+agent remains responsive.
+
+Key Features:
+- **Background Execution:** Runs in separate thread - parent agent stays responsive
+- **Real-Time Audio:** Microphone input and speaker output with pyaudio
+- **Tool Inheritance:** Automatically inherits ALL tools from parent agent
+- **System Prompt Inheritance:** Combines parent agent's prompt with custom prompts
+- **Multiple Providers:** Nova Sonic, OpenAI Realtime API, Gemini Live
+- **Full Configuration:** Per-provider custom settings and parameters
+- **Environment API Keys:** Auto-loads API keys from environment variables
+- **Built-in Stop:** Uses SDK's stop_conversation tool for graceful termination
+- **Auto-Interruption:** Built-in VAD for natural conversation flow
+- **Conversation History:** Automatically saves transcripts to files
+
+Supported Providers:
+-------------------
+1. **Nova Sonic (AWS Bedrock):**
+   - Region: us-east-1, eu-north-1, ap-northeast-1
+   - Model: amazon.nova-2-sonic-v1:0 (configurable)
+   - Voices: tiffany, matthew, amy, ambre, florian, beatrice, lorenzo, greta, lennart, lupe, carlos
+   - Requires AWS credentials (boto3 credential chain)
+
+2. **OpenAI Realtime API:**
+   - Models: gpt-realtime, gpt-4o-realtime-preview (configurable)
+   - Requires OPENAI_API_KEY environment variable
+   - Custom session config support
+
+3. **Gemini Live:**
+   - Model: gemini-2.5-flash-native-audio-preview-09-2025 (configurable)
+   - Requires GOOGLE_API_KEY or GEMINI_API_KEY environment variable
+   - Live config customization
+
+Configuration Examples:
+----------------------
+# Nova Sonic with custom voice
+model_settings = {
+    "provider_config": {
+        "audio": {"voice": "matthew"}
+    },
+    "client_config": {"region": "us-east-1"}
+}
+
+# OpenAI Realtime with custom model
+model_settings = {
+    "model_id": "gpt-4o-realtime-preview",
+    "provider_config": {
+        "audio": {"voice": "coral"}
+    }
+}
+
+# Gemini Live with custom voice
+model_settings = {
+    "model_id": "gemini-2.5-flash-native-audio-preview-09-2025",
+    "provider_config": {
+        "audio": {"voice": "Kore"}
+    }
+}
+"""
+
+import os
+import asyncio
+import tempfile
+import json
+import logging
+import threading
+import traceback
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from strands import tool
+from strands.experimental.bidi.agent.agent import BidiAgent
+from strands.experimental.bidi.models.gemini_live import BidiGeminiLiveModel
+from strands.experimental.bidi.models.nova_sonic import BidiNovaSonicModel
+from strands.experimental.bidi.models.openai_realtime import BidiOpenAIRealtimeModel
+from strands.experimental.bidi.io.audio import BidiAudioIO
+
+logger = logging.getLogger(__name__)
+
+# Global session tracking
+_active_sessions = {}
+_session_lock = threading.Lock()
+
+# Session history storage location
+BASE_DIR = Path(os.getenv("DEVDUCK_HOME", tempfile.gettempdir()))
+HISTORY_DIR = BASE_DIR / ".devduck" / "speech_sessions"
+HISTORY_DIR.mkdir(parents=True, exist_ok=True)
+
+
+class SpeechSession:
+    """Manages a speech-to-speech conversation session with full lifecycle management."""
+
+    def __init__(
+        self,
+        session_id: str,
+        agent: BidiAgent,
+    ):
+        """Initialize speech session.
+
+        Args:
+            session_id: Unique session identifier
+            agent: BidiAgent instance
+        """
+        self.session_id = session_id
+        self.agent = agent
+        self.active = False
+        self.thread = None
+        self.loop = None
+        self.history_file = HISTORY_DIR / f"{session_id}.json"
+
+    def start(self) -> None:
+        """Start the speech session in background thread."""
+        if self.active:
+            raise ValueError("Session already active")
+
+        self.active = True
+        self.thread = threading.Thread(target=self._run_session, daemon=True)
+        self.thread.start()
+
+    def stop(self) -> None:
+        """Stop the speech session and cleanup resources."""
+        if not self.active:
+            return
+
+        self.active = False
+
+        # Stop the bidi agent using its event loop
+        if self.loop and self.loop.is_running():
+            # Schedule stop in the session's event loop and wait for it
+            future = asyncio.run_coroutine_threadsafe(self.agent.stop(), self.loop)
+            try:
+                # Wait up to 3 seconds for stop to complete
+                future.result(timeout=3.0)
+                logger.info(
+                    f"Successfully stopped bidi agent for session {self.session_id}"
+                )
+            except Exception as e:
+                logger.warning(f"Error stopping bidi agent: {e}")
+
+        if self.thread:
+            self.thread.join(timeout=5.0)
+
+        # Save conversation history after session ends
+        self._save_history()
+
+    def _save_history(self) -> None:
+        """Save conversation history to file."""
+        try:
+            history_data = {
+                "session_id": self.session_id,
+                "timestamp": datetime.now().isoformat(),
+                "messages": self.agent.messages,
+            }
+
+            with open(self.history_file, "w") as f:
+                json.dump(history_data, f, indent=2)
+
+            logger.info(f"Saved conversation history to {self.history_file}")
+        except Exception as e:
+            logger.error(f"Failed to save history: {e}")
+
+    def _run_session(self) -> None:
+        """Main session runner in background thread."""
+        try:
+            # Create event loop for this thread
+            self.loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(self.loop)
+
+            # Run the async session
+            self.loop.run_until_complete(self._async_session())
+        except Exception as e:
+            error_msg = f"Session error: {e}\n{traceback.format_exc()}"
+            logger.debug(error_msg)
+            print(f"\n🦆 Session error: {e}")
+        finally:
+            if self.loop:
+                self.loop.close()
+
+    async def _async_session(self) -> None:
+        """Async session management using BidiAudioIO."""
+        try:
+            # Create audio I/O
+            audio_io = BidiAudioIO()
+
+            # Run agent with audio I/O
+            await self.agent.run(inputs=[audio_io.input()], outputs=[audio_io.output()])
+
+        except Exception as e:
+            logger.debug(f"Async session error: {e}\n{traceback.format_exc()}")
+
+
+@tool
+def speech_to_speech(
+    action: str,
+    provider: str = "novasonic",
+    system_prompt: Optional[str] = None,
+    session_id: Optional[str] = None,
+    model_settings: Optional[Dict[str, Any]] = None,
+    tools: Optional[List[str]] = None,
+    agent: Optional[Any] = None,
+    load_history_from: Optional[str] = None,
+) -> str:
+    """Start, stop, or manage speech-to-speech conversations.
+
+    Creates a background bidirectional streaming session for real-time voice
+    conversations with AI. Supports full model configuration, tool inheritance,
+    and multiple model providers with custom settings.
+
+    Args:
+        action: Action to perform:
+            - "start": Start new speech session
+            - "stop": Stop session(s)
+            - "status": Get session status
+            - "list_history": List saved conversation histories
+            - "read_history": Read a specific conversation history
+        provider: Model provider to use:
+            - "novasonic": AWS Bedrock Nova Sonic
+            - "openai": OpenAI Realtime API
+            - "gemini_live": Google Gemini Live
+        system_prompt: Custom system prompt for the agent. This will be appended
+            to the parent agent's system prompt (if available). If not provided,
+            uses default prompt that encourages tool usage.
+        session_id: Session identifier:
+            - For "start": Custom ID (auto-generated if not provided)
+            - For "stop": Specific session to stop (stops all if not provided)
+            - For "read_history": Session ID to read history from
+            - For "status": Not used
+        model_settings: Provider-specific configuration dictionary. Structure:
+            {
+                "model_id": "model-name",
+                "provider_config": {
+                    "audio": {"voice": "voice-name"},
+                    "inference": {...}
+                },
+                "client_config": {
+                    "region": "us-east-1",  # for Nova Sonic
+                    "api_key": "key"  # for OpenAI/Gemini (auto-loaded from env if not provided)
+                }
+            }
+
+            Examples:
+            - Nova Sonic with custom voice:
+              {"provider_config": {"audio": {"voice": "matthew"}}}
+
+            - OpenAI with custom model:
+              {"model_id": "gpt-4o-realtime-preview"}
+
+            - Gemini with custom voice:
+              {"provider_config": {"audio": {"voice": "Kore"}}}
+        tools: List of tool names to make available. If not provided,
+            inherits ALL tools from parent agent.
+        agent: Parent agent (automatically passed by Strands framework)
+        load_history_from: Optional session ID to load conversation history from
+            when starting a new session (provides context continuity)
+
+    Returns:
+        str: Status message with session details or error information
+
+    Environment Variables:
+        - OPENAI_API_KEY: Required for OpenAI Realtime API (if not in model_settings)
+        - GOOGLE_API_KEY or GEMINI_API_KEY: Required for Gemini Live (if not in model_settings)
+        - AWS credentials: Required for Nova Sonic (boto3 default credential chain)
+
+    Nova Sonic Voice Options:
+        - English (US): tiffany (feminine), matthew (masculine)
+        - English (GB): amy (feminine)
+        - French: ambre (feminine), florian (masculine)
+        - Italian: beatrice (feminine), lorenzo (masculine)
+        - German: greta (feminine), lennart (masculine)
+        - Spanish: lupe (feminine), carlos (masculine)
+    """
+
+    if action == "start":
+        return _start_speech_session(
+            provider,
+            system_prompt,
+            session_id,
+            model_settings,
+            tools,
+            agent,
+            load_history_from,
+        )
+    elif action == "stop":
+        return _stop_speech_session(session_id)
+    elif action == "status":
+        return _get_session_status()
+    elif action == "list_history":
+        return _list_conversation_histories()
+    elif action == "read_history":
+        return _read_conversation_history(session_id)
+    else:
+        return f"Unknown action: {action}"
+
+
+def _create_speech_session_tool(current_session_id: str, bidi_agent: BidiAgent):
+    """Create a speech_session tool for the given session.
+
+    This tool is attached to each bidi agent instance to allow session management
+    from within the speech conversation.
+    """
+
+    @tool
+    def speech_session(
+        action: str,
+        session_id: Optional[str] = None,
+    ) -> str:
+        """Manage the current speech conversation session.
+
+        Actions:
+        - "stop": Stop the current conversation
+        - "status": Get session status
+        - "list_history": List all saved conversation histories
+        - "read_history": Read a specific conversation history
+
+        Args:
+            action: Action to perform
+            session_id: Session ID (required for read_history)
+
+        Returns:
+            Status message
+        """
+        if action == "stop":
+            try:
+                # Stop the session (which will call bidi_agent.stop() properly)
+                with _session_lock:
+                    if current_session_id in _active_sessions:
+                        _active_sessions[current_session_id].stop()
+                        del _active_sessions[current_session_id]
+                        return "Conversation stopped successfully."
+                    else:
+                        return f"Session {current_session_id} not found in active sessions."
+            except Exception as e:
+                logger.error(f"Error stopping conversation: {e}")
+                return f"Error stopping conversation: {e}"
+
+        elif action == "status":
+            return _get_session_status()
+
+        elif action == "list_history":
+            return _list_conversation_histories()
+
+        elif action == "read_history":
+            return _read_conversation_history(session_id)
+
+        else:
+            return f"Unknown action: {action}. Available: stop, status, list_history, read_history"
+
+    return speech_session
+
+
+def _start_speech_session(
+    provider: str,
+    system_prompt: Optional[str],
+    session_id: Optional[str],
+    model_settings: Optional[Dict[str, Any]],
+    tool_names: Optional[List[str]],
+    parent_agent: Optional[Any],
+    load_history_from: Optional[str],
+) -> str:
+    """Start a speech-to-speech session with full configuration support."""
+    try:
+        # Generate session ID if not provided
+        if not session_id:
+            session_id = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+
+        # Check if session already exists
+        with _session_lock:
+            if session_id in _active_sessions:
+                return f"❌ Session already exists: {session_id}"
+
+        # Create model based on provider with custom settings
+        model_settings = model_settings or {}
+        model_info = f"{provider}"
+
+        try:
+            if provider == "novasonic":
+                # Nova Sonic only available in: us-east-1, eu-north-1, ap-northeast-1
+                default_settings = {
+                    "model_id": "amazon.nova-2-sonic-v1:0",
+                    "provider_config": {
+                        "audio": {
+                            "voice": "tiffany",
+                        },
+                    },
+                    "client_config": {"region": "us-east-1"},
+                }
+
+                # Merge user settings with defaults (deep merge for nested dicts)
+                if model_settings:
+                    # Merge top-level keys
+                    for key, value in model_settings.items():
+                        if (
+                            key in default_settings
+                            and isinstance(default_settings[key], dict)
+                            and isinstance(value, dict)
+                        ):
+                            # Deep merge for nested dicts
+                            default_settings[key].update(value)
+                        else:
+                            default_settings[key] = value
+
+                model = BidiNovaSonicModel(**default_settings)
+                region = default_settings.get("client_config", {}).get(
+                    "region", "us-east-1"
+                )
+                voice = (
+                    default_settings.get("provider_config", {})
+                    .get("audio", {})
+                    .get("voice", "tiffany")
+                )
+                model_info = f"Nova Sonic ({region}, voice: {voice})"
+
+            elif provider == "openai":
+                # Read API key from environment if not provided in model_settings
+                default_settings = {
+                    "model_id": "gpt-realtime",
+                    "client_config": {
+                        "api_key": os.getenv("OPENAI_API_KEY"),
+                    },
+                }
+
+                # Merge user settings
+                if model_settings:
+                    for key, value in model_settings.items():
+                        if (
+                            key in default_settings
+                            and isinstance(default_settings[key], dict)
+                            and isinstance(value, dict)
+                        ):
+                            default_settings[key].update(value)
+                        else:
+                            default_settings[key] = value
+
+                # Check if API key is available
+                if not default_settings.get("client_config", {}).get("api_key"):
+                    return "❌ OpenAI API key not found. Set OPENAI_API_KEY environment variable or provide in model_settings['client_config']['api_key']"
+
+                model = BidiOpenAIRealtimeModel(**default_settings)
+                model_id = default_settings.get("model_id", "gpt-realtime")
+                voice = (
+                    default_settings.get("provider_config", {})
+                    .get("audio", {})
+                    .get("voice", "default")
+                )
+                model_info = f"OpenAI Realtime ({model_id}, voice: {voice})"
+
+            elif provider == "gemini_live":
+                # Read API key from environment if not provided in model_settings
+                api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
+
+                default_settings = {
+                    "model_id": "gemini-2.5-flash-native-audio-preview-09-2025",
+                    "client_config": {
+                        "api_key": api_key,
+                    },
+                }
+
+                # Merge user settings
+                if model_settings:
+                    for key, value in model_settings.items():
+                        if (
+                            key in default_settings
+                            and isinstance(default_settings[key], dict)
+                            and isinstance(value, dict)
+                        ):
+                            default_settings[key].update(value)
+                        else:
+                            default_settings[key] = value
+
+                # Check if API key is available
+                if not default_settings.get("client_config", {}).get("api_key"):
+                    return "❌ Google/Gemini API key not found. Set GOOGLE_API_KEY or GEMINI_API_KEY environment variable or provide in model_settings['client_config']['api_key']"
+
+                model = BidiGeminiLiveModel(**default_settings)
+                model_id = default_settings.get("model_id", "gemini-2.5-flash-live")
+                voice = (
+                    default_settings.get("provider_config", {})
+                    .get("audio", {})
+                    .get("voice", "default")
+                )
+                model_info = f"Gemini Live ({model_id}, voice: {voice})"
+
+            else:
+                return f"❌ Unknown provider: {provider}. Supported: novasonic, openai, gemini_live"
+        except Exception as e:
+            return f"❌ Error creating {provider} model: {e}\n\nCheck your configuration and credentials."
+
+        # Get parent agent's tools
+        tools = []
+        inherited_count = 0
+
+        if parent_agent and hasattr(parent_agent, "tool_registry"):
+            try:
+                # Get all tool functions from parent agent's registry
+                registry_dict = parent_agent.tool_registry.registry
+
+                # If specific tools requested, filter; otherwise inherit all
+                if tool_names:
+                    # User specified tool names - only include those
+                    for tool_name in tool_names:
+                        if tool_name not in ["speech_to_speech"]:
+                            tool_func = registry_dict.get(tool_name)
+                            if tool_func:
+                                tools.append(tool_func)
+                                inherited_count += 1
+                            else:
+                                logger.warning(
+                                    f"Tool '{tool_name}' not found in parent agent's registry"
+                                )
+                else:
+                    # No specific tools - inherit all except excluded
+                    for tool_name, tool_func in registry_dict.items():
+                        if tool_name not in ["speech_to_speech"]:
+                            tools.append(tool_func)
+                            inherited_count += 1
+
+            except Exception as e:
+                logger.warning(f"Could not inherit tools from parent agent: {e}")
+
+        # Load conversation history if requested
+        messages = None
+        if load_history_from:
+            history_file = HISTORY_DIR / f"{load_history_from}.json"
+            if history_file.exists():
+                try:
+                    with open(history_file, "r") as f:
+                        history_data = json.load(f)
+                    messages = history_data.get("messages", [])
+                    logger.info(
+                        f"Loaded {len(messages)} messages from {load_history_from}"
+                    )
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to load history from {load_history_from}: {e}"
+                    )
+
+        # Build system prompt: parent prompt + custom prompt
+        final_system_prompt = ""
+
+        # Get parent agent's system prompt if available
+        if parent_agent and hasattr(parent_agent, "system_prompt"):
+            parent_prompt = parent_agent.system_prompt or ""
+            if parent_prompt:
+                final_system_prompt = parent_prompt
+
+        # Add custom system prompt
+        if system_prompt:
+            if final_system_prompt:
+                final_system_prompt = f"{final_system_prompt}\n\n{system_prompt}"
+            else:
+                final_system_prompt = system_prompt
+
+        # Use default system prompt if nothing provided
+        if not final_system_prompt:
+            final_system_prompt = """You are a helpful AI assistant with access to powerful tools.
+- To stop the conversation → Use speech_session tool with action="stop"
+Keep your voice responses brief and natural."""
+
+        # Create bidirectional agent with inherited tools (speech_session will be added after)
+        bidi_agent = BidiAgent(
+            model=model,
+            tools=tools,
+            system_prompt=final_system_prompt,
+            messages=messages,
+        )
+
+        # Create and add speech_session tool to agent's registry
+        # This allows user to manage the session from within the conversation
+        speech_session_tool = _create_speech_session_tool(session_id, bidi_agent)
+        bidi_agent.tool_registry.registry["speech_session"] = speech_session_tool
+
+        # Create and start session
+        session = SpeechSession(
+            session_id=session_id,
+            agent=bidi_agent,
+        )
+
+        session.start()
+
+        # Register session
+        with _session_lock:
+            _active_sessions[session_id] = session
+
+        # Build settings summary
+        settings_summary = ""
+        if model_settings:
+            settings_lines = []
+            for key, value in model_settings.items():
+                if key not in ["api_key", "secret"]:  # Hide sensitive data
+                    settings_lines.append(f" - {key}: {value}")
+            if settings_lines:
+                settings_summary = "\n**Model Settings:**\n" + "\n".join(settings_lines)
+
+        # Add history info if loaded
+        history_info = ""
+        if messages:
+            history_info = f"\n**Loaded History:** {len(messages)} messages from session '{load_history_from}'"
+
+        return f"""✅ Speech session started!
+
+**Session ID:** {session_id}
+**Provider:** {model_info}
+**Tools:** {inherited_count + 1} tools available (includes speech_session){settings_summary}{history_info}
+**History Location:** {session.history_file}
+
+The session is running in the background. Speak into your microphone to interact!
+
+**To manage the session during conversation:**
+- Stop: Say "stop the session" or "end conversation"
+- Check status: Say "check session status"
+- List histories: Say "list conversation histories"
+
+**External Commands:**
+- Check status: speech_to_speech(action="status")
+- Stop session: speech_to_speech(action="stop", session_id="{session_id}")
+- List histories: speech_to_speech(action="list_history")
+- Read history: speech_to_speech(action="read_history", session_id="{session_id}")
+"""
+
+    except Exception as e:
+        logger.error(f"Error starting speech session: {e}\n{traceback.format_exc()}")
+        return f"❌ Error starting session: {e}\n\nCheck logs for details."
+
+
+def _stop_speech_session(session_id: Optional[str]) -> str:
+    """Stop a speech session."""
+    with _session_lock:
+        if not session_id:
+            if not _active_sessions:
+                return "❌ No active sessions"
+            # Stop all sessions
+            session_ids = list(_active_sessions.keys())
+            for sid in session_ids:
+                _active_sessions[sid].stop()
+                del _active_sessions[sid]
+            return f"✅ Stopped {len(session_ids)} session(s)"
+
+        if session_id not in _active_sessions:
+            return f"❌ Session not found: {session_id}"
+
+        session = _active_sessions[session_id]
+        session.stop()
+        del _active_sessions[session_id]
+
+        return f"✅ Session stopped: {session_id}"
+
+
+def _get_session_status() -> str:
+    """Get status of all active sessions."""
+    with _session_lock:
+        if not _active_sessions:
+            return "No active speech sessions"
+
+        status_lines = ["**Active Speech Sessions:**\n"]
+        for session_id, session in _active_sessions.items():
+            status_lines.append(
+                f"- **{session_id}**\n"
+                f" - Active: {'✅' if session.active else '❌'}\n"
+                f" - History File: {session.history_file}"
+            )
+
+        return "\n".join(status_lines)
+
+
+def _list_conversation_histories() -> str:
+    """List all saved conversation histories."""
+    history_files = sorted(
+        HISTORY_DIR.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True
+    )
+
+    if not history_files:
+        return f"No saved conversation histories found in {HISTORY_DIR}"
+
+    lines = [f"**Saved Conversation Histories** ({len(history_files)} total):\n"]
+    lines.append(f"Location: {HISTORY_DIR}\n")
+
+    for history_file in history_files:
+        try:
+            with open(history_file, "r") as f:
+                data = json.load(f)
+            session_id = data.get("session_id", history_file.stem)
+            timestamp = data.get("timestamp", "unknown")
+            message_count = len(data.get("messages", []))
+
+            lines.append(
+                f"- **{session_id}**\n"
+                f" - Timestamp: {timestamp}\n"
+                f" - Messages: {message_count}\n"
+                f" - File: {history_file.name}"
+            )
+        except Exception as e:
+            lines.append(f"- **{history_file.stem}** (error reading: {e})")
+
+    return "\n".join(lines)
+
+
+def _read_conversation_history(session_id: Optional[str]) -> str:
+    """Read a specific conversation history."""
+    if not session_id:
+        return "❌ session_id required for read_history action"
+
+    history_file = HISTORY_DIR / f"{session_id}.json"
+
+    if not history_file.exists():
+        return f"❌ No history found for session: {session_id}\n\nAvailable histories:\n{_list_conversation_histories()}"
+
+    try:
+        with open(history_file, "r") as f:
+            data = json.load(f)
+
+        messages = data.get("messages", [])
+        timestamp = data.get("timestamp", "unknown")
+
+        lines = [
+            f"**Conversation History: {session_id}**\n",
+            f"Timestamp: {timestamp}",
+            f"Messages: {len(messages)}\n",
+            "---\n",
+        ]
+
+        # Format messages
+        for i, msg in enumerate(messages, 1):
+            role = msg.get("role", "unknown")
+            content_blocks = msg.get("content", [])
+
+            lines.append(f"**{i}. {role.upper()}:**")
+
+            for block in content_blocks:
+                if "text" in block:
+                    lines.append(f" {block['text']}")
+                elif "toolUse" in block:
+                    tool_use = block["toolUse"]
+                    lines.append(f" [Tool Call: {tool_use['name']}]")
+                elif "toolResult" in block:
+                    lines.append(f" [Tool Result]")
+
+            lines.append("")
+
+        return "\n".join(lines)
+
+    except Exception as e:
+        return f"❌ Error reading history: {e}"
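To tie the new module together, here is a rough sketch of how the `speech_to_speech` tool is meant to be driven, based purely on its docstring and the "External Commands" text above. The voice value and session ID are illustrative, and in normal use the agent invokes the tool (with the parent agent injected automatically) rather than calling it directly:

```python
# Illustrative calls mirroring the documented actions.
print(speech_to_speech(
    action="start",
    provider="novasonic",
    model_settings={"provider_config": {"audio": {"voice": "matthew"}}},
))
print(speech_to_speech(action="status"))
print(speech_to_speech(action="list_history"))
print(speech_to_speech(action="read_history", session_id="speech_20250101_120000"))  # example ID
print(speech_to_speech(action="stop"))  # stops all sessions when no session_id is given
```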
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: devduck
-Version: 0.7.2
+Version: 1.1.0
 Summary: 🦆 Extreme minimalist self-adapting AI agent - one file, self-healing, runtime dependencies
 Author-email: Cagatay Cali <cagataycali@icloud.com>
 License: Apache-2.0
@@ -31,6 +31,7 @@ Requires-Dist: prompt_toolkit
 Requires-Dist: strands-agents[ollama]
 Requires-Dist: strands-agents[anthropic]
 Requires-Dist: strands-agents[openai]
+Requires-Dist: strands-agents[bidi-all]; sys_platform == "darwin"
 Requires-Dist: strands-agents[otel]
 Requires-Dist: strands-agents-tools
 Requires-Dist: strands-fun-tools[all]; sys_platform == "darwin"
@@ -239,7 +240,7 @@ install_tools(action="list_loaded")
 
 ### Static Tool Configuration
 
-**Format:** `package:tool1,tool2:package2:tool3`
+**Format:** `package1:tool1,tool2;package2:tool3,tool4`
 
 ```bash
 # Minimal (shell + editor only)
@@ -249,7 +250,7 @@ export DEVDUCK_TOOLS="strands_tools:shell,editor"
 export DEVDUCK_TOOLS="strands_tools:shell,editor,file_read,file_write,calculator"
 
 # Full stack + GitHub
-export DEVDUCK_TOOLS="devduck.tools:tcp,websocket,mcp_server,use_github:strands_tools:shell,editor,file_read"
+export DEVDUCK_TOOLS="devduck.tools:tcp,websocket,mcp_server,use_github;strands_tools:shell,editor,file_read"
 
 devduck
 ```
@@ -572,7 +573,7 @@ devduck
 | `LITELLM_API_KEY` | - | LiteLLM API key (auto-detected) |
 | `LLAMAAPI_API_KEY` | - | LlamaAPI key (auto-detected) |
 | **Tools** | | |
-| `DEVDUCK_TOOLS` | 38 tools | Format: `package:tool1,tool2:package2:tool3` |
+| `DEVDUCK_TOOLS` | 38 tools | Format: `package1:tool1,tool2;package2:tool3` |
 | `DEVDUCK_LOAD_TOOLS_FROM_DIR` | `false` | Auto-load from `./tools/` directory |
 | **Memory** | | |
 | `DEVDUCK_KNOWLEDGE_BASE_ID` | - | Bedrock KB ID for auto-RAG |
@@ -1,6 +1,6 @@
-devduck/__init__.py,sha256=jsamv9JKRuP5ludIQm3WAZWNd_MUPcN6Z60nerTJP5E,71612
+devduck/__init__.py,sha256=P7xQxthfPj2sDfcxn-kwA24gtBbtfDYu2Tf_y8KuEic,71545
 devduck/__main__.py,sha256=aeF2RR4k7lzSR2X1QKV9XQPCKhtsH0JYUv2etBBqmL0,145
-devduck/_version.py,sha256=69rtUS5MR_8CGRaNqkaDM6V4ZDI_8FTMw2vDLxrWg0Q,704
+devduck/_version.py,sha256=ePNVzJOkxR8FY5bezqKQ_fgBRbzH1G7QTaRDHvGQRAY,704
 devduck/agentcore_handler.py,sha256=0DKJTTjoH9P8a70G0f5dOIIwy6bjqaN46voAWaSOpDY,2221
 devduck/test_redduck.py,sha256=ILtKKMuoyVfmhnibmbojpbOsqbcKooZv4j9qtE2LWdw,1750
 devduck/tools/__init__.py,sha256=AmIy8MInaClaZ71fqzy4EQJnBWsLkrv4QW9IIN7UQyw,1367
@@ -15,6 +15,7 @@ devduck/tools/create_subagent.py,sha256=UzRz9BmU4PbTveZROEpZ311aH-u-i6x89gttu-Cn
 devduck/tools/install_tools.py,sha256=3uzRg5lEHX-L6gxnFn3mIKjGYDJ3h_AdwGnEwKA9qR0,14284
 devduck/tools/ipc.py,sha256=e3KJeR2HmCKEtVLGNOtf6CeFi3pTDehwd7Fu4JJ19Ms,18607
 devduck/tools/mcp_server.py,sha256=Ybp0PcJKW2TOvghsRL-i8Guqc9WokPwOD2bhVgzoj6Q,21490
+devduck/tools/speech_to_speech.py,sha256=rw9Olrdd_JvfeInk1ZeztVzSSuw5QQ_2Vnt-WbtLu50,28627
 devduck/tools/state_manager.py,sha256=hrleqdVoCboNd8R3wDRUXVKYCZdGoe1j925i948LTHc,10563
 devduck/tools/store_in_kb.py,sha256=-JM-oRQKR3FBubKHFHmXRnZSvi9dVgHxG0lismMgG2k,6861
 devduck/tools/system_prompt.py,sha256=waAdmvRhyulorw_tLqpqUJN_AahuaeF2rXqjMqN7IRY,16905
@@ -22,9 +23,9 @@ devduck/tools/tcp.py,sha256=w2m_Jf6vZ4NYu0AwgZd7C7eKs4No2EVHZ2WYIl_Bt0A,22017
 devduck/tools/tray.py,sha256=FgVhUtLdsdv5_ERK-RyAIpDE8Zb0IfoqhHQdwMxrHUQ,7547
 devduck/tools/use_github.py,sha256=nr3JSGk48mKUobpgW__2gu6lFyUj93a1XRs3I6vH8W4,13682
 devduck/tools/websocket.py,sha256=A8bqgdDZs8hcf2HctkJzQOzMvb5mXUC7YZ-xqkOyn94,16959
-devduck-0.7.2.dist-info/licenses/LICENSE,sha256=UANcoWwfVeuM9597WUkjEQbzqIUH0bJoE9Tpwgj_LvU,11345
-devduck-0.7.2.dist-info/METADATA,sha256=za19Gpk4RiF-EI1LE2bY0wLH055aMBVffE4Z0rzJmZ8,21781
-devduck-0.7.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-devduck-0.7.2.dist-info/entry_points.txt,sha256=BAMQaIg_BLZQOTk12bT7hy1dE9oGPLt-_dTbI4cnBnQ,40
-devduck-0.7.2.dist-info/top_level.txt,sha256=ySXWlVronp8xHYfQ_Hdfr463e0EnbWuqyuxs94EU7yk,8
-devduck-0.7.2.dist-info/RECORD,,
+devduck-1.1.0.dist-info/licenses/LICENSE,sha256=UANcoWwfVeuM9597WUkjEQbzqIUH0bJoE9Tpwgj_LvU,11345
+devduck-1.1.0.dist-info/METADATA,sha256=UM7NICyyRNPlMkvh-qKaGktgJGjxEi6qboUeIzLmwvo,21855
+devduck-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+devduck-1.1.0.dist-info/entry_points.txt,sha256=PJ8gvdi2MnKEK8yCwcmZDeLQ-lx94EV_jp4-v8AKuTA,58
+devduck-1.1.0.dist-info/top_level.txt,sha256=ySXWlVronp8xHYfQ_Hdfr463e0EnbWuqyuxs94EU7yk,8
+devduck-1.1.0.dist-info/RECORD,,
@@ -1,2 +1,3 @@
 [console_scripts]
+dev = devduck:cli
 devduck = devduck:cli