devduck 1.1.0__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of devduck might be problematic. Click here for more details.

@@ -102,15 +102,21 @@ class SpeechSession:
102
102
  self,
103
103
  session_id: str,
104
104
  agent: BidiAgent,
105
+ input_device_index: Optional[int] = None,
106
+ output_device_index: Optional[int] = None,
105
107
  ):
106
108
  """Initialize speech session.
107
109
 
108
110
  Args:
109
111
  session_id: Unique session identifier
110
112
  agent: BidiAgent instance
113
+ input_device_index: PyAudio input device index
114
+ output_device_index: PyAudio output device index
111
115
  """
112
116
  self.session_id = session_id
113
117
  self.agent = agent
118
+ self.input_device_index = input_device_index
119
+ self.output_device_index = output_device_index
114
120
  self.active = False
115
121
  self.thread = None
116
122
  self.loop = None
@@ -187,8 +193,11 @@ class SpeechSession:
187
193
  async def _async_session(self) -> None:
188
194
  """Async session management using BidiAudioIO."""
189
195
  try:
190
- # Create audio I/O
191
- audio_io = BidiAudioIO()
196
+ # Create audio I/O with device indices
197
+ audio_io = BidiAudioIO(
198
+ input_device_index=self.input_device_index,
199
+ output_device_index=self.output_device_index,
200
+ )
192
201
 
193
202
  # Run agent with audio I/O
194
203
  await self.agent.run(inputs=[audio_io.input()], outputs=[audio_io.output()])
@@ -207,6 +216,9 @@ def speech_to_speech(
207
216
  tools: Optional[List[str]] = None,
208
217
  agent: Optional[Any] = None,
209
218
  load_history_from: Optional[str] = None,
219
+ inherit_system_prompt: bool = False,
220
+ input_device_index: Optional[int] = None,
221
+ output_device_index: Optional[int] = None,
210
222
  ) -> str:
211
223
  """Start, stop, or manage speech-to-speech conversations.
212
224
 
@@ -221,18 +233,22 @@ def speech_to_speech(
221
233
  - "status": Get session status
222
234
  - "list_history": List saved conversation histories
223
235
  - "read_history": Read a specific conversation history
236
+ - "list_audio_devices": List all available audio input/output devices
224
237
  provider: Model provider to use:
225
238
  - "novasonic": AWS Bedrock Nova Sonic
226
239
  - "openai": OpenAI Realtime API
227
240
  - "gemini_live": Google Gemini Live
228
241
  system_prompt: Custom system prompt for the agent. This will be appended
229
- to the parent agent's system prompt (if available). If not provided,
230
- uses default prompt that encourages tool usage.
242
+ to the parent agent's system prompt (if inherit_system_prompt=True).
243
+ If not provided, uses default prompt that encourages tool usage.
231
244
  session_id: Session identifier:
232
245
  - For "start": Custom ID (auto-generated if not provided)
233
246
  - For "stop": Specific session to stop (stops all if not provided)
234
247
  - For "read_history": Session ID to read history from
235
248
  - For "status": Not used
249
+ inherit_system_prompt: Whether to inherit parent agent's system prompt.
250
+ Set to False to use only the custom system_prompt (useful for OpenAI
251
+ which has 16K token limit). Default: False
236
252
  model_settings: Provider-specific configuration dictionary. Structure:
237
253
  {
238
254
  "model_id": "model-name",
@@ -260,6 +276,10 @@ def speech_to_speech(
260
276
  agent: Parent agent (automatically passed by Strands framework)
261
277
  load_history_from: Optional session ID to load conversation history from
262
278
  when starting a new session (provides context continuity)
279
+ input_device_index: Optional PyAudio input device index. If not specified,
280
+ uses system default. Use action="list_audio_devices" to see available devices.
281
+ output_device_index: Optional PyAudio output device index. If not specified,
282
+ uses system default. Use action="list_audio_devices" to see available devices.
263
283
 
264
284
  Returns:
265
285
  str: Status message with session details or error information
@@ -287,6 +307,9 @@ def speech_to_speech(
287
307
  tools,
288
308
  agent,
289
309
  load_history_from,
310
+ inherit_system_prompt,
311
+ input_device_index,
312
+ output_device_index,
290
313
  )
291
314
  elif action == "stop":
292
315
  return _stop_speech_session(session_id)
@@ -296,6 +319,8 @@ def speech_to_speech(
296
319
  return _list_conversation_histories()
297
320
  elif action == "read_history":
298
321
  return _read_conversation_history(session_id)
322
+ elif action == "list_audio_devices":
323
+ return _list_audio_devices()
299
324
  else:
300
325
  return f"Unknown action: {action}"
301
326
 
@@ -364,6 +389,9 @@ def _start_speech_session(
364
389
  tool_names: Optional[List[str]],
365
390
  parent_agent: Optional[Any],
366
391
  load_history_from: Optional[str],
392
+ inherit_system_prompt: bool,
393
+ input_device_index: Optional[int],
394
+ output_device_index: Optional[int],
367
395
  ) -> str:
368
396
  """Start a speech-to-speech session with full configuration support."""
369
397
  try:
@@ -384,7 +412,7 @@ def _start_speech_session(
384
412
  if provider == "novasonic":
385
413
  # Nova Sonic only available in: us-east-1, eu-north-1, ap-northeast-1
386
414
  default_settings = {
387
- "model_id": "amazon.nova-2-sonic-v1:0",
415
+ "model_id": os.getenv("BIDI_MODEL_ID", "amazon.nova-2-sonic-v1:0"),
388
416
  "provider_config": {
389
417
  "audio": {
390
418
  "voice": "tiffany",
@@ -421,7 +449,7 @@ def _start_speech_session(
421
449
  elif provider == "openai":
422
450
  # Read API key from environment if not provided in model_settings
423
451
  default_settings = {
424
- "model_id": "gpt-realtime",
452
+ "model_id": os.getenv("BIDI_MODEL_ID", "gpt-realtime"),
425
453
  "client_config": {
426
454
  "api_key": os.getenv("OPENAI_API_KEY"),
427
455
  },
@@ -457,7 +485,9 @@ def _start_speech_session(
457
485
  api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
458
486
 
459
487
  default_settings = {
460
- "model_id": "gemini-2.5-flash-native-audio-preview-09-2025",
488
+ "model_id": os.getenv(
489
+ "BIDI_MODEL_ID", "gemini-2.5-flash-native-audio-preview-09-2025"
490
+ ),
461
491
  "client_config": {
462
492
  "api_key": api_key,
463
493
  },
@@ -545,8 +575,12 @@ def _start_speech_session(
545
575
  # Build system prompt: parent prompt + custom prompt
546
576
  final_system_prompt = ""
547
577
 
548
- # Get parent agent's system prompt if available
549
- if parent_agent and hasattr(parent_agent, "system_prompt"):
578
+ # Get parent agent's system prompt if available and inheritance enabled
579
+ if (
580
+ inherit_system_prompt
581
+ and parent_agent
582
+ and hasattr(parent_agent, "system_prompt")
583
+ ):
550
584
  parent_prompt = parent_agent.system_prompt or ""
551
585
  if parent_prompt:
552
586
  final_system_prompt = parent_prompt
@@ -581,6 +615,8 @@ Keep your voice responses brief and natural."""
581
615
  session = SpeechSession(
582
616
  session_id=session_id,
583
617
  agent=bidi_agent,
618
+ input_device_index=input_device_index,
619
+ output_device_index=output_device_index,
584
620
  )
585
621
 
586
622
  session.start()
@@ -748,3 +784,67 @@ def _read_conversation_history(session_id: Optional[str]) -> str:
748
784
 
749
785
  except Exception as e:
750
786
  return f"❌ Error reading history: {e}"
787
+
788
+
789
+ def _list_audio_devices() -> str:
790
+ """List all available audio input and output devices."""
791
+ try:
792
+ import pyaudio
793
+
794
+ p = pyaudio.PyAudio()
795
+
796
+ lines = ["**Available Audio Devices:**\n"]
797
+
798
+ # List all devices
799
+ device_count = p.get_device_count()
800
+ default_input = p.get_default_input_device_info()["index"]
801
+ default_output = p.get_default_output_device_info()["index"]
802
+
803
+ lines.append(f"Total devices: {device_count}\n")
804
+
805
+ for i in range(device_count):
806
+ try:
807
+ info = p.get_device_info_by_index(i)
808
+ name = info["name"]
809
+ max_input_channels = info["maxInputChannels"]
810
+ max_output_channels = info["maxOutputChannels"]
811
+
812
+ device_type = []
813
+ is_default = []
814
+
815
+ if max_input_channels > 0:
816
+ device_type.append("INPUT")
817
+ if i == default_input:
818
+ is_default.append("default input")
819
+
820
+ if max_output_channels > 0:
821
+ device_type.append("OUTPUT")
822
+ if i == default_output:
823
+ is_default.append("default output")
824
+
825
+ type_str = "/".join(device_type) if device_type else "NONE"
826
+ default_str = f" [{', '.join(is_default)}]" if is_default else ""
827
+
828
+ lines.append(
829
+ f"- **Index {i}:** {name}\n"
830
+ f" Type: {type_str}{default_str}\n"
831
+ f" Input Channels: {max_input_channels}, Output Channels: {max_output_channels}"
832
+ )
833
+
834
+ except Exception as e:
835
+ lines.append(f"- **Index {i}:** Error reading device info - {e}")
836
+
837
+ p.terminate()
838
+
839
+ lines.append(
840
+ "\n**Usage:**\n"
841
+ "To use a specific device, pass the index:\n"
842
+ ' speech_to_speech(action="start", input_device_index=2, output_device_index=5)'
843
+ )
844
+
845
+ return "\n".join(lines)
846
+
847
+ except ImportError:
848
+ return "❌ PyAudio not installed. Install with: pip install pyaudio"
849
+ except Exception as e:
850
+ return f"❌ Error listing audio devices: {e}"