devduck 1.1.0__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of devduck might be problematic. Click here for more details.
- devduck/__init__.py +99 -15
- devduck/_version.py +2 -2
- devduck/tools/__init__.py +41 -49
- devduck/tools/fetch_github_tool.py +201 -0
- devduck/tools/scraper.py +935 -0
- devduck/tools/speech_to_speech.py +109 -9
- devduck/tools/system_prompt.py +276 -153
- {devduck-1.1.0.dist-info → devduck-1.1.4.dist-info}/METADATA +2 -1
- {devduck-1.1.0.dist-info → devduck-1.1.4.dist-info}/RECORD +13 -11
- {devduck-1.1.0.dist-info → devduck-1.1.4.dist-info}/WHEEL +0 -0
- {devduck-1.1.0.dist-info → devduck-1.1.4.dist-info}/entry_points.txt +0 -0
- {devduck-1.1.0.dist-info → devduck-1.1.4.dist-info}/licenses/LICENSE +0 -0
- {devduck-1.1.0.dist-info → devduck-1.1.4.dist-info}/top_level.txt +0 -0
|
@@ -102,15 +102,21 @@ class SpeechSession:
|
|
|
102
102
|
self,
|
|
103
103
|
session_id: str,
|
|
104
104
|
agent: BidiAgent,
|
|
105
|
+
input_device_index: Optional[int] = None,
|
|
106
|
+
output_device_index: Optional[int] = None,
|
|
105
107
|
):
|
|
106
108
|
"""Initialize speech session.
|
|
107
109
|
|
|
108
110
|
Args:
|
|
109
111
|
session_id: Unique session identifier
|
|
110
112
|
agent: BidiAgent instance
|
|
113
|
+
input_device_index: PyAudio input device index
|
|
114
|
+
output_device_index: PyAudio output device index
|
|
111
115
|
"""
|
|
112
116
|
self.session_id = session_id
|
|
113
117
|
self.agent = agent
|
|
118
|
+
self.input_device_index = input_device_index
|
|
119
|
+
self.output_device_index = output_device_index
|
|
114
120
|
self.active = False
|
|
115
121
|
self.thread = None
|
|
116
122
|
self.loop = None
|
|
@@ -187,8 +193,11 @@ class SpeechSession:
|
|
|
187
193
|
async def _async_session(self) -> None:
|
|
188
194
|
"""Async session management using BidiAudioIO."""
|
|
189
195
|
try:
|
|
190
|
-
# Create audio I/O
|
|
191
|
-
audio_io = BidiAudioIO(
|
|
196
|
+
# Create audio I/O with device indices
|
|
197
|
+
audio_io = BidiAudioIO(
|
|
198
|
+
input_device_index=self.input_device_index,
|
|
199
|
+
output_device_index=self.output_device_index,
|
|
200
|
+
)
|
|
192
201
|
|
|
193
202
|
# Run agent with audio I/O
|
|
194
203
|
await self.agent.run(inputs=[audio_io.input()], outputs=[audio_io.output()])
|
|
@@ -207,6 +216,9 @@ def speech_to_speech(
|
|
|
207
216
|
tools: Optional[List[str]] = None,
|
|
208
217
|
agent: Optional[Any] = None,
|
|
209
218
|
load_history_from: Optional[str] = None,
|
|
219
|
+
inherit_system_prompt: bool = False,
|
|
220
|
+
input_device_index: Optional[int] = None,
|
|
221
|
+
output_device_index: Optional[int] = None,
|
|
210
222
|
) -> str:
|
|
211
223
|
"""Start, stop, or manage speech-to-speech conversations.
|
|
212
224
|
|
|
@@ -221,18 +233,22 @@ def speech_to_speech(
|
|
|
221
233
|
- "status": Get session status
|
|
222
234
|
- "list_history": List saved conversation histories
|
|
223
235
|
- "read_history": Read a specific conversation history
|
|
236
|
+
- "list_audio_devices": List all available audio input/output devices
|
|
224
237
|
provider: Model provider to use:
|
|
225
238
|
- "novasonic": AWS Bedrock Nova Sonic
|
|
226
239
|
- "openai": OpenAI Realtime API
|
|
227
240
|
- "gemini_live": Google Gemini Live
|
|
228
241
|
system_prompt: Custom system prompt for the agent. This will be appended
|
|
229
|
-
to the parent agent's system prompt (if
|
|
230
|
-
uses default prompt that encourages tool usage.
|
|
242
|
+
to the parent agent's system prompt (if inherit_system_prompt=True).
|
|
243
|
+
If not provided, uses default prompt that encourages tool usage.
|
|
231
244
|
session_id: Session identifier:
|
|
232
245
|
- For "start": Custom ID (auto-generated if not provided)
|
|
233
246
|
- For "stop": Specific session to stop (stops all if not provided)
|
|
234
247
|
- For "read_history": Session ID to read history from
|
|
235
248
|
- For "status": Not used
|
|
249
|
+
inherit_system_prompt: Whether to inherit parent agent's system prompt.
|
|
250
|
+
Set to False to use only the custom system_prompt (useful for OpenAI
|
|
251
|
+
which has 16K token limit). Default: False
|
|
236
252
|
model_settings: Provider-specific configuration dictionary. Structure:
|
|
237
253
|
{
|
|
238
254
|
"model_id": "model-name",
|
|
@@ -260,6 +276,10 @@ def speech_to_speech(
|
|
|
260
276
|
agent: Parent agent (automatically passed by Strands framework)
|
|
261
277
|
load_history_from: Optional session ID to load conversation history from
|
|
262
278
|
when starting a new session (provides context continuity)
|
|
279
|
+
input_device_index: Optional PyAudio input device index. If not specified,
|
|
280
|
+
uses system default. Use action="list_audio_devices" to see available devices.
|
|
281
|
+
output_device_index: Optional PyAudio output device index. If not specified,
|
|
282
|
+
uses system default. Use action="list_audio_devices" to see available devices.
|
|
263
283
|
|
|
264
284
|
Returns:
|
|
265
285
|
str: Status message with session details or error information
|
|
@@ -287,6 +307,9 @@ def speech_to_speech(
|
|
|
287
307
|
tools,
|
|
288
308
|
agent,
|
|
289
309
|
load_history_from,
|
|
310
|
+
inherit_system_prompt,
|
|
311
|
+
input_device_index,
|
|
312
|
+
output_device_index,
|
|
290
313
|
)
|
|
291
314
|
elif action == "stop":
|
|
292
315
|
return _stop_speech_session(session_id)
|
|
@@ -296,6 +319,8 @@ def speech_to_speech(
|
|
|
296
319
|
return _list_conversation_histories()
|
|
297
320
|
elif action == "read_history":
|
|
298
321
|
return _read_conversation_history(session_id)
|
|
322
|
+
elif action == "list_audio_devices":
|
|
323
|
+
return _list_audio_devices()
|
|
299
324
|
else:
|
|
300
325
|
return f"Unknown action: {action}"
|
|
301
326
|
|
|
@@ -364,6 +389,9 @@ def _start_speech_session(
|
|
|
364
389
|
tool_names: Optional[List[str]],
|
|
365
390
|
parent_agent: Optional[Any],
|
|
366
391
|
load_history_from: Optional[str],
|
|
392
|
+
inherit_system_prompt: bool,
|
|
393
|
+
input_device_index: Optional[int],
|
|
394
|
+
output_device_index: Optional[int],
|
|
367
395
|
) -> str:
|
|
368
396
|
"""Start a speech-to-speech session with full configuration support."""
|
|
369
397
|
try:
|
|
@@ -384,7 +412,7 @@ def _start_speech_session(
|
|
|
384
412
|
if provider == "novasonic":
|
|
385
413
|
# Nova Sonic only available in: us-east-1, eu-north-1, ap-northeast-1
|
|
386
414
|
default_settings = {
|
|
387
|
-
"model_id": "amazon.nova-2-sonic-v1:0",
|
|
415
|
+
"model_id": os.getenv("BIDI_MODEL_ID", "amazon.nova-2-sonic-v1:0"),
|
|
388
416
|
"provider_config": {
|
|
389
417
|
"audio": {
|
|
390
418
|
"voice": "tiffany",
|
|
@@ -421,7 +449,7 @@ def _start_speech_session(
|
|
|
421
449
|
elif provider == "openai":
|
|
422
450
|
# Read API key from environment if not provided in model_settings
|
|
423
451
|
default_settings = {
|
|
424
|
-
"model_id": "gpt-realtime",
|
|
452
|
+
"model_id": os.getenv("BIDI_MODEL_ID", "gpt-realtime"),
|
|
425
453
|
"client_config": {
|
|
426
454
|
"api_key": os.getenv("OPENAI_API_KEY"),
|
|
427
455
|
},
|
|
@@ -457,7 +485,9 @@ def _start_speech_session(
|
|
|
457
485
|
api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
|
|
458
486
|
|
|
459
487
|
default_settings = {
|
|
460
|
-
"model_id":
|
|
488
|
+
"model_id": os.getenv(
|
|
489
|
+
"BIDI_MODEL_ID", "gemini-2.5-flash-native-audio-preview-09-2025"
|
|
490
|
+
),
|
|
461
491
|
"client_config": {
|
|
462
492
|
"api_key": api_key,
|
|
463
493
|
},
|
|
@@ -545,8 +575,12 @@ def _start_speech_session(
|
|
|
545
575
|
# Build system prompt: parent prompt + custom prompt
|
|
546
576
|
final_system_prompt = ""
|
|
547
577
|
|
|
548
|
-
# Get parent agent's system prompt if available
|
|
549
|
-
if
|
|
578
|
+
# Get parent agent's system prompt if available and inheritance enabled
|
|
579
|
+
if (
|
|
580
|
+
inherit_system_prompt
|
|
581
|
+
and parent_agent
|
|
582
|
+
and hasattr(parent_agent, "system_prompt")
|
|
583
|
+
):
|
|
550
584
|
parent_prompt = parent_agent.system_prompt or ""
|
|
551
585
|
if parent_prompt:
|
|
552
586
|
final_system_prompt = parent_prompt
|
|
@@ -581,6 +615,8 @@ Keep your voice responses brief and natural."""
|
|
|
581
615
|
session = SpeechSession(
|
|
582
616
|
session_id=session_id,
|
|
583
617
|
agent=bidi_agent,
|
|
618
|
+
input_device_index=input_device_index,
|
|
619
|
+
output_device_index=output_device_index,
|
|
584
620
|
)
|
|
585
621
|
|
|
586
622
|
session.start()
|
|
@@ -748,3 +784,67 @@ def _read_conversation_history(session_id: Optional[str]) -> str:
|
|
|
748
784
|
|
|
749
785
|
except Exception as e:
|
|
750
786
|
return f"❌ Error reading history: {e}"
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def _list_audio_devices() -> str:
    """List all available audio input and output devices.

    Asks PyAudio for every device index and renders a Markdown-style report
    with each device's name, direction (INPUT/OUTPUT), channel counts and
    whether it is the current default input or output device.

    A host without a default input or output device is still listed; the
    corresponding "default" marker is simply omitted. The PyAudio instance
    is always terminated, even when enumeration fails part-way.

    Returns:
        str: The device report followed by a usage hint, or a message
            starting with "❌" when PyAudio cannot be imported or the
            enumeration fails.
    """
    try:
        import pyaudio

        p = pyaudio.PyAudio()
        try:

            def _default_index(lookup):
                # PyAudio raises IOError (an alias of OSError) when no
                # default device exists for that direction. That must not
                # abort the listing of the devices that do exist.
                try:
                    return lookup()["index"]
                except OSError:
                    return None

            lines = ["**Available Audio Devices:**\n"]

            device_count = p.get_device_count()
            default_input = _default_index(p.get_default_input_device_info)
            default_output = _default_index(p.get_default_output_device_info)

            lines.append(f"Total devices: {device_count}\n")

            for i in range(device_count):
                # Best effort per device: one unreadable entry is reported
                # inline and the remaining devices are still listed.
                try:
                    info = p.get_device_info_by_index(i)
                    name = info["name"]
                    max_input_channels = info["maxInputChannels"]
                    max_output_channels = info["maxOutputChannels"]

                    device_type = []
                    is_default = []

                    if max_input_channels > 0:
                        device_type.append("INPUT")
                        if i == default_input:
                            is_default.append("default input")

                    if max_output_channels > 0:
                        device_type.append("OUTPUT")
                        if i == default_output:
                            is_default.append("default output")

                    type_str = "/".join(device_type) if device_type else "NONE"
                    default_str = f" [{', '.join(is_default)}]" if is_default else ""

                    lines.append(
                        f"- **Index {i}:** {name}\n"
                        f" Type: {type_str}{default_str}\n"
                        f" Input Channels: {max_input_channels}, Output Channels: {max_output_channels}"
                    )

                except Exception as e:
                    lines.append(f"- **Index {i}:** Error reading device info - {e}")
        finally:
            # Release the PortAudio resources on every exit path.
            p.terminate()

        lines.append(
            "\n**Usage:**\n"
            "To use a specific device, pass the index:\n"
            ' speech_to_speech(action="start", input_device_index=2, output_device_index=5)'
        )

        return "\n".join(lines)

    except ImportError:
        return "❌ PyAudio not installed. Install with: pip install pyaudio"
    except Exception as e:
        return f"❌ Error listing audio devices: {e}"
|