massgen 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +4 -0
- massgen/backend/claude_code.py +9 -1
- massgen/backend/gemini.py +35 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/chat_agent.py +9 -3
- massgen/cli.py +291 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +52 -6
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
- massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/configs/tools/memory/README.md +199 -0
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
- massgen/configs/tools/memory/test_context_window_management.py +286 -0
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +8 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +123 -3
- massgen/orchestrator.py +319 -38
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/tool/__init__.py +4 -0
- massgen/tool/_multimodal_tools/understand_audio.py +193 -0
- massgen/tool/_multimodal_tools/understand_file.py +550 -0
- massgen/tool/_multimodal_tools/understand_image.py +212 -0
- massgen/tool/_multimodal_tools/understand_video.py +313 -0
- massgen/tool/docs/multimodal_tools.md +779 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/METADATA +8 -3
- {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/RECORD +63 -36
- {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Unit tests for orchestration restart feature.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
# Ensure the directory that CONTAINS the ``massgen`` package is on sys.path.
# This test module is shipped under massgen/tests/, so that directory is two
# levels above this file. Going up only one level would add the package
# directory itself, which does not make ``import massgen`` resolvable and can
# shadow unrelated top-level module names with package-internal modules.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_coordination_config_restart_params():
    """CoordinationConfig exposes ``max_orchestration_restarts`` with a default of 0."""
    from massgen.agent_config import CoordinationConfig

    coordination = CoordinationConfig()

    # Presence is checked first so a missing attribute fails with a clear assert.
    assert hasattr(coordination, "max_orchestration_restarts")

    default_restarts = coordination.max_orchestration_restarts
    assert default_restarts == 0
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_agent_config_debug_final_answer():
    """AgentConfig exposes ``debug_final_answer`` and leaves it unset (None) by default."""
    from massgen.agent_config import AgentConfig

    agent_config = AgentConfig()

    assert hasattr(agent_config, "debug_final_answer")

    default_value = agent_config.debug_final_answer
    assert default_value is None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_post_evaluation_toolkit_import():
    """PostEvaluationToolkit is importable from ``massgen.tool.workflow_toolkits``."""
    # The import itself is the real check; the assert guards against a None re-export.
    from massgen.tool.workflow_toolkits import PostEvaluationToolkit as toolkit_cls

    assert toolkit_cls is not None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_post_evaluation_tools_function():
    """``get_post_evaluation_tools`` returns exactly two tools: submit, then restart_orchestration."""
    from massgen.tool import get_post_evaluation_tools

    post_eval_tools = get_post_evaluation_tools()
    assert len(post_eval_tools) == 2

    # Order matters: other tests index into this list positionally.
    first_tool = post_eval_tools[0]
    assert first_tool["function"]["name"] == "submit"

    second_tool = post_eval_tools[1]
    assert second_tool["function"]["name"] == "restart_orchestration"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_submit_tool_schema():
    """The submit tool takes a ``confirmed`` parameter whose only allowed value is True."""
    from massgen.tool import get_post_evaluation_tools

    submit_tool = get_post_evaluation_tools()[0]

    assert submit_tool["function"]["name"] == "submit"

    properties = submit_tool["function"]["parameters"]["properties"]
    assert "confirmed" in properties

    confirmed_schema = properties["confirmed"]
    assert confirmed_schema["enum"] == [True]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_restart_orchestration_tool_schema():
    """The restart_orchestration tool requires both ``reason`` and ``instructions``."""
    from massgen.tool import get_post_evaluation_tools

    restart_tool = get_post_evaluation_tools()[1]

    assert restart_tool["function"]["name"] == "restart_orchestration"

    params = restart_tool["function"]["parameters"]["properties"]
    for expected_param in ("reason", "instructions"):
        assert expected_param in params

    # Compared as a set so the declared order of required fields is irrelevant.
    required_fields = set(restart_tool["function"]["parameters"]["required"])
    assert required_fields == {"reason", "instructions"}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_message_templates_post_evaluation():
    """MessageTemplates provides the post-evaluation prompt and the restart-context formatter."""
    from massgen.message_templates import MessageTemplates

    templates = MessageTemplates()
    for method_name in ("post_evaluation_system_message", "format_restart_context"):
        assert hasattr(templates, method_name)

    # Both methods must return strings containing their identifying headings.
    system_message = templates.post_evaluation_system_message()
    assert isinstance(system_message, str)
    assert "Post-Presentation Evaluation" in system_message

    context_text = templates.format_restart_context("test reason", "test instructions")
    assert isinstance(context_text, str)
    assert "PREVIOUS ATTEMPT FEEDBACK" in context_text
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_orchestrator_restart_state():
    """A freshly built Orchestrator carries restart-tracking attributes with their initial values."""
    from massgen.agent_config import AgentConfig, CoordinationConfig
    from massgen.orchestrator import Orchestrator

    agent_config = AgentConfig()
    agent_config.coordination_config = CoordinationConfig(max_orchestration_restarts=2)

    orchestrator = Orchestrator(agents={}, config=agent_config)

    restart_attributes = (
        "current_attempt",
        "max_attempts",
        "restart_pending",
        "restart_reason",
        "restart_instructions",
    )
    for attribute in restart_attributes:
        assert hasattr(orchestrator, attribute)

    assert orchestrator.current_attempt == 0
    # One initial attempt plus the two configured restarts.
    assert orchestrator.max_attempts == 3
    assert orchestrator.restart_pending is False
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_orchestrator_post_evaluate_method():
    """Orchestrator.post_evaluate_answer exists and accepts the expected parameters."""
    import inspect

    from massgen.orchestrator import Orchestrator

    assert hasattr(Orchestrator, "post_evaluate_answer")

    parameter_names = inspect.signature(Orchestrator.post_evaluate_answer).parameters
    for expected_name in ("selected_agent_id", "final_answer"):
        assert expected_name in parameter_names
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_orchestrator_handle_restart_method():
    """Orchestrator defines a ``handle_restart`` attribute at class level."""
    from massgen.orchestrator import Orchestrator as orchestrator_cls

    assert hasattr(orchestrator_cls, "handle_restart")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_handle_restart_resets_state():
    """``handle_restart`` advances the attempt counter and clears per-attempt state.

    The restart reason and instructions are expected to survive the reset so
    the next attempt can use them.
    """
    from massgen.agent_config import AgentConfig, CoordinationConfig
    from massgen.orchestrator import Orchestrator

    agent_config = AgentConfig()
    agent_config.coordination_config = CoordinationConfig(max_orchestration_restarts=2)

    # An empty agents dict keeps construction simple for this test.
    orchestrator = Orchestrator(agents={}, config=agent_config)

    # State as it would look once a first attempt has finished presenting.
    state_after_first_attempt = {
        "current_attempt": 0,
        "restart_reason": "test reason",
        "restart_instructions": "test instructions",
        "workflow_phase": "presenting",
        "_selected_agent": "agent1",
        "_final_presentation_content": "some content",
    }
    for attribute, value in state_after_first_attempt.items():
        setattr(orchestrator, attribute, value)

    orchestrator.handle_restart()

    # Per-attempt state is reset and the counter moves forward by one.
    assert orchestrator.current_attempt == 1
    assert orchestrator.workflow_phase == "idle"
    assert orchestrator._selected_agent is None
    assert orchestrator._final_presentation_content is None

    # Feedback for the next attempt must be left untouched.
    assert orchestrator.restart_reason == "test reason"
    assert orchestrator.restart_instructions == "test instructions"
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def test_base_display_restart_methods():
    """BaseDisplay declares the three restart-related display hooks as abstract methods."""
    import inspect

    from massgen.frontend.displays.base_display import BaseDisplay

    # Collect every plain function on the class that is flagged abstract.
    abstract_names = set()
    for name, member in inspect.getmembers(BaseDisplay, predicate=inspect.isfunction):
        if getattr(member, "__isabstractmethod__", False):
            abstract_names.add(name)

    required_hooks = (
        "show_post_evaluation_content",
        "show_restart_banner",
        "show_restart_context_panel",
    )
    for hook in required_hooks:
        assert hook in abstract_names
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@pytest.mark.asyncio
async def test_post_evaluation_tools_api_formats():
    """Post-evaluation tools come back as two entries in each supported API format.

    For every format the first tool is checked for that format's marker:
    a ``"type": "function"`` entry for chat_completions and response, and an
    ``input_schema`` key for claude.
    """
    from massgen.tool import get_post_evaluation_tools

    # NOTE(review): nothing is awaited here; the async marker appears to be
    # kept only for consistency with the rest of the suite - confirm.
    format_checks = (
        ("chat_completions", lambda tool: tool["type"] == "function"),
        ("claude", lambda tool: "input_schema" in tool),
        ("response", lambda tool: tool["type"] == "function"),
    )

    for api_format, first_tool_matches in format_checks:
        tools = get_post_evaluation_tools(api_format=api_format)
        assert len(tools) == 2
        assert first_tool_matches(tools[0])
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly (for example
    # from a shell script or CI step) reports failures instead of always
    # exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
massgen/tool/__init__.py
CHANGED
|
@@ -8,8 +8,10 @@ from ._result import ExecutionResult
|
|
|
8
8
|
from .workflow_toolkits import (
|
|
9
9
|
BaseToolkit,
|
|
10
10
|
NewAnswerToolkit,
|
|
11
|
+
PostEvaluationToolkit,
|
|
11
12
|
ToolType,
|
|
12
13
|
VoteToolkit,
|
|
14
|
+
get_post_evaluation_tools,
|
|
13
15
|
get_workflow_tools,
|
|
14
16
|
)
|
|
15
17
|
|
|
@@ -35,5 +37,7 @@ __all__ = [
|
|
|
35
37
|
"ToolType",
|
|
36
38
|
"NewAnswerToolkit",
|
|
37
39
|
"VoteToolkit",
|
|
40
|
+
"PostEvaluationToolkit",
|
|
38
41
|
"get_workflow_tools",
|
|
42
|
+
"get_post_evaluation_tools",
|
|
39
43
|
]
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Transcribe audio file(s) to text using OpenAI's Transcription API.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
from dotenv import load_dotenv
|
|
12
|
+
from openai import OpenAI
|
|
13
|
+
|
|
14
|
+
from massgen.tool._result import ExecutionResult, TextContent
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
|
|
18
|
+
"""
|
|
19
|
+
Validate that a path is within allowed directories.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
path: Path to validate
|
|
23
|
+
allowed_paths: List of allowed base paths (optional)
|
|
24
|
+
|
|
25
|
+
Raises:
|
|
26
|
+
ValueError: If path is not within allowed directories
|
|
27
|
+
"""
|
|
28
|
+
if not allowed_paths:
|
|
29
|
+
return # No restrictions
|
|
30
|
+
|
|
31
|
+
for allowed_path in allowed_paths:
|
|
32
|
+
try:
|
|
33
|
+
path.relative_to(allowed_path)
|
|
34
|
+
return # Path is within this allowed directory
|
|
35
|
+
except ValueError:
|
|
36
|
+
continue
|
|
37
|
+
|
|
38
|
+
raise ValueError(f"Path not in allowed directories: {path}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _audio_failure(message: str) -> ExecutionResult:
    """Wrap an error message in the JSON failure payload returned by understand_audio."""
    payload = {
        "success": False,
        "operation": "generate_text_with_input_audio",
        "error": message,
    }
    return ExecutionResult(
        output_blocks=[TextContent(data=json.dumps(payload, indent=2))],
    )


async def understand_audio(
    audio_paths: List[str],
    model: str = "gpt-4o-transcribe",
    allowed_paths: Optional[List[str]] = None,
) -> ExecutionResult:
    """
    Transcribe audio file(s) to text using OpenAI's Transcription API.

    Every input file is validated first (path access, existence, extension);
    only if all of them pass is each file sent to the Transcription API, one
    request per file. The first failure of any kind aborts the whole call and
    is reported as a failure result rather than raised.

    Args:
        audio_paths: List of paths to input audio files (WAV, MP3, M4A, etc.).
                    A single path given as a bare string is treated as a
                    one-element list.
                    - Relative path: Resolved relative to the current working directory
                    - Absolute path: Must be within allowed directories
        model: Model to use (default: "gpt-4o-transcribe")
        allowed_paths: List of allowed base paths for validation (optional)

    Returns:
        ExecutionResult whose single text block is a JSON document containing:
        - success: Whether operation succeeded
        - operation: "generate_text_with_input_audio"
        - transcriptions: List of {"file", "transcription"} entries, one per file
        - audio_files: List of resolved paths to the input audio files
        - model: Model used
        On failure the document contains success, operation and error instead.

    Examples:
        understand_audio(["recording.wav"])
        → Returns transcription for recording.wav

        understand_audio(["interview1.mp3", "interview2.mp3"])
        → Returns separate transcriptions for each file

    Security:
        - Requires valid OpenAI API key
        - All input audio files must exist and be readable
    """
    try:
        # A lone string would otherwise be iterated character by character and
        # produce a confusing "file does not exist" error for its first letter.
        if isinstance(audio_paths, (str, Path)):
            audio_paths = [audio_paths]

        # Resolve the allowed bases so they are comparable with the resolved
        # audio paths built below (relative or symlinked bases would never match).
        allowed_roots = [Path(p).resolve() for p in allowed_paths] if allowed_paths else None

        # Prefer a .env file four directory levels above this module; otherwise
        # fall back to python-dotenv's default lookup.
        env_path = Path(__file__).parent.parent.parent.parent / ".env"
        if env_path.exists():
            load_dotenv(env_path)
        else:
            load_dotenv()

        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return _audio_failure(
                "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
            )

        client = OpenAI(api_key=openai_api_key)

        audio_extensions = (".wav", ".mp3", ".m4a", ".mp4", ".ogg", ".flac", ".aac", ".wma", ".opus")

        # Validate every input before any API request is made.
        validated_audio_paths = []
        for audio_path_str in audio_paths:
            candidate = Path(audio_path_str)
            if candidate.is_absolute():
                audio_path = candidate.resolve()
            else:
                audio_path = (Path.cwd() / candidate).resolve()

            # Raises ValueError when outside the allowed directories; the outer
            # handler turns that into a failure result.
            _validate_path_access(audio_path, allowed_roots)

            if not audio_path.exists():
                return _audio_failure(f"Audio file does not exist: {audio_path}")

            if audio_path.suffix.lower() not in audio_extensions:
                return _audio_failure(f"File does not appear to be an audio file: {audio_path}")

            validated_audio_paths.append(audio_path)

        # Process each audio file separately using the Transcription API.
        transcriptions = []
        for audio_path in validated_audio_paths:
            try:
                with open(audio_path, "rb") as audio_file:
                    # NOTE(review): this is a synchronous client call inside a
                    # coroutine, so the event loop is blocked for the duration
                    # of each request - consider an async client if callers
                    # run other tasks concurrently.
                    transcription = client.audio.transcriptions.create(
                        model=model,
                        file=audio_file,
                        response_format="text",
                    )
            except Exception as api_error:
                return _audio_failure(
                    f"Transcription API error for file {audio_path}: {str(api_error)}",
                )

            transcriptions.append(
                {
                    "file": str(audio_path),
                    "transcription": transcription,
                },
            )

        result = {
            "success": True,
            "operation": "generate_text_with_input_audio",
            "transcriptions": transcriptions,
            "audio_files": [str(p) for p in validated_audio_paths],
            "model": model,
        }
        return ExecutionResult(
            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
        )

    except Exception as e:
        # Tool boundary: report any unexpected error as a failure result
        # instead of letting it propagate to the caller.
        return _audio_failure(f"Failed to transcribe audio: {str(e)}")
|