massgen-0.1.1-py3-none-any.whl → massgen-0.1.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +56 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
- massgen/backend/capabilities.py +6 -6
- massgen/backend/chat_completions.py +18 -11
- massgen/backend/claude_code.py +9 -1
- massgen/backend/gemini.py +71 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/backend/grok.py +39 -6
- massgen/backend/response.py +18 -11
- massgen/chat_agent.py +9 -3
- massgen/cli.py +319 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +78 -20
- massgen/configs/basic/multi/three_agents_default.yaml +2 -2
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
- massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/configs/tools/memory/README.md +199 -0
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
- massgen/configs/tools/memory/test_context_window_management.py +286 -0
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +8 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +123 -3
- massgen/orchestrator.py +652 -44
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_intelligent_planning_mode.py +643 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/token_manager/token_manager.py +13 -4
- massgen/tool/__init__.py +4 -0
- massgen/tool/_multimodal_tools/understand_audio.py +193 -0
- massgen/tool/_multimodal_tools/understand_file.py +550 -0
- massgen/tool/_multimodal_tools/understand_image.py +212 -0
- massgen/tool/_multimodal_tools/understand_video.py +313 -0
- massgen/tool/docs/multimodal_tools.md +779 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
--- a/massgen/filesystem_manager/_workspace_tools_server.py
+++ b/massgen/filesystem_manager/_workspace_tools_server.py
@@ -21,18 +21,14 @@ Tools provided:
 """
 
 import argparse
-import base64
 import difflib
 import filecmp
 import fnmatch
-import os
 import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
 import fastmcp
-from dotenv import load_dotenv
-from openai import OpenAI
 
 
 def get_copy_file_pairs(
@@ -819,997 +815,4 @@ async def create_server() -> fastmcp.FastMCP:
         except Exception as e:
             return {"success": False, "operation": "compare_files", "error": str(e)}
 
-    @mcp.tool()
-    def generate_and_store_image_with_input_images(
-        base_image_paths: List[str],
-        prompt: str = "Create a variation of the provided images",
-        model: str = "gpt-4.1",
-        n: int = 1,
-        storage_path: Optional[str] = None,
-    ) -> Dict[str, Any]:
-        """
-        Create variations based on multiple input images using OpenAI's gpt-4.1 API.
-
-        This tool generates image variations based on multiple base images using OpenAI's gpt-4.1 API
-        and saves them to the workspace with automatic organization.
-
-        Args:
-            base_image_paths: List of paths to base images (PNG/JPEG files, less than 4MB)
-                - Relative path: Resolved relative to workspace
-                - Absolute path: Must be within allowed directories
-            prompt: Text description for the variation (default: "Create a variation of the provided images")
-            model: Model to use (default: "gpt-4.1")
-            n: Number of variations to generate (default: 1)
-            storage_path: Directory path where to save variations (optional)
-                - Relative path: Resolved relative to workspace
-                - Absolute path: Must be within allowed directories
-                - None/empty: Saves to workspace root
-
-        Returns:
-            Dictionary containing:
-            - success: Whether operation succeeded
-            - operation: "generate_and_store_image_with_input_images"
-            - note: Note about usage
-            - images: List of generated images with file paths and metadata
-            - model: Model used for generation
-            - prompt: The prompt used
-            - total_images: Total number of images generated
-
-        Examples:
-            generate_and_store_image_with_input_images(["cat.png", "dog.png"], "Combine these animals")
-            → Generates a variation combining both images
-
-            generate_and_store_image_with_input_images(["art/logo.png", "art/icon.png"], "Create a unified design")
-            → Generates variations based on both images
-
-        Security:
-            - Requires valid OpenAI API key
-            - Input images must be valid image files less than 4MB
-            - Files are saved to specified path within workspace
-        """
-        from datetime import datetime
-
-        try:
-            # Load environment variables
-            script_dir = Path(__file__).parent.parent.parent
-            env_path = script_dir / ".env"
-            if env_path.exists():
-                load_dotenv(env_path)
-            else:
-                load_dotenv()
-
-            openai_api_key = os.getenv("OPENAI_API_KEY")
-
-            if not openai_api_key:
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_image_with_input_images",
-                    "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
-                }
-
-            # Initialize OpenAI client
-            client = OpenAI(api_key=openai_api_key)
-
-            # Prepare content list with prompt and images
-            content = [{"type": "input_text", "text": prompt}]
-
-            # Process and validate all input images
-            validated_paths = []
-            for image_path_str in base_image_paths:
-                # Resolve image path
-                if Path(image_path_str).is_absolute():
-                    image_path = Path(image_path_str).resolve()
-                else:
-                    image_path = (Path.cwd() / image_path_str).resolve()
-
-                # Validate image path
-                _validate_path_access(image_path, mcp.allowed_paths)
-
-                if not image_path.exists():
-                    return {
-                        "success": False,
-                        "operation": "generate_and_store_image_with_input_images",
-                        "error": f"Image file does not exist: {image_path}",
-                    }
-
-                # Allow both PNG and JPEG formats
-                if image_path.suffix.lower() not in [".png", ".jpg", ".jpeg"]:
-                    return {
-                        "success": False,
-                        "operation": "generate_and_store_image_with_input_images",
-                        "error": f"Image must be PNG or JPEG format: {image_path}",
-                    }
-
-                # Check file size (must be less than 4MB)
-                file_size = image_path.stat().st_size
-                if file_size > 4 * 1024 * 1024:
-                    return {
-                        "success": False,
-                        "operation": "generate_and_store_image_with_input_images",
-                        "error": f"Image file too large (must be < 4MB): {image_path} is {file_size / (1024*1024):.2f}MB",
-                    }
-
-                validated_paths.append(image_path)
-
-                # Read and encode image to base64
-                with open(image_path, "rb") as f:
-                    image_data = f.read()
-                image_base64 = base64.b64encode(image_data).decode("utf-8")
-
-                # Determine MIME type
-                mime_type = "image/jpeg" if image_path.suffix.lower() in [".jpg", ".jpeg"] else "image/png"
-
-                # Add image to content
-                content.append(
-                    {
-                        "type": "input_image",
-                        "image_url": f"data:{mime_type};base64,{image_base64}",
-                    },
-                )
-
-            # Determine storage directory
-            if storage_path:
-                if Path(storage_path).is_absolute():
-                    storage_dir = Path(storage_path).resolve()
-                else:
-                    storage_dir = (Path.cwd() / storage_path).resolve()
-            else:
-                storage_dir = Path.cwd()
-
-            # Validate storage directory
-            _validate_path_access(storage_dir, mcp.allowed_paths)
-            storage_dir.mkdir(parents=True, exist_ok=True)
-
-            try:
-                # print("Content for OpenAI API:", str(content))
-                # Generate variations using gpt-4.1 API with all images at once
-                # append content to a file
-                response = client.responses.create(
-                    model=model,
-                    input=[
-                        {
-                            "role": "user",
-                            "content": content,
-                        },
-                    ],
-                    tools=[{"type": "image_generation"}],
-                )
-
-                # Extract image generation calls from response
-                image_generation_calls = [output for output in response.output if output.type == "image_generation_call"]
-
-                all_variations = []
-                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
-                # Process generated images
-                for idx, output in enumerate(image_generation_calls):
-                    if hasattr(output, "result"):
-                        image_base64 = output.result
-                        image_bytes = base64.b64decode(image_base64)
-
-                        # Generate filename
-                        if len(image_generation_calls) > 1:
-                            filename = f"variation_{idx+1}_{timestamp}.png"
-                        else:
-                            filename = f"variation_{timestamp}.png"
-
-                        # Full file path
-                        file_path = storage_dir / filename
-
-                        # Save image
-                        file_path.write_bytes(image_bytes)
-
-                        all_variations.append(
-                            {
-                                "source_images": [str(p) for p in validated_paths],
-                                "file_path": str(file_path),
-                                "filename": filename,
-                                "size": len(image_bytes),
-                                "index": idx,
-                            },
-                        )
-
-                # If no images were generated, check for text response
-                if not all_variations:
-                    text_outputs = [output.content for output in response.output if hasattr(output, "content")]
-                    if text_outputs:
-                        return {
-                            "success": False,
-                            "operation": "generate_and_store_image_with_input_images",
-                            "error": f"No images generated. Response: {' '.join(text_outputs)}",
-                        }
-
-            except Exception as api_error:
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_image_with_input_images",
-                    "error": f"OpenAI API error: {str(api_error)}",
-                }
-
-            return {
-                "success": True,
-                "operation": "generate_and_store_image_with_input_images",
-                "note": "If no input images were provided, you must use generate_and_store_image_no_input_images tool.",
-                "images": all_variations,
-                "model": model,
-                "prompt": prompt,
-                "total_images": len(all_variations),
-            }
-
-        except Exception as e:
-            return {
-                "success": False,
-                "operation": "generate_and_store_image_with_input_images",
-                "error": f"Failed to generate variations: {str(e)}",
-            }
-
-    @mcp.tool()
-    def generate_and_store_audio_no_input_audios(
-        prompt: str,
-        model: str = "gpt-4o-audio-preview",
-        voice: str = "alloy",
-        audio_format: str = "wav",
-        storage_path: Optional[str] = None,
-    ) -> Dict[str, Any]:
-        """
-        Generate audio from text using OpenAI's gpt-4o-audio-preview model and store it in the workspace.
-
-        This tool generates audio speech from text prompts using OpenAI's audio generation API
-        and saves the audio files to the workspace with automatic organization.
-
-        Args:
-            prompt: Text content to convert to audio speech
-            model: Model to use for generation (default: "gpt-4o-audio-preview")
-            voice: Voice to use for audio generation (default: "alloy")
-                Options: "alloy", "echo", "fable", "onyx", "nova", "shimmer"
-            audio_format: Audio format for output (default: "wav")
-                Options: "wav", "mp3", "opus", "aac", "flac"
-            storage_path: Directory path where to save the audio (optional)
-                - Relative path: Resolved relative to workspace (e.g., "audio/generated")
-                - Absolute path: Must be within allowed directories
-                - None/empty: Saves to workspace root
-
-        Returns:
-            Dictionary containing:
-            - success: Whether operation succeeded
-            - operation: "generate_and_store_audio_no_input_audios"
-            - audio_file: Generated audio file with path and metadata
-            - model: Model used for generation
-            - prompt: The prompt used for generation
-            - voice: Voice used for generation
-            - format: Audio format used
-
-        Examples:
-            generate_and_store_audio_no_input_audios("Is a golden retriever a good family dog?")
-            → Generates and saves to: 20240115_143022_audio.wav
-
-            generate_and_store_audio_no_input_audios("Hello world", voice="nova", audio_format="mp3")
-            → Generates with nova voice and saves as: 20240115_143022_audio.mp3
-
-        Security:
-            - Requires valid OpenAI API key (automatically detected from .env or environment)
-            - Files are saved to specified path within workspace
-            - Path must be within allowed directories
-        """
-        from datetime import datetime
-
-        try:
-            # Load environment variables
-            script_dir = Path(__file__).parent.parent.parent
-            env_path = script_dir / ".env"
-            if env_path.exists():
-                load_dotenv(env_path)
-            else:
-                load_dotenv()
-
-            openai_api_key = os.getenv("OPENAI_API_KEY")
-
-            if not openai_api_key:
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_audio_no_input_audios",
-                    "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
-                }
-
-            # Initialize OpenAI client
-            client = OpenAI(api_key=openai_api_key)
-
-            # Determine storage directory
-            if storage_path:
-                if Path(storage_path).is_absolute():
-                    storage_dir = Path(storage_path).resolve()
-                else:
-                    storage_dir = (Path.cwd() / storage_path).resolve()
-            else:
-                storage_dir = Path.cwd()
-
-            # Validate storage directory is within allowed paths
-            _validate_path_access(storage_dir, mcp.allowed_paths)
-
-            # Create directory if it doesn't exist
-            storage_dir.mkdir(parents=True, exist_ok=True)
-
-            try:
-                # Generate audio using OpenAI API
-                completion = client.chat.completions.create(
-                    model=model,
-                    modalities=["text", "audio"],
-                    audio={"voice": voice, "format": audio_format},
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": prompt,
-                        },
-                    ],
-                )
-
-                # Check if audio data is available
-                if not completion.choices[0].message.audio or not completion.choices[0].message.audio.data:
-                    return {
-                        "success": False,
-                        "operation": "generate_and_store_audio_no_input_audios",
-                        "error": "No audio data received from API",
-                    }
-
-                # Decode audio data from base64
-                audio_bytes = base64.b64decode(completion.choices[0].message.audio.data)
-
-                # Generate filename with timestamp
-                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
-                # Clean prompt for filename (first 30 chars)
-                clean_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
-                clean_prompt = clean_prompt.replace(" ", "_")
-
-                filename = f"{timestamp}_{clean_prompt}.{audio_format}"
-
-                # Full file path
-                file_path = storage_dir / filename
-
-                # Write audio to file
-                file_path.write_bytes(audio_bytes)
-                file_size = len(audio_bytes)
-
-                # Get text response if available
-                text_response = completion.choices[0].message.content if completion.choices[0].message.content else None
-
-                return {
-                    "success": True,
-                    "operation": "generate_and_store_audio_no_input_audios",
-                    "audio_file": {
-                        "file_path": str(file_path),
-                        "filename": filename,
-                        "size": file_size,
-                        "format": audio_format,
-                    },
-                    "model": model,
-                    "prompt": prompt,
-                    "voice": voice,
-                    "format": audio_format,
-                    "text_response": text_response,
-                }
-
-            except Exception as api_error:
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_audio_no_input_audios",
-                    "error": f"OpenAI API error: {str(api_error)}",
-                }
-
-        except Exception as e:
-            return {
-                "success": False,
-                "operation": "generate_and_store_audio_no_input_audios",
-                "error": f"Failed to generate or save audio: {str(e)}",
-            }
-
-    @mcp.tool()
-    def generate_and_store_image_no_input_images(
-        prompt: str,
-        model: str = "gpt-4.1",
-        storage_path: Optional[str] = None,
-    ) -> Dict[str, Any]:
-        """
-        Generate image using OpenAI's response with gpt-4.1 **WITHOUT ANY INPUT IMAGES** and store it in the workspace.
-
-        This tool Generate image using OpenAI's response with gpt-4.1 **WITHOUT ANY INPUT IMAGES** and store it in the workspace.
-
-        Args:
-            prompt: Text description of the image to generate
-            model: Model to use for generation (default: "gpt-4.1")
-                Options: "gpt-4.1"
-            n: Number of images to generate (default: 1)
-                - gpt-4.1: only 1
-            storage_path: Directory path where to save the image (optional)
-                - Relative path: Resolved relative to workspace (e.g., "images/generated")
-                - Absolute path: Must be within allowed directories
-                - None/empty: Saves to workspace root
-
-        Returns:
-            Dictionary containing:
-            - success: Whether operation succeeded
-            - operation: "generate_and_store_image_no_input_images"
-            - note: Note about operation
-            - images: List of generated images with file paths and metadata
-            - model: Model used for generation
-            - prompt: The prompt used for generation
-            - total_images: Total number of images generated and saved
-            - images: List of generated images with file paths and metadata
-
-        Examples:
-            generate_and_store_image_no_input_images("a cat in space")
-            → Generates and saves to: 20240115_143022_a_cat_in_space.png
-
-            generate_and_store_image_no_input_images("sunset over mountains", storage_path="art/landscapes")
-            → Generates and saves to: art/landscapes/20240115_143022_sunset_over_mountains.png
-
-        Security:
-            - Requires valid OpenAI API key (automatically detected from .env or environment)
-            - Files are saved to specified path within workspace
-            - Path must be within allowed directories
-
-        Note:
-            API key is automatically detected in this order:
-            1. First checks .env file in current directory or parent directories
-            2. Then checks environment variables
-        """
-        from datetime import datetime
-
-        try:
-            # Try to find and load .env file from multiple locations
-            # 1. Try loading from script directory
-            script_dir = Path(__file__).parent.parent.parent  # Go up to project root
-            env_path = script_dir / ".env"
-            if env_path.exists():
-                load_dotenv(env_path)
-            else:
-                # 2. Try loading from current directory and parent directories
-                load_dotenv()
-
-            # Get API key from environment (load_dotenv will have loaded .env file)
-            openai_api_key = os.getenv("OPENAI_API_KEY")
-
-            if not openai_api_key:
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_image",
-                    "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
-                }
-
-            # Initialize OpenAI client
-            client = OpenAI(api_key=openai_api_key)
-
-            # Determine storage directory
-            if storage_path:
-                if Path(storage_path).is_absolute():
-                    storage_dir = Path(storage_path).resolve()
-                else:
-                    storage_dir = (Path.cwd() / storage_path).resolve()
-            else:
-                storage_dir = Path.cwd()
-
-            # Validate storage directory is within allowed paths
-            _validate_path_access(storage_dir, mcp.allowed_paths)
-
-            # Create directory if it doesn't exist
-            storage_dir.mkdir(parents=True, exist_ok=True)
-
-            try:
-                # Generate image using OpenAI API with gpt-4.1 non-streaming format
-                response = client.responses.create(
-                    model=model,
-                    input=prompt,
-                    tools=[{"type": "image_generation"}],
-                )
-
-                # Extract image data from response
-                image_data = [output.result for output in response.output if output.type == "image_generation_call"]
-
-                saved_images = []
-
-                if image_data:
-                    # Generate filename with timestamp
-                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
-                    # Clean prompt for filename
-                    clean_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
-                    clean_prompt = clean_prompt.replace(" ", "_")
-
-                    for idx, image_base64 in enumerate(image_data):
-                        # Decode base64 image data
-                        image_bytes = base64.b64decode(image_base64)
-
-                        # Add index if generating multiple images
-                        if len(image_data) > 1:
-                            filename = f"{timestamp}_{clean_prompt}_{idx+1}.png"
-                        else:
-                            filename = f"{timestamp}_{clean_prompt}.png"
-
-                        # Full file path
-                        file_path = storage_dir / filename
-
-                        # Write image to file
-                        file_path.write_bytes(image_bytes)
-                        file_size = len(image_bytes)
-
-                        saved_images.append(
-                            {
-                                "file_path": str(file_path),
-                                "filename": filename,
-                                "size": file_size,
-                                "index": idx,
-                            },
-                        )
-
-                result = {
-                    "success": True,
-                    "operation": "generate_and_store_image_no_input_images",
-                    "note": "New images are generated and saved to the specified path.",
-                    "images": saved_images,
-                    "model": model,
-                    "prompt": prompt,
-                    "total_images": len(saved_images),
-                }
-
-                return result
-
-            except Exception as api_error:
-                print(f"OpenAI API error: {str(api_error)}")
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_image_no_input_images",
-                    "error": f"OpenAI API error: {str(api_error)}",
-                }
-
-        except Exception as e:
-            return {
-                "success": False,
-                "operation": "generate_and_store_image_no_input_images",
-                "error": f"Failed to generate or save image: {str(e)}",
-            }
-
-    @mcp.tool()
-    def generate_text_with_input_audio(
-        audio_paths: List[str],
-        model: str = "gpt-4o-transcribe",
-    ) -> Dict[str, Any]:
-        """
-        Transcribe audio file(s) to text using OpenAI's Transcription API.
-
-        This tool processes one or more audio files through OpenAI's Transcription API
-        to extract the text content from the audio. Each file is processed separately.
-
-        Args:
-            audio_paths: List of paths to input audio files (WAV, MP3, M4A, etc.)
-                - Relative path: Resolved relative to workspace
-                - Absolute path: Must be within allowed directories
-            model: Model to use (default: "gpt-4o-transcribe")
-
-        Returns:
-            Dictionary containing:
-            - success: Whether operation succeeded
-            - operation: "generate_text_with_input_audio"
-            - transcriptions: List of transcription results for each file
-            - audio_files: List of paths to the input audio files
-            - model: Model used
-
-        Examples:
-            generate_text_with_input_audio(["recording.wav"])
-            → Returns transcription for recording.wav
-
-            generate_text_with_input_audio(["interview1.mp3", "interview2.mp3"])
-            → Returns separate transcriptions for each file
-
-        Security:
-            - Requires valid OpenAI API key
-            - All input audio files must exist and be readable
-        """
-        try:
-            # Load environment variables
-            script_dir = Path(__file__).parent.parent.parent
-            env_path = script_dir / ".env"
-            if env_path.exists():
-                load_dotenv(env_path)
-            else:
-                load_dotenv()
-
-            openai_api_key = os.getenv("OPENAI_API_KEY")
-
-            if not openai_api_key:
-                return {
-                    "success": False,
-                    "operation": "generate_text_with_input_audio",
-                    "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
-                }
-
-            # Initialize OpenAI client
-            client = OpenAI(api_key=openai_api_key)
-
-            # Validate and process input audio files
-            validated_audio_paths = []
-            audio_extensions = [".wav", ".mp3", ".m4a", ".mp4", ".ogg", ".flac", ".aac", ".wma", ".opus"]
-
-            for audio_path_str in audio_paths:
-                # Resolve audio path
-                if Path(audio_path_str).is_absolute():
-                    audio_path = Path(audio_path_str).resolve()
-                else:
-                    audio_path = (Path.cwd() / audio_path_str).resolve()
-
-                # Validate audio path
-                _validate_path_access(audio_path, mcp.allowed_paths)
-
-                if not audio_path.exists():
-                    return {
-                        "success": False,
-                        "operation": "generate_text_with_input_audio",
-                        "error": f"Audio file does not exist: {audio_path}",
-                    }
-
-                # Check if file is an audio file
-                if audio_path.suffix.lower() not in audio_extensions:
-                    return {
-                        "success": False,
-                        "operation": "generate_text_with_input_audio",
-                        "error": f"File does not appear to be an audio file: {audio_path}",
-                    }
-
-                validated_audio_paths.append(audio_path)
-
-            # Process each audio file separately using OpenAI Transcription API
-            transcriptions = []
-
-            for audio_path in validated_audio_paths:
-                try:
-                    # Open audio file
-                    with open(audio_path, "rb") as audio_file:
-                        # Basic transcription without prompt
-                        transcription = client.audio.transcriptions.create(
-                            model=model,
-                            file=audio_file,
-                            response_format="text",
-                        )
-
-                    # Add transcription to list
-                    transcriptions.append(
-                        {
-                            "file": str(audio_path),
-                            "transcription": transcription,
-                        },
-                    )
-
-                except Exception as api_error:
-                    return {
-                        "success": False,
-                        "operation": "generate_text_with_input_audio",
-                        "error": f"Transcription API error for file {audio_path}: {str(api_error)}",
-                    }
-
-            return {
-                "success": True,
-                "operation": "generate_text_with_input_audio",
-                "transcriptions": transcriptions,
-                "audio_files": [str(p) for p in validated_audio_paths],
-                "model": model,
-            }
-
-        except Exception as e:
-            return {
-                "success": False,
-                "operation": "generate_text_with_input_audio",
-                "error": f"Failed to transcribe audio: {str(e)}",
-            }
-
-    @mcp.tool()
-    def convert_text_to_speech(
-        input_text: str,
-        model: str = "gpt-4o-mini-tts",
-        voice: str = "alloy",
-        instructions: Optional[str] = None,
-        storage_path: Optional[str] = None,
-        audio_format: str = "mp3",
-    ) -> Dict[str, Any]:
-        """
-        Convert text (transcription) directly to speech using OpenAI's TTS API with streaming response.
-
-        This tool converts text directly to speech audio using OpenAI's Text-to-Speech API,
-        designed specifically for converting transcriptions or any text content to spoken audio.
-        Uses streaming response for efficient file handling.
-
-        Args:
-            input_text: The text content to convert to speech (e.g., transcription text)
-            model: TTS model to use (default: "gpt-4o-mini-tts")
-                Options: "gpt-4o-mini-tts", "tts-1", "tts-1-hd"
-            voice: Voice to use for speech synthesis (default: "alloy")
-                Options: "alloy", "echo", "fable", "onyx", "nova", "shimmer", "coral", "sage"
-            instructions: Optional speaking instructions for tone and style (e.g., "Speak in a cheerful tone")
-            storage_path: Directory path where to save the audio file (optional)
-                - Relative path: Resolved relative to workspace
-                - Absolute path: Must be within allowed directories
-                - None/empty: Saves to workspace root
-            audio_format: Output audio format (default: "mp3")
-                Options: "mp3", "opus", "aac", "flac", "wav", "pcm"
-
-        Returns:
-            Dictionary containing:
-            - success: Whether operation succeeded
-            - operation: "convert_text_to_speech"
-            - audio_file: Generated audio file with path and metadata
-            - model: TTS model used
-            - voice: Voice used
-            - format: Audio format used
-            - text_length: Length of input text
-            - instructions: Speaking instructions if provided
-
-        Examples:
-            convert_text_to_speech("Hello world, this is a test.")
-            → Converts text to speech and saves as MP3
-
-            convert_text_to_speech(
-                "Today is a wonderful day to build something people love!",
-                voice="coral",
-                instructions="Speak in a cheerful and positive tone."
-            )
-            → Converts with specific voice and speaking instructions
-
-        Security:
-            - Requires valid OpenAI API key
-            - Files are saved to specified path within workspace
-            - Path must be within allowed directories
-        """
-        from datetime import datetime
-
-        try:
-            # Load environment variables
-            script_dir = Path(__file__).parent.parent.parent
-            env_path = script_dir / ".env"
-            if env_path.exists():
-                load_dotenv(env_path)
-            else:
-                load_dotenv()
-
-            openai_api_key = os.getenv("OPENAI_API_KEY")
-
-            if not openai_api_key:
-                return {
-                    "success": False,
-                    "operation": "convert_text_to_speech",
-                    "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
-                }
-
-            # Initialize OpenAI client
-            client = OpenAI(api_key=openai_api_key)
-
-            # Determine storage directory
-            if storage_path:
-                if Path(storage_path).is_absolute():
-                    storage_dir = Path(storage_path).resolve()
-                else:
-                    storage_dir = (Path.cwd() / storage_path).resolve()
-            else:
-                storage_dir = Path.cwd()
-
-            # Validate storage directory is within allowed paths
-            _validate_path_access(storage_dir, mcp.allowed_paths)
-
-            # Create directory if it doesn't exist
-            storage_dir.mkdir(parents=True, exist_ok=True)
-
-            # Generate filename with timestamp
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
-            # Clean text for filename (first 30 chars)
-            clean_text = "".join(c for c in input_text[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
-            clean_text = clean_text.replace(" ", "_")
-
-            filename = f"speech_{timestamp}_{clean_text}.{audio_format}"
-            file_path = storage_dir / filename
-
-            try:
-                # Prepare request parameters
-                request_params = {
-                    "model": model,
-                    "voice": voice,
-                    "input": input_text,
-                }
-
-                # Add instructions if provided (only for models that support it)
-                if instructions and model in ["gpt-4o-mini-tts"]:
-                    request_params["instructions"] = instructions
-
-                # Use streaming response for efficient file handling
-                with client.audio.speech.with_streaming_response.create(**request_params) as response:
-                    # Stream directly to file
-                    response.stream_to_file(file_path)
-
-                # Get file size
-                file_size = file_path.stat().st_size
-
-                return {
-                    "success": True,
-                    "operation": "convert_text_to_speech",
-                    "audio_file": {
-                        "file_path": str(file_path),
-                        "filename": filename,
-                        "size": file_size,
-                        "format": audio_format,
-                    },
-                    "model": model,
-                    "voice": voice,
-                    "format": audio_format,
-                    "text_length": len(input_text),
-                    "instructions": instructions if instructions else None,
-                }
-
-            except Exception as api_error:
-                return {
-                    "success": False,
-                    "operation": "convert_text_to_speech",
-                    "error": f"OpenAI TTS API error: {str(api_error)}",
-                }
-
-        except Exception as e:
-            return {
-                "success": False,
-                "operation": "convert_text_to_speech",
-                "error": f"Failed to convert text to speech: {str(e)}",
-            }
-
-    @mcp.tool()
-    def generate_and_store_video_no_input_images(
-        prompt: str,
-        model: str = "sora-2",
-        seconds: int = 4,
-        storage_path: Optional[str] = None,
-    ) -> Dict[str, Any]:
-        """
-        Generate a video from a text prompt using OpenAI's Sora-2 API.
-
-        This tool generates a video based on a text prompt using OpenAI's Sora-2 API
-        and saves it to the workspace with automatic organization.
-
-        Args:
-            prompt: Text description for the video to generate
-            model: Model to use (default: "sora-2")
-            storage_path: Directory path where to save the video (optional)
-                - Relative path: Resolved relative to workspace
-                - Absolute path: Must be within allowed directories
-                - None/empty: Saves to workspace root
-
-        Returns:
-            Dictionary containing:
-            - success: Whether operation succeeded
-            - operation: "generate_and_store_video_no_input_images"
-            - video_path: Path to the saved video file
-            - model: Model used for generation
-            - prompt: The prompt used
-            - duration: Time taken for generation in seconds
-
-        Examples:
-            generate_and_store_video_no_input_images("A cool cat on a motorcycle in the night")
-            → Generates a video and saves to workspace root
-
-            generate_and_store_video_no_input_images("Dancing robot", storage_path="videos/")
-            → Generates a video and saves to videos/ directory
-
-        Security:
-            - Requires valid OpenAI API key with Sora-2 access
-            - Files are saved to specified path within workspace
-        """
-        import time
-        from datetime import datetime
-
-        try:
-            # Load environment variables
-            script_dir = Path(__file__).parent.parent.parent
-            env_path = script_dir / ".env"
-            if env_path.exists():
-                load_dotenv(env_path)
-            else:
-                load_dotenv()
-
-            openai_api_key = os.getenv("OPENAI_API_KEY")
-
-            if not openai_api_key:
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_video_no_input_images",
-                    "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
-                }
-
-            # Initialize OpenAI client
-            client = OpenAI(api_key=openai_api_key)
-
-            # Determine storage directory
-            if storage_path:
-                if Path(storage_path).is_absolute():
-                    storage_dir = Path(storage_path).resolve()
-                else:
-                    storage_dir = (Path.cwd() / storage_path).resolve()
-            else:
-                storage_dir = Path.cwd()
-
-            # Validate storage directory is within allowed paths
-            _validate_path_access(storage_dir, mcp.allowed_paths)
-
-            # Create directory if it doesn't exist
-            storage_dir.mkdir(parents=True, exist_ok=True)
-
-            try:
-                start_time = time.time()
-
-                # Start video generation (no print statements to avoid MCP JSON parsing issues)
-                video = client.videos.create(
-                    model=model,
-                    prompt=prompt,
-                    seconds=str(seconds),
-                )
-
-                getattr(video, "progress", 0)
-
-                # Monitor progress (silently, no stdout writes)
-                while video.status in ("in_progress", "queued"):
-                    # Refresh status
-                    video = client.videos.retrieve(video.id)
-                    getattr(video, "progress", 0)
-                    time.sleep(2)
-
-                if video.status == "failed":
-                    message = getattr(
-                        getattr(video, "error", None),
-                        "message",
-                        "Video generation failed",
-                    )
-                    return {
-                        "success": False,
-                        "operation": "generate_and_store_video_no_input_images",
-                        "error": message,
-                    }
-
-                # Download video content
-                content = client.videos.download_content(video.id, variant="video")
-
-                # Generate filename with timestamp
-                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-                clean_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
-                clean_prompt = clean_prompt.replace(" ", "_")
-                filename = f"{timestamp}_{clean_prompt}.mp4"
-
-                # Full file path
-                file_path = storage_dir / filename
-
-                # Write video to file
-                content.write_to_file(str(file_path))
-
-                # Calculate duration
-                duration = time.time() - start_time
-
-                # Get file size
-                file_size = file_path.stat().st_size
-
-                return {
-                    "success": True,
-                    "operation": "generate_and_store_video_no_input_images",
-                    "video_path": str(file_path),
-                    "filename": filename,
-                    "size": file_size,
-                    "model": model,
-                    "prompt": prompt,
-                    "duration": duration,
-                }
-
-            except Exception as api_error:
-                return {
-                    "success": False,
-                    "operation": "generate_and_store_video_no_input_images",
-                    "error": f"OpenAI API error: {str(api_error)}",
-                }
-
-        except Exception as e:
-            return {
-                "success": False,
-                "operation": "generate_and_store_video_no_input_images",
-                "error": f"Failed to generate or save video: {str(e)}",
-            }
-
     return mcp