massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (82)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,455 @@
+ # -*- coding: utf-8 -*-
+ """
+ Generate text content using the OpenAI API and save it as various file formats (TXT, MD, PDF, PPTX).
+ """
+
+ import json
+ import os
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Optional
+
+ from dotenv import load_dotenv
+ from openai import OpenAI
+
+ from massgen.tool._result import ExecutionResult, TextContent
+
+
+ def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
+     """
+     Validate that a path is within allowed directories.
+
+     Args:
+         path: Path to validate
+         allowed_paths: List of allowed base paths (optional)
+
+     Raises:
+         ValueError: If path is not within allowed directories
+     """
+     if not allowed_paths:
+         return  # No restrictions
+
+     for allowed_path in allowed_paths:
+         try:
+             path.relative_to(allowed_path)
+             return  # Path is within this allowed directory
+         except ValueError:
+             continue
+
+     raise ValueError(f"Path not in allowed directories: {path}")
+
+
+ def _generate_pdf(content: str, file_path: Path) -> None:
+     """
+     Generate a PDF file from text content.
+
+     Args:
+         content: Text content to write to PDF
+         file_path: Path where PDF will be saved
+     """
+     try:
+         from reportlab.lib.pagesizes import letter
+         from reportlab.lib.styles import getSampleStyleSheet
+         from reportlab.lib.units import inch
+         from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
+
+         # Create PDF
+         doc = SimpleDocTemplate(str(file_path), pagesize=letter)
+         story = []
+         styles = getSampleStyleSheet()
+
+         # Split content into paragraphs
+         paragraphs = content.split("\n\n")
+
+         for para in paragraphs:
+             if para.strip():
+                 # Handle special markdown-like formatting
+                 if para.startswith("#"):
+                     # Use heading style
+                     p = Paragraph(para.replace("#", "").strip(), styles["Heading1"])
+                 else:
+                     p = Paragraph(para.replace("\n", "<br/>"), styles["BodyText"])
+                 story.append(p)
+                 story.append(Spacer(1, 0.2 * inch))
+
+         doc.build(story)
+
+     except ImportError:
+         # Fallback: use fpdf if reportlab is not available
+         try:
+             from fpdf import FPDF
+
+             pdf = FPDF()
+             pdf.add_page()
+             pdf.set_font("Arial", size=12)
+
+             # Split content into lines and add to PDF
+             for line in content.split("\n"):
+                 pdf.multi_cell(0, 10, txt=line)
+
+             pdf.output(str(file_path))
+
+         except ImportError:
+             raise ImportError(
+                 "PDF generation requires either 'reportlab' or 'fpdf2' library. Install with: pip install reportlab OR pip install fpdf2",
+             )
+
+
+ def _generate_pptx(content: str, file_path: Path) -> None:
+     """
+     Generate a PowerPoint presentation from text content.
+
+     Args:
+         content: Text content to convert to PPTX (expects slide-based structure)
+         file_path: Path where PPTX will be saved
+     """
+     try:
+         from pptx import Presentation
+         from pptx.util import Inches
+
+         # Create presentation
+         prs = Presentation()
+         prs.slide_width = Inches(10)
+         prs.slide_height = Inches(7.5)
+
+         # Parse content into slides
+         # Expected format: slides separated by "---" or "Slide X:" markers
+         # Or parse based on headers (##)
+
+         slides_content = []
+         current_slide = {"title": "", "content": []}
+
+         lines = content.split("\n")
+         i = 0
+
+         while i < len(lines):
+             line = lines[i].strip()
+
+             # Check for slide delimiter
+             if line.startswith("---") or line.startswith("==="):
+                 if current_slide["title"] or current_slide["content"]:
+                     slides_content.append(current_slide)
+                     current_slide = {"title": "", "content": []}
+                 i += 1
+                 continue
+
+             # Check for title (marked with # or ##)
+             if line.startswith("# "):
+                 if current_slide["title"] or current_slide["content"]:
+                     slides_content.append(current_slide)
+                     current_slide = {"title": "", "content": []}
+                 current_slide["title"] = line.lstrip("#").strip()
+                 i += 1
+                 continue
+
+             # Check for subtitle/section (## or "Slide X:")
+             if line.startswith("## ") or line.lower().startswith("slide "):
+                 if current_slide["title"] or current_slide["content"]:
+                     slides_content.append(current_slide)
+                     current_slide = {"title": "", "content": []}
+                 current_slide["title"] = line.lstrip("#").strip()
+                 i += 1
+                 continue
+
+             # Add content to current slide
+             if line:
+                 current_slide["content"].append(line)
+
+             i += 1
+
+         # Add last slide if it has content
+         if current_slide["title"] or current_slide["content"]:
+             slides_content.append(current_slide)
+
+         # If no slides were parsed, create a single slide with all content
+         if not slides_content:
+             slides_content = [
+                 {
+                     "title": "Generated Content",
+                     "content": [line.strip() for line in content.split("\n") if line.strip()],
+                 },
+             ]
+
+         # Create slides
+         for slide_data in slides_content:
+             # Add title slide if it's the first slide and has only title
+             if len(prs.slides) == 0 and slide_data["title"] and not slide_data["content"]:
+                 slide_layout = prs.slide_layouts[0]  # Title slide
+                 slide = prs.slides.add_slide(slide_layout)
+                 title = slide.shapes.title
+                 title.text = slide_data["title"]
+             else:
+                 # Add title and content slide
+                 slide_layout = prs.slide_layouts[1]  # Title and content
+                 slide = prs.slides.add_slide(slide_layout)
+
+                 # Set title
+                 title = slide.shapes.title
+                 title.text = slide_data["title"] if slide_data["title"] else "Content"
+
+                 # Set content
+                 if len(slide.shapes) > 1:
+                     content_shape = slide.shapes[1]
+                     text_frame = content_shape.text_frame
+                     text_frame.clear()
+
+                     for idx, content_line in enumerate(slide_data["content"]):
+                         if idx == 0:
+                             p = text_frame.paragraphs[0]
+                         else:
+                             p = text_frame.add_paragraph()
+
+                         # Handle bullet points
+                         if content_line.startswith("- ") or content_line.startswith("* "):
+                             p.text = content_line[2:].strip()
+                             p.level = 0
+                         elif content_line.startswith("  - ") or content_line.startswith("  * "):
+                             p.text = content_line[4:].strip()
+                             p.level = 1
+                         else:
+                             p.text = content_line
+                             p.level = 0
+
+         # Save presentation
+         prs.save(str(file_path))
+
+     except ImportError:
+         raise ImportError(
+             "PPTX generation requires 'python-pptx' library. Install with: pip install python-pptx",
+         )
+
+
+ async def text_to_file_generation(
+     prompt: str,
+     file_format: str = "txt",
+     filename: Optional[str] = None,
+     model: str = "gpt-4o",
+     storage_path: Optional[str] = None,
+     allowed_paths: Optional[List[str]] = None,
+     agent_cwd: Optional[str] = None,
+ ) -> ExecutionResult:
+     """
+     Generate text content using the OpenAI API and save it as various file formats.
+
+     This tool uses OpenAI's chat completion API to generate text content based on a prompt,
+     then saves the generated content in the specified file format (TXT, MD, PDF, or PPTX).
+
+     Args:
+         prompt: Description of the content to generate (e.g., "Write a technical report about AI")
+         file_format: Output file format - Options: "txt", "md", "pdf", "pptx" (default: "txt")
+         filename: Custom filename without extension (optional)
+             If not provided, generated from the prompt and a timestamp
+         model: OpenAI model to use (default: "gpt-4o")
+             Options: "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"
+         storage_path: Directory path where to save the file (optional)
+             - **IMPORTANT**: Must be a DIRECTORY path only, NOT a file path (e.g., "documents/reports" NOT "documents/report.txt")
+             - The filename is automatically generated from the prompt or custom filename parameter
+             - Relative path: Resolved relative to agent's workspace (e.g., "documents/reports")
+             - Absolute path: Must be within allowed directories
+             - None/empty: Saves to agent's workspace root
+         allowed_paths: List of allowed base paths for validation (optional)
+         agent_cwd: Agent's current working directory (automatically injected)
+
+     Returns:
+         ExecutionResult containing:
+             - success: Whether operation succeeded
+             - operation: "generate_and_store_file"
+             - file_path: Path to the generated file
+             - filename: Name of the generated file
+             - file_format: Format of the generated file
+             - content_preview: First 500 characters of generated content
+             - file_size: Size of the generated file in bytes
+             - model: Model used for generation
+             - prompt: The prompt used
+
+     Examples:
+         text_to_file_generation("Write a blog post about Python", file_format="md")
+         → Generates markdown file with blog post content
+
+         text_to_file_generation(
+             "Create a technical report on machine learning",
+             file_format="pdf",
+             filename="ml_report"
+         )
+         → Generates PDF file named "ml_report.pdf"
+
+         text_to_file_generation(
+             "Write meeting notes for today's standup",
+             file_format="txt",
+             storage_path="documents/notes"
+         )
+         → Generates text file in documents/notes/ directory
+
+     Security:
+         - Requires valid OpenAI API key
+         - Files are saved to specified path within workspace
+         - Path must be within allowed directories
+
+     Note:
+         - PDF generation requires either 'reportlab' or 'fpdf2' library
+         - PPTX generation requires 'python-pptx' library
+         - For PPTX format, structure your prompt to include slide titles (using # or ##) and bullet points (using -)
+         - The quality and format of generated content depends on the prompt
+         - Longer content may consume more tokens
+     """
+     try:
+         # Validate file format
+         supported_formats = ["txt", "md", "pdf", "pptx"]
+         file_format = file_format.lower()
+         if file_format not in supported_formats:
+             result = {
+                 "success": False,
+                 "operation": "generate_and_store_file",
+                 "error": f"Unsupported file format: {file_format}. Supported formats: {', '.join(supported_formats)}",
+             }
+             return ExecutionResult(
+                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+             )
+
+         # Convert allowed_paths from strings to Path objects
+         allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None
+
+         # Use agent_cwd if available, otherwise fall back to the current working directory
+         base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
+
+         # Load environment variables
+         script_dir = Path(__file__).parent.parent.parent.parent
+         env_path = script_dir / ".env"
+         if env_path.exists():
+             load_dotenv(env_path)
+         else:
+             load_dotenv()
+
+         openai_api_key = os.getenv("OPENAI_API_KEY")
+
+         if not openai_api_key:
+             result = {
+                 "success": False,
+                 "operation": "generate_and_store_file",
+                 "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
+             }
+             return ExecutionResult(
+                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+             )
+
+         # Initialize OpenAI client
+         client = OpenAI(api_key=openai_api_key)
+
+         # Determine storage directory
+         if storage_path:
+             if Path(storage_path).is_absolute():
+                 storage_dir = Path(storage_path).resolve()
+             else:
+                 storage_dir = (base_dir / storage_path).resolve()
+         else:
+             storage_dir = base_dir
+
+         # Validate storage directory
+         _validate_path_access(storage_dir, allowed_paths_list)
+         storage_dir.mkdir(parents=True, exist_ok=True)
+
+         try:
+             # Generate content using OpenAI API
+             response = client.chat.completions.create(
+                 model=model,
+                 messages=[
+                     {
+                         "role": "system",
+                         "content": f"You are a professional content writer. Generate high-quality {file_format.upper()} content based on the user's request.",
+                     },
+                     {
+                         "role": "user",
+                         "content": prompt,
+                     },
+                 ],
+                 temperature=0.7,
+             )
+
+             # Extract generated content
+             generated_content = response.choices[0].message.content
+
+             if not generated_content:
+                 result = {
+                     "success": False,
+                     "operation": "generate_and_store_file",
+                     "error": "No content generated from OpenAI API",
+                 }
+                 return ExecutionResult(
+                     output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+                 )
+
+         except Exception as api_error:
+             result = {
+                 "success": False,
+                 "operation": "generate_and_store_file",
+                 "error": f"OpenAI API error: {str(api_error)}",
+             }
+             return ExecutionResult(
+                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+             )
+
+         # Generate filename
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+         if filename:
+             # Use custom filename (remove extension if provided)
+             clean_filename = filename.rsplit(".", 1)[0]
+             file_name = f"{clean_filename}.{file_format}"
+         else:
+             # Generate filename from prompt
+             clean_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
+             clean_prompt = clean_prompt.replace(" ", "_")
+             file_name = f"{timestamp}_{clean_prompt}.{file_format}"
+
+         # Full file path
+         file_path = storage_dir / file_name
+
+         # Save content based on format
+         try:
+             if file_format == "pdf":
+                 _generate_pdf(generated_content, file_path)
+             elif file_format == "pptx":
+                 _generate_pptx(generated_content, file_path)
+             else:
+                 # For txt and md, save as plain text
+                 file_path.write_text(generated_content, encoding="utf-8")
+
+             file_size = file_path.stat().st_size
+
+         except Exception as save_error:
+             result = {
+                 "success": False,
+                 "operation": "generate_and_store_file",
+                 "error": f"Failed to save file: {str(save_error)}",
+             }
+             return ExecutionResult(
+                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+             )
+
+         # Create result
+         result = {
+             "success": True,
+             "operation": "generate_and_store_file",
+             "file_path": str(file_path),
+             "filename": file_name,
+             "file_format": file_format,
+             "content_preview": generated_content[:500] + ("..." if len(generated_content) > 500 else ""),
+             "file_size": file_size,
+             "model": model,
+             "prompt": prompt,
+         }
+
+         return ExecutionResult(
+             output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+         )
+
+     except Exception as e:
+         result = {
+             "success": False,
+             "operation": "generate_and_store_file",
+             "error": f"Failed to generate file: {str(e)}",
+         }
+         return ExecutionResult(
+             output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+         )
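
The new tool above is an async function. A minimal invocation sketch, assuming the module path shown in this diff, an OPENAI_API_KEY available in the environment, and that ExecutionResult exposes output_blocks as an attribute:

import asyncio

from massgen.tool._multimodal_tools.text_to_file_generation import text_to_file_generation

async def main() -> None:
    # Generate markdown content with gpt-4o and save it under the agent workspace.
    result = await text_to_file_generation(
        prompt="Write a short technical report about AI",
        file_format="md",
        filename="ai_report",        # saved as ai_report.md
        storage_path="documents",    # must be a directory path, not a file path
    )
    for block in result.output_blocks:
        print(block.data)            # JSON with file_path, file_size, content_preview, ...

asyncio.run(main())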
@@ -0,0 +1,222 @@
+ # -*- coding: utf-8 -*-
+ """
+ Generate an image using OpenAI's Responses API with gpt-4.1 (WITHOUT ANY INPUT IMAGES) and store it in the workspace.
+ """
+
+ import base64
+ import json
+ import os
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Optional
+
+ from dotenv import load_dotenv
+ from openai import OpenAI
+
+ from massgen.tool._result import ExecutionResult, TextContent
+
+
+ def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
+     """
+     Validate that a path is within allowed directories.
+
+     Args:
+         path: Path to validate
+         allowed_paths: List of allowed base paths (optional)
+
+     Raises:
+         ValueError: If path is not within allowed directories
+     """
+     if not allowed_paths:
+         return  # No restrictions
+
+     for allowed_path in allowed_paths:
+         try:
+             path.relative_to(allowed_path)
+             return  # Path is within this allowed directory
+         except ValueError:
+             continue
+
+     raise ValueError(f"Path not in allowed directories: {path}")
+
+
+ async def text_to_image_generation(
+     prompt: str,
+     model: str = "gpt-4.1",
+     storage_path: Optional[str] = None,
+     allowed_paths: Optional[List[str]] = None,
+     agent_cwd: Optional[str] = None,
+ ) -> ExecutionResult:
+     """
+     Generate an image using OpenAI's Responses API with gpt-4.1 **WITHOUT ANY INPUT IMAGES** and store it in the workspace.
+
+     This tool generates an image from a text prompt only (no input images are accepted) and saves it to the agent's workspace.
+
+     Args:
+         prompt: Text description of the image to generate
+         model: Model to use for generation (default: "gpt-4.1")
+             Options: "gpt-4.1"
+         storage_path: Directory path where to save the image (optional)
+             - **IMPORTANT**: Must be a DIRECTORY path only, NOT a file path (e.g., "images/generated" NOT "images/cat.png")
+             - The filename is automatically generated from the prompt
+             - Relative path: Resolved relative to agent's workspace (e.g., "images/generated")
+             - Absolute path: Must be within allowed directories
+             - None/empty: Saves to agent's workspace root
+         allowed_paths: List of allowed base paths for validation (optional)
+         agent_cwd: Agent's current working directory (automatically injected)
+
+     Returns:
+         ExecutionResult containing:
+             - success: Whether operation succeeded
+             - operation: "generate_and_store_image_no_input_images"
+             - note: Note about operation
+             - images: List of generated images with file paths and metadata
+             - model: Model used for generation
+             - prompt: The prompt used for generation
+             - total_images: Total number of images generated and saved
+
+     Examples:
+         text_to_image_generation("a cat in space")
+         → Generates and saves to: 20240115_143022_a_cat_in_space.png
+
+         text_to_image_generation("sunset over mountains", storage_path="art/landscapes")
+         → Generates and saves to: art/landscapes/20240115_143022_sunset_over_mountains.png
+
+     Security:
+         - Requires valid OpenAI API key (automatically detected from .env or environment)
+         - Files are saved to specified path within workspace
+         - Path must be within allowed directories
+
+     Note:
+         API key is automatically detected in this order:
+         1. First checks .env file in current directory or parent directories
+         2. Then checks environment variables
+     """
+     try:
+         # Convert allowed_paths from strings to Path objects
+         allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None
+
+         # Try to find and load .env file from multiple locations
+         # 1. Try loading from script directory
+         script_dir = Path(__file__).parent.parent.parent.parent  # Go up to project root
+         env_path = script_dir / ".env"
+         if env_path.exists():
+             load_dotenv(env_path)
+         else:
+             # 2. Try loading from current directory and parent directories
+             load_dotenv()
+
+         # Get API key from environment (load_dotenv will have loaded .env file)
+         openai_api_key = os.getenv("OPENAI_API_KEY")
+
+         if not openai_api_key:
+             result = {
+                 "success": False,
+                 "operation": "generate_and_store_image",
+                 "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
+             }
+             return ExecutionResult(
+                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+             )
+
+         # Initialize OpenAI client
+         client = OpenAI(api_key=openai_api_key)
+
+         # Determine storage directory
+         # Use agent_cwd if available, otherwise fall back to Path.cwd()
+         base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
+
+         if storage_path:
+             if Path(storage_path).is_absolute():
+                 storage_dir = Path(storage_path).resolve()
+             else:
+                 storage_dir = (base_dir / storage_path).resolve()
+         else:
+             storage_dir = base_dir
+
+         # Validate storage directory is within allowed paths
+         _validate_path_access(storage_dir, allowed_paths_list)
+
+         # Create directory if it doesn't exist
+         storage_dir.mkdir(parents=True, exist_ok=True)
+
+         try:
+             # Generate image using OpenAI API with gpt-4.1 non-streaming format
+             response = client.responses.create(
+                 model=model,
+                 input=prompt,
+                 tools=[{"type": "image_generation"}],
+             )
+
+             # Extract image data from response
+             image_data = [output.result for output in response.output if output.type == "image_generation_call"]
+
+             saved_images = []
+
+             if image_data:
+                 # Generate filename with timestamp
+                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+                 # Clean prompt for filename
+                 clean_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
+                 clean_prompt = clean_prompt.replace(" ", "_")
+
+                 for idx, image_base64 in enumerate(image_data):
+                     # Decode base64 image data
+                     image_bytes = base64.b64decode(image_base64)
+
+                     # Add index if generating multiple images
+                     if len(image_data) > 1:
+                         filename = f"{timestamp}_{clean_prompt}_{idx+1}.png"
+                     else:
+                         filename = f"{timestamp}_{clean_prompt}.png"
+
+                     # Full file path
+                     file_path = storage_dir / filename
+
+                     # Write image to file
+                     file_path.write_bytes(image_bytes)
+                     file_size = len(image_bytes)
+
+                     saved_images.append(
+                         {
+                             "file_path": str(file_path),
+                             "filename": filename,
+                             "size": file_size,
+                             "index": idx,
+                         },
+                     )
+
+             result = {
+                 "success": True,
+                 "operation": "generate_and_store_image_no_input_images",
+                 "note": "New images are generated and saved to the specified path.",
+                 "images": saved_images,
+                 "model": model,
+                 "prompt": prompt,
+                 "total_images": len(saved_images),
+             }
+
+             return ExecutionResult(
+                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+             )
+
+         except Exception as api_error:
+             result = {
+                 "success": False,
+                 "operation": "generate_and_store_image_no_input_images",
+                 "error": f"OpenAI API error: {str(api_error)}",
+             }
+             return ExecutionResult(
+                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+             )
+
+     except Exception as e:
+         result = {
+             "success": False,
+             "operation": "generate_and_store_image_no_input_images",
+             "error": f"Failed to generate or save image: {str(e)}",
+         }
+         return ExecutionResult(
+             output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+         )
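
A similar invocation sketch for the image tool, with the same assumptions about the module path, API key, and ExecutionResult attributes:

import asyncio

from massgen.tool._multimodal_tools.text_to_image_generation import text_to_image_generation

async def main() -> None:
    # Generate a PNG from a text prompt via the Responses API image_generation tool.
    result = await text_to_image_generation(
        prompt="a cat in space",
        storage_path="art/generated",  # directory only; the filename is derived from the prompt
    )
    for block in result.output_blocks:
        print(block.data)              # JSON listing the saved images and their paths

asyncio.run(main())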