massgen 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
- massgen/api_params_handler/_claude_api_params_handler.py +4 -0
- massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
- massgen/api_params_handler/_response_api_params_handler.py +4 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
- massgen/backend/docs/permissions_and_context_files.md +2 -2
- massgen/backend/response.py +2 -0
- massgen/chat_agent.py +340 -20
- massgen/cli.py +326 -19
- massgen/configs/README.md +92 -41
- massgen/configs/memory/gpt5mini_gemini_baseline_research_to_implementation.yaml +94 -0
- massgen/configs/memory/gpt5mini_gemini_context_window_management.yaml +187 -0
- massgen/configs/memory/gpt5mini_gemini_research_to_implementation.yaml +127 -0
- massgen/configs/memory/gpt5mini_high_reasoning_gemini.yaml +107 -0
- massgen/configs/memory/single_agent_compression_test.yaml +64 -0
- massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +1 -1
- massgen/filesystem_manager/_filesystem_manager.py +1 -0
- massgen/filesystem_manager/_path_permission_manager.py +148 -0
- massgen/memory/README.md +277 -0
- massgen/memory/__init__.py +26 -0
- massgen/memory/_base.py +193 -0
- massgen/memory/_compression.py +237 -0
- massgen/memory/_context_monitor.py +211 -0
- massgen/memory/_conversation.py +255 -0
- massgen/memory/_fact_extraction_prompts.py +333 -0
- massgen/memory/_mem0_adapters.py +257 -0
- massgen/memory/_persistent.py +687 -0
- massgen/memory/docker-compose.qdrant.yml +36 -0
- massgen/memory/docs/DESIGN.md +388 -0
- massgen/memory/docs/QUICKSTART.md +409 -0
- massgen/memory/docs/SUMMARY.md +319 -0
- massgen/memory/docs/agent_use_memory.md +408 -0
- massgen/memory/docs/orchestrator_use_memory.md +586 -0
- massgen/memory/examples.py +237 -0
- massgen/message_templates.py +160 -12
- massgen/orchestrator.py +223 -7
- massgen/tests/memory/test_agent_compression.py +174 -0
- massgen/{configs/tools → tests}/memory/test_context_window_management.py +30 -30
- massgen/tests/memory/test_force_compression.py +154 -0
- massgen/tests/memory/test_simple_compression.py +147 -0
- massgen/tests/test_agent_memory.py +534 -0
- massgen/tests/test_binary_file_blocking.py +274 -0
- massgen/tests/test_case_studies.md +12 -12
- massgen/tests/test_conversation_memory.py +382 -0
- massgen/tests/test_multimodal_size_limits.py +407 -0
- massgen/tests/test_orchestrator_memory.py +620 -0
- massgen/tests/test_persistent_memory.py +435 -0
- massgen/token_manager/token_manager.py +6 -0
- massgen/tool/_manager.py +7 -2
- massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
- massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
- massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
- massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
- massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
- massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
- massgen/tool/_multimodal_tools/understand_audio.py +19 -1
- massgen/tool/_multimodal_tools/understand_file.py +6 -1
- massgen/tool/_multimodal_tools/understand_image.py +112 -8
- massgen/tool/_multimodal_tools/understand_video.py +32 -5
- massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
- massgen/tool/docs/multimodal_tools.md +589 -0
- massgen/tools/__init__.py +8 -0
- massgen/tools/_planning_mcp_server.py +520 -0
- massgen/tools/planning_dataclasses.py +434 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/METADATA +142 -82
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/RECORD +84 -41
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +0 -67
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +0 -68
- massgen/configs/tools/memory/README.md +0 -199
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +0 -131
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +0 -133
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +0 -97
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/WHEEL +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Usage examples for MassGen memory system.
|
|
4
|
+
|
|
5
|
+
These examples demonstrate how to use conversation and persistent memory
|
|
6
|
+
in your MassGen agents.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def example_conversation_memory():
    """Walk through the short-term ``ConversationMemory`` API end to end.

    Adds a four-message dialogue one message at a time, then prints the
    total size, the role of the last message, the number of user messages,
    and the size after truncating to two messages. Finally it copies the
    state into a second instance via ``state_dict`` / ``load_state_dict``.
    """
    # Function-scope import, matching the other examples in this module.
    from massgen.memory import ConversationMemory

    rule = "=" * 60
    print(rule)
    print("Example 1: Conversation Memory (Short-term)")
    print(rule)

    memory = ConversationMemory()

    # The simulated dialogue, expressed as (role, content) pairs.
    turns = (
        ("user", "Hello! My name is Alice."),
        ("assistant", "Hi Alice! How can I help you today?"),
        ("user", "I'm interested in learning about Python."),
        ("assistant", "Great! Python is a versatile programming language..."),
    )
    dialogue = [{"role": speaker, "content": text} for speaker, text in turns]

    # Store each message individually and echo a 50-character preview.
    for entry in dialogue:
        await memory.add(entry)
        preview = entry["content"][:50]
        print(f"Added: {entry['role']} - {preview}...")

    # Fetch every stored message; the result is not used, only the call is shown.
    await memory.get_messages()
    total = await memory.size()
    print(f"\nTotal messages: {total}")

    # Most recent message.
    final_entry = await memory.get_last_message()
    print(f"Last message role: {final_entry['role']}")

    # Only the messages whose role is "user".
    from_user = await memory.get_messages_by_role("user")
    print(f"User messages: {len(from_user)}")

    # Shrink the memory to two messages.
    await memory.truncate_to_size(2)
    remaining = await memory.size()
    print(f"After truncation: {remaining} messages")

    # Snapshot the current state.
    snapshot = memory.state_dict()
    print(f"\nState saved: {len(snapshot['messages'])} messages")

    # Load the snapshot into a brand-new instance.
    clone = ConversationMemory()
    clone.load_state_dict(snapshot)
    restored_count = await clone.size()
    print(f"State restored: {restored_count} messages")

    print("\n✅ Conversation memory example completed!\n")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
async def example_persistent_memory():
    """Show, conceptually, how ``PersistentMemory`` is meant to be used.

    Nothing here touches a real backend: the function prints a banner and a
    notice, and the illustrative code lives in an unexecuted string literal.
    Per that notice, running it for real needs mem0 plus configured LLM and
    embedding backends.
    """
    rule = "=" * 60
    print(rule)
    print("Example 2: Persistent Memory (Long-term)")
    print(rule)

    # Tell the reader up front that the code below is illustrative only.
    print("\n⚠️ This example requires actual LLM and embedding backends.")
    print(" Uncomment and configure backends to run this example.\n")

    # The conceptual usage is kept as a bare string so it is never executed.
    """
    from massgen.memory import PersistentMemory
    from massgen.backend import OpenAIBackend  # Or your backend

    # Initialize backends
    llm_backend = OpenAIBackend(model="gpt-4")
    embedding_backend = OpenAIBackend(model="text-embedding-3-small")

    # Create persistent memory
    memory = PersistentMemory(
        agent_name="learning_assistant",
        user_name="alice",
        llm_backend=llm_backend,
        embedding_backend=embedding_backend,
        on_disk=True
    )

    # Record a conversation
    await memory.record([
        {"role": "user", "content": "I love Python programming"},
        {"role": "assistant", "content": "That's great! Python is very versatile."}
    ])
    print("✓ Recorded conversation to long-term memory")

    # Retrieve relevant memories
    query = "What programming languages does the user like?"
    relevant = await memory.retrieve(query)
    print(f"Retrieved: {relevant}")

    # Agent-controlled memory saving
    result = await memory.save_to_memory(
        thinking="User expressed interest in a topic",
        content=["User likes Python", "User is a beginner"]
    )
    print(f"Save result: {result}")

    # Agent-controlled memory recall
    result = await memory.recall_from_memory(
        keywords=["programming", "Python"],
        limit=3
    )
    print(f"Recalled {result['count']} memories")
    for mem in result['memories']:
        print(f" - {mem}")
    """

    print("✅ Persistent memory example completed!\n")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
async def example_combined_usage():
    """Sketch how short-term and long-term memory fit together.

    Only the short-term half is exercised: three messages are added to a
    ``ConversationMemory`` and its size is printed. The long-term half is
    described in comments and in the printed notes.
    """
    rule = "=" * 60
    print(rule)
    print("Example 3: Combined Memory Usage")
    print(rule)

    # Imported after the banner, so the banner appears even if the import fails.
    from massgen.memory import ConversationMemory

    # Holds the active conversation.
    short_term = ConversationMemory()

    # A small ongoing exchange, as (role, content) pairs.
    exchange = (
        ("system", "You are a helpful assistant."),
        ("user", "What's the weather like?"),
        ("assistant", "I can help you check the weather!"),
    )
    for speaker, text in exchange:
        await short_term.add({"role": speaker, "content": text})

    stored = await short_term.size()
    print(f"Short-term memory: {stored} messages")

    # A real agent would additionally:
    #   1. Retrieve relevant long-term memories based on the current message
    #   2. Inject them into the conversation context
    #   3. Generate a response
    #   4. Add the response to short-term memory
    #   5. Optionally save important parts to long-term memory

    print("\n💡 In production, this would be integrated with:")
    for note in (
        " - LLM backend for generating responses",
        " - Persistent memory for cross-session knowledge",
        " - Tool system for agent-controlled memory",
    ):
        print(note)

    print("\n✅ Combined usage example completed!\n")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def example_memory_management():
    """Demonstrate housekeeping operations on a ``ConversationMemory``.

    Fills the memory with 100 alternating user/assistant messages, then in
    turn: truncates to 50, reads the last 10, counts user messages, clears
    everything, and snapshots the state after adding one more message.
    """
    rule = "=" * 60
    print(rule)
    print("Example 4: Memory Management")
    print(rule)

    from massgen.memory import ConversationMemory

    memory = ConversationMemory()

    # Simulate a long conversation: even indices are user turns, odd are assistant.
    for index in range(100):
        speaker = "assistant" if index % 2 else "user"
        await memory.add({"role": speaker, "content": f"Message {index}"})

    added = await memory.size()
    print(f"Added {added} messages")

    # Practice 1: truncate regularly.
    await memory.truncate_to_size(50)
    kept = await memory.size()
    print(f"After truncation: {kept} messages")

    # Practice 2: request only the most recent messages.
    latest = await memory.get_messages(limit=10)
    print(f"Retrieved last {len(latest)} messages")

    # Practice 3: filter by role (here, count what the user sent).
    from_user = await memory.get_messages_by_role("user")
    print(f"User sent {len(from_user)} messages")

    # Practice 4: clear the memory when a new topic starts.
    await memory.clear()
    after_clear = await memory.size()
    print(f"After clearing: {after_clear} messages")

    # Practice 5: snapshot the state so it can be persisted.
    await memory.add({"role": "user", "content": "Important message"})
    snapshot = memory.state_dict()
    print(f"State saved with {len(snapshot['messages'])} messages")

    print("\n💾 Save this state to disk for persistence across restarts!")
    print("\n✅ Memory management example completed!\n")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
async def main():
    """Run the four memory examples in order, then print follow-up steps."""
    print("\n🚀 MassGen Memory System Examples\n")

    # Awaited one after another so their output stays in order.
    for example in (
        example_conversation_memory,
        example_persistent_memory,
        example_combined_usage,
        example_memory_management,
    ):
        await example()

    rule = "=" * 60
    closing_lines = (
        rule,
        "All examples completed! 🎉",
        rule,
        "\nNext steps:",
        "1. Install mem0: pip install mem0ai",
        "2. Configure your LLM and embedding backends",
        "3. Try persistent memory with real backends",
        "4. Integrate into your MassGen agents",
        "\n",
    )
    for line in closing_lines:
        print(line)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# Script entry point: run every example on a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
massgen/message_templates.py
CHANGED
|
@@ -302,6 +302,8 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
|
|
|
302
302
|
original_system_message: Optional[str] = None,
|
|
303
303
|
enable_image_generation: bool = False,
|
|
304
304
|
enable_audio_generation: bool = False,
|
|
305
|
+
enable_file_generation: bool = False,
|
|
306
|
+
enable_video_generation: bool = False,
|
|
305
307
|
has_irreversible_actions: bool = False,
|
|
306
308
|
enable_command_execution: bool = False,
|
|
307
309
|
) -> str:
|
|
@@ -311,6 +313,8 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
|
|
|
311
313
|
original_system_message: The agent's original system message to preserve
|
|
312
314
|
enable_image_generation: Whether image generation is enabled
|
|
313
315
|
enable_audio_generation: Whether audio generation is enabled
|
|
316
|
+
enable_file_generation: Whether file generation is enabled
|
|
317
|
+
enable_video_generation: Whether video generation is enabled
|
|
314
318
|
has_irreversible_actions: Whether agent has write access to context paths (requires actual file delivery)
|
|
315
319
|
enable_command_execution: Whether command execution is enabled for this agent
|
|
316
320
|
"""
|
|
@@ -335,21 +339,165 @@ Present the best possible coordinated answer by combining the strengths from all
|
|
|
335
339
|
# Add image generation instructions only if enabled
|
|
336
340
|
if enable_image_generation:
|
|
337
341
|
presentation_instructions += """For image generation tasks:
|
|
338
|
-
|
|
339
|
-
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
342
|
+
|
|
343
|
+
**MANDATORY WORKFLOW - You MUST follow these steps in order:**
|
|
344
|
+
|
|
345
|
+
Step 1: **Check for existing images (REQUIRED)**
|
|
346
|
+
- First, list all files in the Shared Reference directory (temp_workspaces) to find ALL images from EVERY agent
|
|
347
|
+
- Look for image files (.png, .jpg, .jpeg, .gif, .webp, etc.) in each agent's workspace subdirectory
|
|
348
|
+
|
|
349
|
+
Step 2: **Understand ALL existing images (REQUIRED if images exist)**
|
|
350
|
+
- For EACH image file you found, you MUST call the **understand_image** tool to extract its key visual elements, composition, style, and quality
|
|
351
|
+
- Do this for images from yourself AND from other agents - analyze ALL images found
|
|
352
|
+
- DO NOT skip this step even if you think you know the content
|
|
353
|
+
|
|
354
|
+
Step 3: **Synthesize and generate final image (REQUIRED)**
|
|
355
|
+
- If existing images were found and analyzed:
|
|
356
|
+
* Synthesize ALL image analyses into a single, detailed, combined prompt
|
|
357
|
+
* The combined prompt should capture the best visual elements, composition, style, and quality from all analyzed images
|
|
358
|
+
* Call **image_to_image_generation** with this synthesized prompt and ALL images to create the final unified image
|
|
359
|
+
- If NO existing images were found:
|
|
360
|
+
* Generate a new image based directly on the original task requirements
|
|
361
|
+
* Call **text_to_image_generation** with a prompt derived from the original task
|
|
362
|
+
|
|
363
|
+
Step 4: **Save and report (REQUIRED)**
|
|
364
|
+
- Save the final generated image in your workspace
|
|
365
|
+
- Report the saved path in your final answer
|
|
366
|
+
|
|
367
|
+
**CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing images. Do not skip calling
|
|
368
|
+
understand_image on found images. This is a mandatory synthesis workflow.
|
|
369
|
+
"""
|
|
370
|
+
# presentation_instructions += """For image generation tasks:
|
|
371
|
+
# - Extract image paths from the existing answer and resolve them in the shared reference.
|
|
372
|
+
# - Gather all agent-produced images (ignore non-existent files).
|
|
373
|
+
# - IMPORTANT: If you find ANY existing images (from yourself or other agents), you MUST call the understand_image tool
|
|
374
|
+
# to analyze EACH image and extract their key visual elements, composition, style, and quality.
|
|
375
|
+
# - IMPORTANT: Synthesize insights from all analyzed images into a detailed, combined prompt that captures the best elements.
|
|
376
|
+
# - IMPORTANT: Call text_to_image_generation with this synthesized prompt to generate the final image.
|
|
377
|
+
# - IMPORTANT: Save the final output in your workspace and output the saved path.
|
|
378
|
+
# - If no existing images are found, generate based on the original task requirements.
|
|
379
|
+
# """
|
|
343
380
|
# Add audio generation instructions only if enabled
|
|
344
381
|
if enable_audio_generation:
|
|
345
382
|
presentation_instructions += """For audio generation tasks:
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
-
|
|
351
|
-
-
|
|
352
|
-
|
|
383
|
+
|
|
384
|
+
**MANDATORY WORKFLOW - You MUST follow these steps in order:**
|
|
385
|
+
|
|
386
|
+
Step 1: **Check for existing audios (REQUIRED)**
|
|
387
|
+
- First, list all files in the Shared Reference directory (temp_workspaces) to find ALL audio files from EVERY agent
|
|
388
|
+
- Look for audio files (.mp3, .wav, .flac, etc.) in each agent's workspace subdirectory
|
|
389
|
+
|
|
390
|
+
Step 2: **Understand ALL existing audios (REQUIRED if audios exist)**
|
|
391
|
+
- For EACH audio file you found, you MUST call the **understand_audio** tool to extract its transcription
|
|
392
|
+
- Do this for audios from yourself AND from other agents - analyze ALL audios found
|
|
393
|
+
- DO NOT skip this step even if you think you know the content
|
|
394
|
+
|
|
395
|
+
Step 3: **Synthesize and generate final audio (REQUIRED)**
|
|
396
|
+
- If existing audios were found and analyzed:
|
|
397
|
+
* Synthesize ALL audio transcriptions into a single, detailed, combined transcription
|
|
398
|
+
* The combined transcription should capture the best content from all analyzed audios
|
|
399
|
+
* Call **text_to_speech_transcription_generation** with this synthesized transcription to create the final unified audio
|
|
400
|
+
- If NO existing audios were found:
|
|
401
|
+
* Generate a new audio based directly on the original task requirements
|
|
402
|
+
* Call **text_to_speech_transcription_generation** with a transcription derived from the original task
|
|
403
|
+
|
|
404
|
+
Step 4: **Save and report (REQUIRED)**
|
|
405
|
+
- Save the final generated audio in your workspace
|
|
406
|
+
- Report the saved path in your final answer
|
|
407
|
+
|
|
408
|
+
**CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing audios. Do not skip calling
|
|
409
|
+
understand_audio on found audios. This is a mandatory synthesis workflow.
|
|
410
|
+
"""
|
|
411
|
+
# presentation_instructions += """For audio generation tasks:
|
|
412
|
+
# - Extract audio paths from the existing answer and resolve them in the shared reference.
|
|
413
|
+
# - Gather ALL audio files produced by EVERY agent (ignore non-existent files).
|
|
414
|
+
# - IMPORTANT: If you find ANY existing audios (from yourself or other agents), you MUST call the **understand_audio** tool to extract each audio's transcription.
|
|
415
|
+
# - IMPORTANT: Synthesize transcriptions from all audios into a detailed, combined transcription.
|
|
416
|
+
# - IMPORTANT: You MUST call the **text_to_speech_transcription_generation** tool with this synthesized transcription to generate the final audio.
|
|
417
|
+
# - IMPORTANT: Save the final output in your workspace and output the saved path.
|
|
418
|
+
# - If no existing audios are found, generate based on the original task requirements.
|
|
419
|
+
# """
|
|
420
|
+
# Add file generation instructions only if enabled
|
|
421
|
+
if enable_file_generation:
|
|
422
|
+
presentation_instructions += """For file generation tasks:
|
|
423
|
+
|
|
424
|
+
**MANDATORY WORKFLOW - You MUST follow these steps in order:**
|
|
425
|
+
|
|
426
|
+
Step 1: **Check for existing files (REQUIRED)**
|
|
427
|
+
- First, list all files in the Shared Reference directory (temp_workspaces) to find ALL files from EVERY agent
|
|
428
|
+
- Look for files of the requested type in each agent's workspace subdirectory
|
|
429
|
+
|
|
430
|
+
Step 2: **Understand ALL existing files (REQUIRED if files exist)**
|
|
431
|
+
- For EACH file you found, you MUST call the **understand_file** tool to extract its content, structure, and key elements
|
|
432
|
+
- Do this for files from yourself AND from other agents - analyze ALL files found
|
|
433
|
+
- DO NOT skip this step even if you think you know the content
|
|
434
|
+
|
|
435
|
+
Step 3: **Synthesize and generate final file (REQUIRED)**
|
|
436
|
+
- If existing files were found and analyzed:
|
|
437
|
+
* Synthesize ALL file contents into a single, detailed, combined content
|
|
438
|
+
* The combined content should capture the best elements, structure, and information from all analyzed files
|
|
439
|
+
* Call **text_to_file_generation** with this synthesized content to generate the final unified file
|
|
440
|
+
- If NO existing files were found:
|
|
441
|
+
* Generate a new file based directly on the original task requirements
|
|
442
|
+
* Call **text_to_file_generation** with content derived from the original task
|
|
443
|
+
|
|
444
|
+
Step 4: **Save and report (REQUIRED)**
|
|
445
|
+
- Save the final generated file in your workspace
|
|
446
|
+
- Report the saved path in your final answer
|
|
447
|
+
|
|
448
|
+
**CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing files. Do not skip calling
|
|
449
|
+
understand_file on found files. This is a mandatory synthesis workflow.
|
|
450
|
+
"""
|
|
451
|
+
# presentation_instructions += """For file generation tasks:
|
|
452
|
+
# - Extract file paths from the existing answer and resolve them in the shared reference.
|
|
453
|
+
# - Gather ALL files produced by EVERY agent (ignore non-existent files).
|
|
454
|
+
# - IMPORTANT: If you find ANY existing files (from yourself or other agents), you MUST call the **understand_file** tool to extract each file's content.
|
|
455
|
+
# - IMPORTANT: Synthesize contents from all files into a detailed, combined content.
|
|
456
|
+
# - IMPORTANT: You MUST call the **text_to_file_generation** tool with this synthesized content to generate the final file.
|
|
457
|
+
# - IMPORTANT: Save the final output in your workspace and output the saved path.
|
|
458
|
+
# - If no existing files are found, generate based on the original task requirements.
|
|
459
|
+
# """
|
|
460
|
+
# Add video generation instructions only if enabled
|
|
461
|
+
if enable_video_generation:
|
|
462
|
+
presentation_instructions += """For video generation tasks:
|
|
463
|
+
|
|
464
|
+
**MANDATORY WORKFLOW - You MUST follow these steps in order:**
|
|
465
|
+
|
|
466
|
+
Step 1: **Check for existing videos (REQUIRED)**
|
|
467
|
+
- First, list all files in the Shared Reference directory (temp_workspaces) to find ALL videos from EVERY agent
|
|
468
|
+
- Look for video files (.mp4, .avi, .mov, etc.) in each agent's workspace subdirectory
|
|
469
|
+
|
|
470
|
+
Step 2: **Understand ALL existing videos (REQUIRED if videos exist)**
|
|
471
|
+
- For EACH video file you found, you MUST call the **understand_video** tool to extract its description, visual features, and
|
|
472
|
+
key elements
|
|
473
|
+
- Do this for videos from yourself AND from other agents - analyze ALL videos found
|
|
474
|
+
- DO NOT skip this step even if you think you know the content
|
|
475
|
+
|
|
476
|
+
Step 3: **Synthesize and generate final video (REQUIRED)**
|
|
477
|
+
- If existing videos were found and analyzed:
|
|
478
|
+
* Synthesize ALL video descriptions into a single, detailed, combined prompt
|
|
479
|
+
* The combined prompt should capture the best visual elements, composition, motion, and style from all analyzed videos
|
|
480
|
+
* Call **text_to_video_generation** with this synthesized prompt to create the final unified video
|
|
481
|
+
- If NO existing videos were found:
|
|
482
|
+
* Generate a new video based directly on the original task requirements
|
|
483
|
+
* Call **text_to_video_generation** with a prompt derived from the original task
|
|
484
|
+
|
|
485
|
+
Step 4: **Save and report (REQUIRED)**
|
|
486
|
+
- Save the final generated video in your workspace
|
|
487
|
+
- Report the saved path in your final answer
|
|
488
|
+
|
|
489
|
+
**CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing videos. Do not skip calling
|
|
490
|
+
understand_video on found videos. This is a mandatory synthesis workflow.
|
|
491
|
+
"""
|
|
492
|
+
# presentation_instructions += """For video generation tasks:
|
|
493
|
+
# - Extract video paths from the existing answer and resolve them in the shared reference.
|
|
494
|
+
# - Gather ALL videos produced by EVERY agent (ignore non-existent files).
|
|
495
|
+
# - IMPORTANT: If you find ANY existing videos (from yourself or other agents), you MUST call the **understand_video** tool to extract each video's description and key features.
|
|
496
|
+
# - IMPORTANT: Synthesize descriptions from all videos into a detailed, combined prompt capturing the best elements.
|
|
497
|
+
# - IMPORTANT: You MUST call the **text_to_video_generation** tool with this synthesized prompt to generate the final video.
|
|
498
|
+
# - IMPORTANT: Save the final output in your workspace and output the saved path.
|
|
499
|
+
# - If no existing videos are found, generate based on the original task requirements.
|
|
500
|
+
# """
|
|
353
501
|
|
|
354
502
|
# Add irreversible actions reminder if needed
|
|
355
503
|
# TODO: Integrate more general irreversible actions handling in future (i.e., not just for context file delivery)
|