@aj-archipelago/cortex 1.3.65 → 1.3.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-autogen2/Dockerfile +88 -21
- package/helper-apps/cortex-autogen2/docker-compose.yml +15 -8
- package/helper-apps/cortex-autogen2/host.json +5 -0
- package/helper-apps/cortex-autogen2/pyproject.toml +82 -25
- package/helper-apps/cortex-autogen2/requirements.txt +84 -14
- package/helper-apps/cortex-autogen2/services/redis_publisher.py +129 -3
- package/helper-apps/cortex-autogen2/task_processor.py +432 -116
- package/helper-apps/cortex-autogen2/tools/__init__.py +2 -0
- package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +32 -0
- package/helper-apps/cortex-autogen2/tools/azure_foundry_agents.py +50 -14
- package/helper-apps/cortex-autogen2/tools/file_tools.py +169 -44
- package/helper-apps/cortex-autogen2/tools/google_cse.py +117 -0
- package/helper-apps/cortex-autogen2/tools/search_tools.py +655 -98
- package/lib/entityConstants.js +1 -1
- package/lib/pathwayManager.js +42 -8
- package/lib/pathwayTools.js +3 -3
- package/lib/util.js +58 -2
- package/package.json +1 -1
- package/pathways/system/entity/memory/sys_memory_format.js +1 -0
- package/pathways/system/entity/memory/sys_memory_manager.js +3 -3
- package/pathways/system/entity/sys_entity_start.js +1 -1
- package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +2 -0
- package/pathways/system/entity/tools/sys_tool_codingagent.js +2 -2
- package/pathways/system/entity/tools/sys_tool_google_search.js +3 -3
- package/pathways/system/entity/tools/sys_tool_grok_x_search.js +12 -2
- package/pathways/system/workspaces/run_workspace_prompt.js +0 -3
- package/server/executeWorkspace.js +381 -0
- package/server/graphql.js +5 -180
- package/server/pathwayResolver.js +3 -3
- package/server/plugins/apptekTranslatePlugin.js +2 -2
- package/server/plugins/azureFoundryAgentsPlugin.js +1 -1
- package/tests/unit/core/parser.test.js +0 -1
- package/tests/unit/core/pathwayManagerWithFiles.test.js +256 -0
- package/tests/unit/graphql_executeWorkspace_transformation.test.js +244 -0
- package/tests/unit/server/graphql.test.js +122 -1
|
@@ -28,6 +28,10 @@ class TaskProcessor:
|
|
|
28
28
|
self.gpt41_model_client = None
|
|
29
29
|
self.progress_tracker = None
|
|
30
30
|
self.final_progress_sent = False
|
|
31
|
+
# Background progress worker components
|
|
32
|
+
self._progress_queue: Optional[asyncio.Queue] = None
|
|
33
|
+
self._progress_worker_task: Optional[asyncio.Task] = None
|
|
34
|
+
self._last_summary_by_request: Dict[str, str] = {}
|
|
31
35
|
|
|
32
36
|
async def initialize(self):
|
|
33
37
|
"""Initialize model clients and services."""
|
|
@@ -39,9 +43,11 @@ class TaskProcessor:
|
|
|
39
43
|
CORTEX_API_BASE_URL = os.getenv("CORTEX_API_BASE_URL", "http://host.docker.internal:4000/v1")
|
|
40
44
|
|
|
41
45
|
# Define ModelInfo for custom models
|
|
42
|
-
o3_model_info = ModelInfo(model="o3", name="Cortex o3", max_tokens=
|
|
43
|
-
o4_mini_model_info = ModelInfo(model="o4-mini", name="Cortex o4-mini", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
|
|
44
|
-
gpt41_model_info = ModelInfo(model="gpt-4.1", name="Cortex gpt-4.1", max_tokens=8192, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
|
|
46
|
+
o3_model_info = ModelInfo(model="o3", name="Cortex o3", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
|
|
47
|
+
o4_mini_model_info = ModelInfo(model="o4-mini", name="Cortex o4-mini", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
|
|
48
|
+
gpt41_model_info = ModelInfo(model="gpt-4.1", name="Cortex gpt-4.1", max_tokens=8192, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
|
|
49
|
+
gpt5_model_info = ModelInfo(model="gpt-5", name="Cortex gpt-5", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
|
|
50
|
+
claude_4_sonnet_model_info = ModelInfo(model="claude-4-sonnet", name="Cortex claude-4-sonnet", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
|
|
45
51
|
|
|
46
52
|
self.o3_model_client = OpenAIChatCompletionClient(
|
|
47
53
|
model="o3",
|
|
@@ -66,12 +72,78 @@ class TaskProcessor:
|
|
|
66
72
|
timeout=600,
|
|
67
73
|
model_info=gpt41_model_info # Pass model_info
|
|
68
74
|
)
|
|
75
|
+
|
|
76
|
+
self.gpt5_model_client = OpenAIChatCompletionClient(
|
|
77
|
+
model="gpt-5",
|
|
78
|
+
api_key=CORTEX_API_KEY,
|
|
79
|
+
base_url=CORTEX_API_BASE_URL,
|
|
80
|
+
timeout=600,
|
|
81
|
+
model_info=gpt5_model_info # Pass model_info
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
self.claude_4_sonnet_model_client = OpenAIChatCompletionClient(
|
|
85
|
+
model="claude-4-sonnet",
|
|
86
|
+
api_key=CORTEX_API_KEY,
|
|
87
|
+
base_url=CORTEX_API_BASE_URL,
|
|
88
|
+
timeout=600,
|
|
89
|
+
model_info=claude_4_sonnet_model_info # Pass model_info
|
|
90
|
+
)
|
|
69
91
|
|
|
70
92
|
self.progress_tracker = await get_redis_publisher()
|
|
93
|
+
# Ensure background progress worker is running
|
|
94
|
+
await self._ensure_progress_worker()
|
|
95
|
+
|
|
96
|
+
async def _ensure_progress_worker(self) -> None:
    """Create the progress queue if needed and keep exactly one worker task alive.

    Safe to call repeatedly: the queue is created only once, and a new worker
    task is spawned only when none exists yet or the previous one has
    finished. Any failure is logged as a warning and never propagated.
    """
    try:
        # Bounded queue to avoid memory growth; newest updates replace when full
        if self._progress_queue is None:
            self._progress_queue = asyncio.Queue(maxsize=256)

        current = self._progress_worker_task
        worker_alive = current is not None and not current.done()
        if not worker_alive:
            self._progress_worker_task = asyncio.create_task(self._progress_worker_loop())
    except Exception as e:
        logger.warning(f"Failed to start progress worker: {e}")
|
|
106
|
+
|
|
107
|
+
async def _progress_worker_loop(self) -> None:
    """Consume queued progress events, summarize them, and publish new summaries.

    Runs until cancelled. Each event is a dict with the keys ``task_id``,
    ``percentage``, ``content``, ``message_type`` and ``source``. The content
    is passed to ``summarize_progress``; a non-empty summary that differs from
    the last one published for the same ``task_id`` is sent through
    ``progress_tracker.set_transient_update``.

    Every dequeued event is acknowledged with ``task_done()`` exactly once,
    even when processing fails, so ``Queue.join()`` cannot hang on a bad
    event.
    """
    try:
        queue = self._progress_queue
        if queue is None:
            # Without a queue every get() would fail synchronously and the
            # loop would spin without ever yielding to the event loop.
            logger.debug("Progress worker started without a queue; exiting")
            return

        while True:
            try:
                event = await queue.get()
            except asyncio.CancelledError:
                raise
            except Exception as get_err:
                logger.debug(f"Progress worker loop error: {get_err}")
                # Yield so a persistent failure cannot starve other tasks.
                await asyncio.sleep(0)
                continue

            try:
                if not event:
                    continue
                req_id = event.get("task_id")
                pct = float(event.get("percentage") or 0.0)
                # Summarize in background
                summary = await self.summarize_progress(
                    event.get("content"),
                    event.get("message_type"),
                    event.get("source"),
                )
                # Skip filtered-out content and repeats of the last published summary.
                if not summary or self._last_summary_by_request.get(req_id) == summary:
                    continue
                # NOTE(review): entries are never removed from this dict in
                # this method — confirm they are cleaned up when a request ends.
                self._last_summary_by_request[req_id] = summary
                try:
                    await self.progress_tracker.set_transient_update(req_id, pct, summary)
                except Exception as pub_err:
                    logger.debug(f"Progress transient publish error for {req_id}: {pub_err}")
            except asyncio.CancelledError:
                raise
            except Exception as loop_err:
                logger.debug(f"Progress worker loop error: {loop_err}")
            finally:
                # Acknowledge on every path (success, skip, failure, cancel).
                try:
                    queue.task_done()
                except ValueError as ack_err:
                    # Raised if the item was already acknowledged elsewhere;
                    # bookkeeping must not kill the worker.
                    logger.debug(f"Progress worker loop error: {ack_err}")
    except asyncio.CancelledError:
        logger.info("Progress worker task cancelled")
    except Exception as e:
        logger.warning(f"Progress worker terminated unexpectedly: {e}")
|
|
71
140
|
|
|
72
141
|
async def summarize_progress(self, content: str, message_type: str = None, source: str = None) -> str:
|
|
73
142
|
"""Summarize progress content for display with intelligent filtering."""
|
|
74
143
|
try:
|
|
144
|
+
# Skip internal selector or housekeeping messages entirely
|
|
145
|
+
if self._is_internal_selector_message(content):
|
|
146
|
+
return None
|
|
75
147
|
# Filter out technical/internal messages that shouldn't be shown to users
|
|
76
148
|
if self._should_skip_progress_update(content, message_type, source):
|
|
77
149
|
return None
|
|
@@ -81,37 +153,51 @@ class TaskProcessor:
|
|
|
81
153
|
if not cleaned_content:
|
|
82
154
|
return None
|
|
83
155
|
|
|
84
|
-
prompt = f"""
|
|
156
|
+
prompt = f"""Transform this agent activity into a delightful, crystal-clear progress update (8-15 words) that makes non-technical users feel excited about what's happening. Start with a perfect emoji.
|
|
85
157
|
|
|
86
|
-
Context: This
|
|
158
|
+
Context: This appears in a live progress indicator for end users who aren't coders.
|
|
87
159
|
|
|
88
160
|
Current Activity: {cleaned_content}
|
|
89
|
-
Agent
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
-
|
|
103
|
-
-
|
|
104
|
-
-
|
|
105
|
-
-
|
|
106
|
-
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
- "
|
|
111
|
-
- "
|
|
112
|
-
- "
|
|
113
|
-
|
|
114
|
-
|
|
161
|
+
Agent Role: {source if source else "Unknown"}
|
|
162
|
+
|
|
163
|
+
🎨 Emoji Guide (pick the most fitting):
|
|
164
|
+
Planning/Thinking: 🧭 🗺️ 💡 🎯 🤔
|
|
165
|
+
Research/Search: 🔎 🔍 🌐 📚 🕵️
|
|
166
|
+
Data/Analysis: 📊 📈 📉 🧮 💹
|
|
167
|
+
Writing/Creating: ✍️ 📝 🖊️ ✨ 🎨
|
|
168
|
+
Images/Media: 🖼️ 📸 🎬 🌈 🖌️
|
|
169
|
+
Code/Technical: 💻 ⚙️ 🛠️ 🔧 ⚡
|
|
170
|
+
Files/Upload: 📁 ☁️ 📤 💾 🗂️
|
|
171
|
+
Success/Done: ✅ 🎉 🏆 🎊 ⭐
|
|
172
|
+
|
|
173
|
+
✨ Writing Style:
|
|
174
|
+
- ENGAGING: Use vivid, active verbs that paint a picture (discovering, crafting, weaving, building, hunting)
|
|
175
|
+
- HUMAN: Conversational and warm, like a helpful colleague updating you
|
|
176
|
+
- CLEAR: Zero jargon, no technical terms, no agent/tool names
|
|
177
|
+
- SPECIFIC: Say what's actually being created/found (not just "processing data")
|
|
178
|
+
- UPBEAT: Positive energy, but not over-the-top
|
|
179
|
+
- SHORT: 8-15 words max - every word must earn its place
|
|
180
|
+
|
|
181
|
+
🌟 Great Examples (follow these patterns):
|
|
182
|
+
- "🔍 Hunting down the perfect images for your presentation"
|
|
183
|
+
- "📊 Crunching numbers to reveal hidden trends"
|
|
184
|
+
- "✨ Weaving everything together into a polished report"
|
|
185
|
+
- "🎨 Designing eye-catching charts that tell the story"
|
|
186
|
+
- "📚 Diving deep into research to find golden insights"
|
|
187
|
+
- "🖼️ Gathering stunning visuals to bring ideas to life"
|
|
188
|
+
- "💡 Mapping out the smartest approach to tackle this"
|
|
189
|
+
- "☁️ Packaging everything up for easy download"
|
|
190
|
+
- "🔎 Exploring databases to uncover the answers"
|
|
191
|
+
- "✍️ Crafting a compelling narrative from the data"
|
|
192
|
+
|
|
193
|
+
❌ Avoid These (too boring/technical):
|
|
194
|
+
- "Processing data" (vague)
|
|
195
|
+
- "Executing SQL query" (jargon)
|
|
196
|
+
- "Running code" (technical)
|
|
197
|
+
- "Your report is ready" (premature/addressing user)
|
|
198
|
+
- "Task terminated" (robotic)
|
|
199
|
+
|
|
200
|
+
Return ONLY the update line with emoji - nothing else:"""
|
|
115
201
|
|
|
116
202
|
messages = [UserMessage(content=str(prompt), source="summarize_progress_function")]
|
|
117
203
|
|
|
@@ -128,6 +214,10 @@ Generate only the progress update:"""
|
|
|
128
214
|
|
|
129
215
|
content_str = str(content).strip().upper()
|
|
130
216
|
|
|
217
|
+
# Skip internal selector prompts or bare role names
|
|
218
|
+
if self._is_internal_selector_message(content):
|
|
219
|
+
return True
|
|
220
|
+
|
|
131
221
|
# Skip termination messages
|
|
132
222
|
if content_str == "TERMINATE" or "TERMINATE" in content_str:
|
|
133
223
|
return True
|
|
@@ -183,13 +273,66 @@ Generate only the progress update:"""
|
|
|
183
273
|
|
|
184
274
|
return cleaned
|
|
185
275
|
|
|
276
|
+
def _is_internal_selector_message(self, content: str) -> bool:
    """Return True when *content* is internal orchestration noise.

    Three kinds of content are treated as internal and must not be surfaced
    to end users:

    * AutoGen selector prompts, recognized by marker phrases matched
      case-insensitively anywhere in the text;
    * a message that consists solely of an agent role name (exact,
      case-sensitive match after stripping surrounding whitespace);
    * provider schema errors that mention both ``tool_calls`` and
      ``MultiMessage`` together with ``field`` or ``variable``.

    Falsy content (``None``, ``""``) is never considered internal.
    """
    if not content:
        return False

    text = str(content).strip()
    # Lowercase once; every substring check below is case-insensitive.
    lowered = text.lower()

    # Stored already lowercased so they compare directly against ``lowered``.
    selector_markers = (
        "you are in a role play game.",
        "select the next role",
        "only return the role.",
    )
    if any(marker in lowered for marker in selector_markers):
        return True

    role_names = {
        "planner_agent", "coder_agent", "code_executor", "terminator_agent",
        "presenter_agent", "file_cloud_uploader_agent", "aj_sql_agent",
        "aj_article_writer_agent", "cognitive_search_agent", "web_search_agent",
    }
    # If the entire content is just a role name, treat as internal
    if text in role_names:
        return True

    # Treat provider schema errors about tool_calls/MultiMessage as internal noise
    return (
        "tool_calls" in lowered
        and "multimessage" in lowered
        and ("field" in lowered or "variable" in lowered)
    )
|
|
307
|
+
|
|
186
308
|
async def handle_progress_update(self, task_id: str, percentage: float, content: str, message_type: str = None, source: str = None):
    """Enqueue a progress event for the background worker without blocking.

    The worker is (re)started when the queue does not exist yet or when the
    worker task is missing or has finished, so events are not left unread
    after a worker has stopped. When the queue is full, the oldest pending
    event is discarded to make room for this one.

    Args:
        task_id: Identifier of the request the update belongs to.
        percentage: Completion value stored with the event.
        content: Raw content to be summarized later by the worker.
        message_type: Optional message type stored with the event.
        source: Optional source name stored with the event.

    Never raises: any failure is logged at debug level and the update is
    dropped.
    """
    try:
        worker = self._progress_worker_task
        if self._progress_queue is None or worker is None or worker.done():
            await self._ensure_progress_worker()

        queue = self._progress_queue
        if queue is None:
            # Worker start-up failed; nothing can consume the event.
            logger.debug("handle_progress_update enqueue error: progress queue unavailable")
            return

        event = {
            "task_id": task_id,
            "percentage": percentage,
            "content": content,
            "message_type": message_type,
            "source": source,
        }
        # Prefer non-blocking put; if full, drop the oldest and retry once
        try:
            queue.put_nowait(event)
        except asyncio.QueueFull:
            try:
                queue.get_nowait()
            except asyncio.QueueEmpty:
                pass  # already drained; nothing to drop
            else:
                # Acknowledge the discarded item so join() accounting stays balanced.
                queue.task_done()
            try:
                queue.put_nowait(event)
            except asyncio.QueueFull:
                logger.debug(f"handle_progress_update dropped update for {task_id}: queue full")
    except Exception as e:
        logger.debug(f"handle_progress_update enqueue error: {e}")
|
|
193
336
|
|
|
194
337
|
async def publish_final(self, task_id: str, message: str, data: Any = None) -> None:
|
|
195
338
|
"""Publish a final 1.0 progress message once."""
|
|
@@ -209,26 +352,49 @@ Generate only the progress update:"""
|
|
|
209
352
|
task_completed_percentage = 0.05
|
|
210
353
|
task = task_content
|
|
211
354
|
|
|
212
|
-
#
|
|
213
|
-
|
|
355
|
+
# Per-request working directory: isolate artifacts under /tmp/coding/<task_id>
|
|
356
|
+
try:
|
|
357
|
+
base_wd = os.getenv("CORTEX_WORK_DIR", "/tmp/coding")
|
|
358
|
+
# In Azure Functions, force /tmp for write access
|
|
359
|
+
if os.getenv("WEBSITE_INSTANCE_ID") and base_wd.startswith("/app/"):
|
|
360
|
+
base_wd = "/tmp/coding"
|
|
361
|
+
import time
|
|
362
|
+
req_dir_name = f"req_{task_id}" if task_id else f"req_{int(time.time())}"
|
|
363
|
+
request_work_dir = os.path.join(base_wd, req_dir_name)
|
|
364
|
+
os.makedirs(request_work_dir, exist_ok=True)
|
|
365
|
+
os.environ["CORTEX_WORK_DIR"] = request_work_dir
|
|
366
|
+
# pass to get_agents so all tools use this dir
|
|
367
|
+
request_work_dir_for_agents = request_work_dir
|
|
368
|
+
except Exception:
|
|
369
|
+
# Fallback to base directory if per-request directory cannot be created
|
|
370
|
+
try:
|
|
371
|
+
os.makedirs(os.getenv("CORTEX_WORK_DIR", "/tmp/coding"), exist_ok=True)
|
|
372
|
+
except Exception:
|
|
373
|
+
pass
|
|
374
|
+
|
|
375
|
+
# Send initial progress update (transient only)
|
|
376
|
+
await self.progress_tracker.set_transient_update(task_id, 0.05, "🚀 Starting your task...")
|
|
214
377
|
|
|
215
378
|
termination = HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
|
|
216
379
|
|
|
217
|
-
agents, presenter_agent = await get_agents(
|
|
380
|
+
agents, presenter_agent, terminator_agent = await get_agents(
|
|
218
381
|
self.gpt41_model_client,
|
|
219
382
|
self.o3_model_client,
|
|
220
|
-
self.gpt41_model_client
|
|
383
|
+
self.gpt41_model_client,
|
|
384
|
+
request_work_dir=request_work_dir_for_agents if 'request_work_dir_for_agents' in locals() else None
|
|
221
385
|
)
|
|
222
386
|
|
|
223
387
|
team = SelectorGroupChat(
|
|
224
388
|
participants=agents,
|
|
225
389
|
model_client=self.gpt41_model_client,
|
|
226
390
|
termination_condition=termination,
|
|
227
|
-
max_turns=
|
|
391
|
+
max_turns=200
|
|
228
392
|
)
|
|
229
393
|
|
|
230
394
|
messages = []
|
|
231
395
|
uploaded_file_urls = {}
|
|
396
|
+
uploaded_files_list: List[Dict[str, Any]] = []
|
|
397
|
+
external_media_urls: List[str] = []
|
|
232
398
|
final_result_content = []
|
|
233
399
|
|
|
234
400
|
detailed_task = f"""
|
|
@@ -238,6 +404,9 @@ Generate only the progress update:"""
|
|
|
238
404
|
"""
|
|
239
405
|
|
|
240
406
|
stream = team.run_stream(task=task)
|
|
407
|
+
# Loop guard for repeating provider schema errors (e.g., tool_calls/MultiMessage)
|
|
408
|
+
repeated_schema_error_count = 0
|
|
409
|
+
last_schema_error_seen = False
|
|
241
410
|
async for message in stream:
|
|
242
411
|
messages.append(message)
|
|
243
412
|
source = message.source if hasattr(message, 'source') else None
|
|
@@ -249,7 +418,27 @@ Generate only the progress update:"""
|
|
|
249
418
|
if task_completed_percentage >= 1.0:
|
|
250
419
|
task_completed_percentage = 0.99
|
|
251
420
|
|
|
252
|
-
if
|
|
421
|
+
# Loop-guard detection: break early if the same schema error repeats
|
|
422
|
+
try:
|
|
423
|
+
ctext = str(content) if content is not None else ""
|
|
424
|
+
is_schema_err = ("tool_calls" in ctext) and ("MultiMessage" in ctext)
|
|
425
|
+
if is_schema_err:
|
|
426
|
+
if last_schema_error_seen:
|
|
427
|
+
repeated_schema_error_count += 1
|
|
428
|
+
else:
|
|
429
|
+
repeated_schema_error_count = 1
|
|
430
|
+
last_schema_error_seen = True
|
|
431
|
+
# If schema error repeats too many times, stop the loop to avoid getting stuck
|
|
432
|
+
if repeated_schema_error_count >= 3:
|
|
433
|
+
logger.warning("Breaking team.run_stream due to repeated MultiMessage/tool_calls schema errors.")
|
|
434
|
+
break
|
|
435
|
+
else:
|
|
436
|
+
last_schema_error_seen = False
|
|
437
|
+
repeated_schema_error_count = 0
|
|
438
|
+
except Exception:
|
|
439
|
+
pass
|
|
440
|
+
|
|
441
|
+
if content and not self._is_internal_selector_message(content):
|
|
253
442
|
processed_content_for_progress = content
|
|
254
443
|
if message.type == "ToolCallExecutionEvent" and hasattr(message, 'content') and isinstance(message.content, list):
|
|
255
444
|
error_contents = [res.content for res in message.content if hasattr(res, 'is_error') and res.is_error]
|
|
@@ -264,105 +453,187 @@ Generate only the progress update:"""
|
|
|
264
453
|
if isinstance(json_content, dict):
|
|
265
454
|
if "download_url" in json_content and "blob_name" in json_content:
|
|
266
455
|
uploaded_file_urls[json_content["blob_name"]] = json_content["download_url"]
|
|
267
|
-
|
|
456
|
+
# collect external media from known keys
|
|
457
|
+
for k in ("images", "image_urls", "media", "videos", "thumbnails", "assets"):
|
|
458
|
+
try:
|
|
459
|
+
vals = json_content.get(k)
|
|
460
|
+
if isinstance(vals, list):
|
|
461
|
+
for v in vals:
|
|
462
|
+
if isinstance(v, str) and v.startswith("http"):
|
|
463
|
+
external_media_urls.append(v)
|
|
464
|
+
elif isinstance(vals, dict):
|
|
465
|
+
for v in vals.values():
|
|
466
|
+
if isinstance(v, str) and v.startswith("http"):
|
|
467
|
+
external_media_urls.append(v)
|
|
468
|
+
except Exception:
|
|
469
|
+
pass
|
|
268
470
|
elif isinstance(json_content, list):
|
|
269
471
|
for item in json_content:
|
|
270
472
|
if isinstance(item, dict) and "download_url" in item and "blob_name" in item:
|
|
271
473
|
uploaded_file_urls[item["blob_name"]] = item["download_url"]
|
|
272
|
-
|
|
474
|
+
# look for url-like fields
|
|
475
|
+
if isinstance(item, dict):
|
|
476
|
+
for key in ("url", "image", "thumbnail", "video", "download_url"):
|
|
477
|
+
try:
|
|
478
|
+
val = item.get(key)
|
|
479
|
+
if isinstance(val, str) and val.startswith("http"):
|
|
480
|
+
external_media_urls.append(val)
|
|
481
|
+
except Exception:
|
|
482
|
+
pass
|
|
273
483
|
# otherwise, ignore scalars like numbers/strings
|
|
274
484
|
except json.JSONDecodeError:
|
|
275
|
-
|
|
485
|
+
# best-effort regex scrape of http(s) URLs that look like media
|
|
486
|
+
try:
|
|
487
|
+
import re
|
|
488
|
+
for m in re.findall(r"https?://[^\s)\]}]+", content):
|
|
489
|
+
if any(m.lower().endswith(ext) for ext in (".png", ".jpg", ".jpeg", ".webp", ".gif", ".mp4", ".webm", ".mov")):
|
|
490
|
+
external_media_urls.append(m)
|
|
491
|
+
except Exception:
|
|
492
|
+
pass
|
|
276
493
|
|
|
277
494
|
final_result_content.append(str(content))
|
|
495
|
+
# Enqueue progress update for background processing (non-blocking)
|
|
278
496
|
asyncio.create_task(self.handle_progress_update(task_id, task_completed_percentage, processed_content_for_progress, message.type, source))
|
|
279
497
|
|
|
280
|
-
await self.progress_tracker.publish_progress(task_id, 0.95, "✨ Finalizing your results...")
|
|
281
|
-
|
|
282
|
-
# Targeted auto-upload: if no URLs yet, opportunistically upload recent deliverables created in this run.
|
|
283
|
-
# Fast, non-recursive, and limited to known dirs and extensions.
|
|
284
498
|
try:
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
deliverable_exts = {".pptx", ".ppt", ".csv", ".png", ".jpg", ".jpeg", ".pdf"}
|
|
290
|
-
candidate_dirs: List[str] = []
|
|
291
|
-
try:
|
|
292
|
-
wd = os.getenv("CORTEX_WORK_DIR", "/tmp/coding")
|
|
293
|
-
# In Azure Functions, prefer /tmp for write access
|
|
294
|
-
if os.getenv("WEBSITE_INSTANCE_ID") and wd.startswith("/app/"):
|
|
295
|
-
wd = "/tmp/coding"
|
|
296
|
-
candidate_dirs.append(wd)
|
|
297
|
-
except Exception:
|
|
298
|
-
pass
|
|
299
|
-
candidate_dirs.append("/tmp/coding")
|
|
499
|
+
# Finalizing update (transient only)
|
|
500
|
+
await self.progress_tracker.set_transient_update(task_id, 0.95, "✨ Finalizing your results...")
|
|
501
|
+
except Exception:
|
|
502
|
+
pass
|
|
300
503
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
504
|
+
# No fallback file generation: if required assets are missing, allow termination to report inability
|
|
505
|
+
except Exception:
|
|
506
|
+
# Catch-all for the outer deliverables-referencing try block
|
|
507
|
+
pass
|
|
508
|
+
|
|
509
|
+
# Per-request auto-upload: select best deliverables (avoid multiple near-identical PPTX)
|
|
510
|
+
try:
|
|
511
|
+
deliverable_exts = {".pptx", ".ppt", ".csv", ".png", ".jpg", ".jpeg", ".pdf", ".zip"}
|
|
512
|
+
req_dir = os.getenv("CORTEX_WORK_DIR", "/tmp/coding")
|
|
513
|
+
selected_paths: List[str] = []
|
|
514
|
+
if os.path.isdir(req_dir):
|
|
515
|
+
# Gather candidates by extension
|
|
516
|
+
candidates_by_ext: Dict[str, List[Dict[str, Any]]] = {}
|
|
517
|
+
for root, _, files in os.walk(req_dir):
|
|
518
|
+
for name in files:
|
|
519
|
+
try:
|
|
317
520
|
_, ext = os.path.splitext(name)
|
|
318
|
-
|
|
521
|
+
ext = ext.lower()
|
|
522
|
+
if ext not in deliverable_exts:
|
|
319
523
|
continue
|
|
524
|
+
fp = os.path.join(root, name)
|
|
525
|
+
size = 0
|
|
526
|
+
mtime = 0.0
|
|
320
527
|
try:
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
528
|
+
st = os.stat(fp)
|
|
529
|
+
size = int(getattr(st, 'st_size', 0))
|
|
530
|
+
mtime = float(getattr(st, 'st_mtime', 0.0))
|
|
324
531
|
except Exception:
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
532
|
+
pass
|
|
533
|
+
lst = candidates_by_ext.setdefault(ext, [])
|
|
534
|
+
lst.append({"path": fp, "size": size, "mtime": mtime})
|
|
535
|
+
except Exception:
|
|
536
|
+
continue
|
|
537
|
+
|
|
538
|
+
# Selection policy:
|
|
539
|
+
# - For .pptx and .ppt: choose the single largest file (assume most complete)
|
|
540
|
+
# - For other ext: include all
|
|
541
|
+
for ext, items in candidates_by_ext.items():
|
|
542
|
+
if ext in (".pptx", ".ppt"):
|
|
543
|
+
if items:
|
|
544
|
+
best = max(items, key=lambda x: (x.get("size", 0), x.get("mtime", 0.0)))
|
|
545
|
+
selected_paths.append(best["path"])
|
|
546
|
+
else:
|
|
547
|
+
for it in items:
|
|
548
|
+
selected_paths.append(it["path"])
|
|
328
549
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
550
|
+
# Upload only selected paths
|
|
551
|
+
for fp in selected_paths:
|
|
552
|
+
try:
|
|
553
|
+
up_json = upload_file_to_azure_blob(fp, blob_name=None)
|
|
554
|
+
up = json.loads(up_json)
|
|
555
|
+
if "download_url" in up and "blob_name" in up:
|
|
556
|
+
uploaded_file_urls[up["blob_name"]] = up["download_url"]
|
|
557
|
+
try:
|
|
558
|
+
bname = os.path.basename(str(up.get("blob_name") or ""))
|
|
559
|
+
extl = os.path.splitext(bname)[1].lower()
|
|
560
|
+
is_img = extl in (".png", ".jpg", ".jpeg", ".webp", ".gif")
|
|
561
|
+
uploaded_files_list.append({
|
|
562
|
+
"file_name": bname,
|
|
563
|
+
"url": up["download_url"],
|
|
564
|
+
"ext": extl,
|
|
565
|
+
"is_image": is_img,
|
|
566
|
+
})
|
|
567
|
+
if is_img:
|
|
568
|
+
external_media_urls.append(up["download_url"])
|
|
569
|
+
except Exception:
|
|
570
|
+
pass
|
|
571
|
+
except Exception:
|
|
572
|
+
continue
|
|
573
|
+
except Exception:
|
|
574
|
+
pass
|
|
332
575
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
576
|
+
# Deduplicate and cap external media to a reasonable number
|
|
577
|
+
try:
|
|
578
|
+
dedup_media = []
|
|
579
|
+
seen = set()
|
|
580
|
+
for u in external_media_urls:
|
|
581
|
+
if u in seen:
|
|
582
|
+
continue
|
|
583
|
+
seen.add(u)
|
|
584
|
+
dedup_media.append(u)
|
|
585
|
+
external_media_urls = dedup_media[:24]
|
|
342
586
|
except Exception:
|
|
343
587
|
pass
|
|
344
588
|
|
|
345
589
|
result_limited_to_fit = "\n".join(final_result_content)
|
|
346
590
|
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
591
|
+
# Provide the presenter with explicit file list to avoid duplication and downloads sections
|
|
592
|
+
uploaded_files_list = []
|
|
593
|
+
try:
|
|
594
|
+
for blob_name, url in (uploaded_file_urls.items() if isinstance(uploaded_file_urls, dict) else []):
|
|
595
|
+
try:
|
|
596
|
+
fname = os.path.basename(str(blob_name))
|
|
597
|
+
except Exception:
|
|
598
|
+
fname = str(blob_name)
|
|
599
|
+
extl = os.path.splitext(fname)[1].lower()
|
|
600
|
+
is_image = extl in (".png", ".jpg", ".jpeg", ".webp", ".gif")
|
|
601
|
+
uploaded_files_list.append({"file_name": fname, "url": url, "ext": extl, "is_image": is_image})
|
|
602
|
+
except Exception:
|
|
603
|
+
pass
|
|
352
604
|
|
|
353
|
-
|
|
605
|
+
presenter_task = f"""
|
|
606
|
+
Present the task result in a clean, professional Markdown/HTML that contains ONLY what the task requested. This will be shown in a React app.
|
|
607
|
+
Use only the information provided.
|
|
354
608
|
|
|
609
|
+
TASK:
|
|
355
610
|
{task}
|
|
356
611
|
|
|
357
612
|
RAW_AGENT_COMMUNICATIONS:
|
|
358
|
-
|
|
359
613
|
{result_limited_to_fit}
|
|
360
614
|
|
|
361
615
|
UPLOADED_FILES_SAS_URLS:
|
|
362
|
-
|
|
363
616
|
{json.dumps(uploaded_file_urls, indent=2)}
|
|
364
617
|
|
|
365
|
-
|
|
618
|
+
EXTERNAL_MEDIA_URLS:
|
|
619
|
+
{json.dumps(external_media_urls, indent=2)}
|
|
620
|
+
|
|
621
|
+
UPLOADED_FILES_LIST:
|
|
622
|
+
{json.dumps(uploaded_files_list, indent=2)}
|
|
623
|
+
|
|
624
|
+
STRICT OUTPUT RULES:
|
|
625
|
+
- Use UPLOADED_FILES_LIST (SAS URLs) and EXTERNAL_MEDIA_URLS to present assets. Always use the SAS URL provided in UPLOADED_FILES_LIST for any uploaded file.
|
|
626
|
+
- Images (png, jpg, jpeg, webp, gif): embed inline in a Visuals section using <figure><img/></figure> with captions. Do NOT provide links for images.
|
|
627
|
+
- Non-image files (pptx, pdf, csv): insert a SINGLE inline anchor (<a href=\"...\">filename</a>) at the first natural mention; do NOT create a 'Downloads' section; do NOT repeat links.
|
|
628
|
+
- For media: do NOT use grid or containers.
|
|
629
|
+
- SINGLE media: wrap in <figure style=\"margin: 12px 0;\"> with <img style=\"display:block;width:100%;max-width:960px;height:auto;margin:0 auto;border-radius:8px;box-shadow:0 1px 3px rgba(0,0,0,0.12)\"> and a <figcaption style=\"margin-top:8px;font-size:0.92em;color:inherit;opacity:0.8;text-align:center;\">.
|
|
630
|
+
- MULTIPLE media: output consecutive <figure> elements, one per row; no wrapping <div>.
|
|
631
|
+
- Avoid framework classes in HTML; rely on inline styles only. Do NOT include any class attributes. Use color: inherit for captions to respect dark/light mode.
|
|
632
|
+
- Never fabricate URLs, images, or content; use only links present in UPLOADED_FILES_LIST or EXTERNAL_MEDIA_URLS.
|
|
633
|
+
- Present each uploaded non-image file ONCE only (no duplicate links), using its filename as the link text.
|
|
634
|
+
- For links, prefer HTML anchor tags: <a href=\"URL\" target=\"_blank\" rel=\"noopener noreferrer\" download>FILENAME</a>.
|
|
635
|
+
- Do NOT include code, tool usage, or internal logs.
|
|
636
|
+
- Be detailed and user-facing. Include Overview, Visuals, Key Takeaways, and Next Actions sections. Do not create a Downloads section.
|
|
366
637
|
"""
|
|
367
638
|
|
|
368
639
|
presenter_stream = presenter_agent.run_stream(task=presenter_task)
|
|
@@ -375,20 +646,50 @@ Generate only the progress update:"""
|
|
|
375
646
|
last_message = task_result.messages[-1]
|
|
376
647
|
text_result = last_message.content if hasattr(last_message, 'content') else None
|
|
377
648
|
|
|
378
|
-
#
|
|
649
|
+
# No presenter normalization or auto-upload based on text; rely on strict prompts
|
|
379
650
|
try:
|
|
380
|
-
|
|
381
|
-
# naive pattern for http links
|
|
382
|
-
import re
|
|
383
|
-
if re.search(r"https?://", text_result):
|
|
384
|
-
logger.warning("Presenter output contains a link but no uploaded URLs exist. Rewriting to prevent hallucinated links.")
|
|
385
|
-
text_result = re.sub(r"\(https?://[^)]+\)", "(Download not available)", text_result)
|
|
651
|
+
pass
|
|
386
652
|
except Exception:
|
|
387
653
|
pass
|
|
388
654
|
|
|
655
|
+
# No post-sanitization here; enforce via presenter prompt only per user request
|
|
656
|
+
|
|
389
657
|
logger.info(f"🔍 TASK RESULT:\n{text_result}")
|
|
658
|
+
|
|
659
|
+
# Run terminator agent once presenter has produced final text
|
|
660
|
+
try:
|
|
661
|
+
term_messages = []
|
|
662
|
+
term_task = f"""
|
|
663
|
+
Check if the task is completed and output TERMINATE if and only if done.
|
|
664
|
+
Latest presenter output:
|
|
665
|
+
{text_result}
|
|
666
|
+
|
|
667
|
+
Uploaded files (SAS URLs):
|
|
668
|
+
{json.dumps(uploaded_file_urls, indent=2)}
|
|
669
|
+
|
|
670
|
+
TASK:
|
|
671
|
+
{task}
|
|
672
|
+
|
|
673
|
+
Reminder:
|
|
674
|
+
- If the TASK explicitly requires downloadable files, ensure at least one clickable download URL is present.
|
|
675
|
+
- If the TASK does not require files (e.g., simple answer, calculation, summary, troubleshooting), terminate when the presenter has clearly delivered the requested content. Do not require downloads in that case.
|
|
676
|
+
"""
|
|
677
|
+
term_stream = terminator_agent.run_stream(task=term_task)
|
|
678
|
+
async for message in term_stream:
|
|
679
|
+
term_messages.append(message)
|
|
680
|
+
if term_messages:
|
|
681
|
+
t_last = term_messages[-1].messages[-1]
|
|
682
|
+
t_text = t_last.content if hasattr(t_last, 'content') else ''
|
|
683
|
+
logger.info(f"🛑 TERMINATOR: {t_text}")
|
|
684
|
+
# If it didn't say TERMINATE but we already have presenter output, proceed anyway
|
|
685
|
+
except Exception as e:
|
|
686
|
+
logger.warning(f"⚠️ Terminator agent failed or unavailable: {e}")
|
|
390
687
|
final_data = text_result or "🎉 Your task is complete!"
|
|
391
688
|
await self.progress_tracker.publish_progress(task_id, 1.0, "🎉 Your task is complete!", data=final_data)
|
|
689
|
+
try:
|
|
690
|
+
await self.progress_tracker.mark_final(task_id)
|
|
691
|
+
except Exception:
|
|
692
|
+
pass
|
|
392
693
|
self.final_progress_sent = True
|
|
393
694
|
|
|
394
695
|
return text_result
|
|
@@ -399,6 +700,21 @@ Generate only the progress update:"""
|
|
|
399
700
|
|
|
400
701
|
async def close(self):
|
|
401
702
|
"""Close all connections gracefully."""
|
|
703
|
+
# Stop background progress worker first to avoid pending task destruction
|
|
704
|
+
try:
|
|
705
|
+
if self._progress_worker_task is not None:
|
|
706
|
+
try:
|
|
707
|
+
self._progress_worker_task.cancel()
|
|
708
|
+
try:
|
|
709
|
+
await self._progress_worker_task
|
|
710
|
+
except asyncio.CancelledError:
|
|
711
|
+
pass
|
|
712
|
+
finally:
|
|
713
|
+
self._progress_worker_task = None
|
|
714
|
+
# Allow GC of the queue
|
|
715
|
+
self._progress_queue = None
|
|
716
|
+
except Exception as e:
|
|
717
|
+
logger.debug(f"Error stopping progress worker: {e}")
|
|
402
718
|
clients_to_close = [
|
|
403
719
|
self.o3_model_client,
|
|
404
720
|
self.o4_mini_model_client,
|