@aj-archipelago/cortex 1.3.65 → 1.3.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/helper-apps/cortex-autogen2/Dockerfile +88 -21
  2. package/helper-apps/cortex-autogen2/docker-compose.yml +15 -8
  3. package/helper-apps/cortex-autogen2/host.json +5 -0
  4. package/helper-apps/cortex-autogen2/pyproject.toml +82 -25
  5. package/helper-apps/cortex-autogen2/requirements.txt +84 -14
  6. package/helper-apps/cortex-autogen2/services/redis_publisher.py +129 -3
  7. package/helper-apps/cortex-autogen2/task_processor.py +432 -116
  8. package/helper-apps/cortex-autogen2/tools/__init__.py +2 -0
  9. package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +32 -0
  10. package/helper-apps/cortex-autogen2/tools/azure_foundry_agents.py +50 -14
  11. package/helper-apps/cortex-autogen2/tools/file_tools.py +169 -44
  12. package/helper-apps/cortex-autogen2/tools/google_cse.py +117 -0
  13. package/helper-apps/cortex-autogen2/tools/search_tools.py +655 -98
  14. package/lib/entityConstants.js +1 -1
  15. package/lib/pathwayManager.js +42 -8
  16. package/lib/pathwayTools.js +3 -3
  17. package/lib/util.js +58 -2
  18. package/package.json +1 -1
  19. package/pathways/system/entity/memory/sys_memory_format.js +1 -0
  20. package/pathways/system/entity/memory/sys_memory_manager.js +3 -3
  21. package/pathways/system/entity/sys_entity_start.js +1 -1
  22. package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +2 -0
  23. package/pathways/system/entity/tools/sys_tool_codingagent.js +2 -2
  24. package/pathways/system/entity/tools/sys_tool_google_search.js +3 -3
  25. package/pathways/system/entity/tools/sys_tool_grok_x_search.js +12 -2
  26. package/pathways/system/workspaces/run_workspace_prompt.js +0 -3
  27. package/server/executeWorkspace.js +381 -0
  28. package/server/graphql.js +5 -180
  29. package/server/pathwayResolver.js +3 -3
  30. package/server/plugins/apptekTranslatePlugin.js +2 -2
  31. package/server/plugins/azureFoundryAgentsPlugin.js +1 -1
  32. package/tests/unit/core/parser.test.js +0 -1
  33. package/tests/unit/core/pathwayManagerWithFiles.test.js +256 -0
  34. package/tests/unit/graphql_executeWorkspace_transformation.test.js +244 -0
  35. package/tests/unit/server/graphql.test.js +122 -1
@@ -28,6 +28,10 @@ class TaskProcessor:
28
28
  self.gpt41_model_client = None
29
29
  self.progress_tracker = None
30
30
  self.final_progress_sent = False
31
+ # Background progress worker components
32
+ self._progress_queue: Optional[asyncio.Queue] = None
33
+ self._progress_worker_task: Optional[asyncio.Task] = None
34
+ self._last_summary_by_request: Dict[str, str] = {}
31
35
 
32
36
  async def initialize(self):
33
37
  """Initialize model clients and services."""
@@ -39,9 +43,11 @@ class TaskProcessor:
39
43
  CORTEX_API_BASE_URL = os.getenv("CORTEX_API_BASE_URL", "http://host.docker.internal:4000/v1")
40
44
 
41
45
  # Define ModelInfo for custom models
42
- o3_model_info = ModelInfo(model="o3", name="Cortex o3", max_tokens=8192, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False) # Placeholder cost
43
- o4_mini_model_info = ModelInfo(model="o4-mini", name="Cortex o4-mini", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False) # Placeholder cost
44
- gpt41_model_info = ModelInfo(model="gpt-4.1", name="Cortex gpt-4.1", max_tokens=8192, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False) # Placeholder cost
46
+ o3_model_info = ModelInfo(model="o3", name="Cortex o3", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
47
+ o4_mini_model_info = ModelInfo(model="o4-mini", name="Cortex o4-mini", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
48
+ gpt41_model_info = ModelInfo(model="gpt-4.1", name="Cortex gpt-4.1", max_tokens=8192, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
49
+ gpt5_model_info = ModelInfo(model="gpt-5", name="Cortex gpt-5", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
50
+ claude_4_sonnet_model_info = ModelInfo(model="claude-4-sonnet", name="Cortex claude-4-sonnet", max_tokens=128000, cost_per_token=0.0, vision=False, function_calling=True, json_output=False, family="openai", structured_output=False)
45
51
 
46
52
  self.o3_model_client = OpenAIChatCompletionClient(
47
53
  model="o3",
@@ -66,12 +72,78 @@ class TaskProcessor:
66
72
  timeout=600,
67
73
  model_info=gpt41_model_info # Pass model_info
68
74
  )
75
+
76
+ self.gpt5_model_client = OpenAIChatCompletionClient(
77
+ model="gpt-5",
78
+ api_key=CORTEX_API_KEY,
79
+ base_url=CORTEX_API_BASE_URL,
80
+ timeout=600,
81
+ model_info=gpt5_model_info # Pass model_info
82
+ )
83
+
84
+ self.claude_4_sonnet_model_client = OpenAIChatCompletionClient(
85
+ model="claude-4-sonnet",
86
+ api_key=CORTEX_API_KEY,
87
+ base_url=CORTEX_API_BASE_URL,
88
+ timeout=600,
89
+ model_info=claude_4_sonnet_model_info # Pass model_info
90
+ )
69
91
 
70
92
  self.progress_tracker = await get_redis_publisher()
93
+ # Ensure background progress worker is running
94
+ await self._ensure_progress_worker()
95
+
96
+ async def _ensure_progress_worker(self) -> None:
97
+ """Start a single background worker to process progress updates asynchronously."""
98
+ try:
99
+ if self._progress_queue is None:
100
+ # Bounded queue to avoid memory growth; newest updates replace when full
101
+ self._progress_queue = asyncio.Queue(maxsize=256)
102
+ if self._progress_worker_task is None or self._progress_worker_task.done():
103
+ self._progress_worker_task = asyncio.create_task(self._progress_worker_loop())
104
+ except Exception as e:
105
+ logger.warning(f"Failed to start progress worker: {e}")
106
+
107
+ async def _progress_worker_loop(self) -> None:
108
+ """Continuously consume progress events, summarize, de-duplicate, and publish transient updates."""
109
+ try:
110
+ while True:
111
+ try:
112
+ event = await self._progress_queue.get()
113
+ if not event:
114
+ self._progress_queue.task_done()
115
+ continue
116
+ req_id = event.get("task_id")
117
+ pct = float(event.get("percentage") or 0.0)
118
+ content = event.get("content")
119
+ msg_type = event.get("message_type")
120
+ source = event.get("source")
121
+ # Summarize in background
122
+ summary = await self.summarize_progress(content, msg_type, source)
123
+ if summary:
124
+ last = self._last_summary_by_request.get(req_id)
125
+ if last != summary:
126
+ self._last_summary_by_request[req_id] = summary
127
+ try:
128
+ await self.progress_tracker.set_transient_update(req_id, pct, summary)
129
+ except Exception as pub_err:
130
+ logger.debug(f"Progress transient publish error for {req_id}: {pub_err}")
131
+ self._progress_queue.task_done()
132
+ except asyncio.CancelledError:
133
+ raise
134
+ except Exception as loop_err:
135
+ logger.debug(f"Progress worker loop error: {loop_err}")
136
+ except asyncio.CancelledError:
137
+ logger.info("Progress worker task cancelled")
138
+ except Exception as e:
139
+ logger.warning(f"Progress worker terminated unexpectedly: {e}")
71
140
 
72
141
  async def summarize_progress(self, content: str, message_type: str = None, source: str = None) -> str:
73
142
  """Summarize progress content for display with intelligent filtering."""
74
143
  try:
144
+ # Skip internal selector or housekeeping messages entirely
145
+ if self._is_internal_selector_message(content):
146
+ return None
75
147
  # Filter out technical/internal messages that shouldn't be shown to users
76
148
  if self._should_skip_progress_update(content, message_type, source):
77
149
  return None
@@ -81,37 +153,51 @@ class TaskProcessor:
81
153
  if not cleaned_content:
82
154
  return None
83
155
 
84
- prompt = f"""Generate a concise, engaging, and user-friendly progress update (5-15 words) that clearly indicates what the AI is currently working on. Include an appropriate emoji.
156
+ prompt = f"""Transform this agent activity into a delightful, crystal-clear progress update (8-15 words) that makes non-technical users feel excited about what's happening. Start with a perfect emoji.
85
157
 
86
- Context: This is for a user-facing progress indicator in a React app.
158
+ Context: This appears in a live progress indicator for end users who aren't coders.
87
159
 
88
160
  Current Activity: {cleaned_content}
89
- Agent Source: {source if source else "Unknown"}
90
-
91
- Requirements:
92
- - Be positive and professional
93
- - Focus on what the user will benefit from
94
- - Avoid technical jargon
95
- - Use engaging, action-oriented language
96
- - Include a relevant emoji
97
- - Consider the agent source to provide context (e.g., coder_agent = coding, presenter_agent = creating presentation)
98
-
99
- Examples of good updates:
100
- - "🔍 Researching the latest trends"
101
- - "📊 Analyzing data patterns"
102
- - "🎨 Creating visual content"
103
- - "📝 Compiling your report"
104
- - "🚀 Finalizing results"
105
- - "💻 Writing code for your request"
106
- - "☁️ Uploading files to cloud storage"
107
-
108
- Bad examples (avoid):
109
- - "Task terminated"
110
- - "Processing internal data"
111
- - "Executing tool calls"
112
- - "TERMINATE"
113
-
114
- Generate only the progress update:"""
161
+ Agent Role: {source if source else "Unknown"}
162
+
163
+ 🎨 Emoji Guide (pick the most fitting):
164
+ Planning/Thinking: 🧭 🗺️ 💡 🎯 🤔
165
+ Research/Search: 🔎 🔍 🌐 📚 🕵️
166
+ Data/Analysis: 📊 📈 📉 🧮 💹
167
+ Writing/Creating: ✍️ 📝 🖊️ ✨ 🎨
168
+ Images/Media: 🖼️ 📸 🎬 🌈 🖌️
169
+ Code/Technical: 💻 ⚙️ 🛠️ 🔧
170
+ Files/Upload: 📁 ☁️ 📤 💾 🗂️
171
+ Success/Done: 🎉 🏆 🎊 ⭐
172
+
173
+ Writing Style:
174
+ - ENGAGING: Use vivid, active verbs that paint a picture (discovering, crafting, weaving, building, hunting)
175
+ - HUMAN: Conversational and warm, like a helpful colleague updating you
176
+ - CLEAR: Zero jargon, no technical terms, no agent/tool names
177
+ - SPECIFIC: Say what's actually being created/found (not just "processing data")
178
+ - UPBEAT: Positive energy, but not over-the-top
179
+ - SHORT: 8-15 words max - every word must earn its place
180
+
181
+ 🌟 Great Examples (follow these patterns):
182
+ - "🔍 Hunting down the perfect images for your presentation"
183
+ - "📊 Crunching numbers to reveal hidden trends"
184
+ - "✨ Weaving everything together into a polished report"
185
+ - "🎨 Designing eye-catching charts that tell the story"
186
+ - "📚 Diving deep into research to find golden insights"
187
+ - "🖼️ Gathering stunning visuals to bring ideas to life"
188
+ - "💡 Mapping out the smartest approach to tackle this"
189
+ - "☁️ Packaging everything up for easy download"
190
+ - "🔎 Exploring databases to uncover the answers"
191
+ - "✍️ Crafting a compelling narrative from the data"
192
+
193
+ ❌ Avoid These (too boring/technical):
194
+ - "Processing data" (vague)
195
+ - "Executing SQL query" (jargon)
196
+ - "Running code" (technical)
197
+ - "Your report is ready" (premature/addressing user)
198
+ - "Task terminated" (robotic)
199
+
200
+ Return ONLY the update line with emoji - nothing else:"""
115
201
 
116
202
  messages = [UserMessage(content=str(prompt), source="summarize_progress_function")]
117
203
 
@@ -128,6 +214,10 @@ Generate only the progress update:"""
128
214
 
129
215
  content_str = str(content).strip().upper()
130
216
 
217
+ # Skip internal selector prompts or bare role names
218
+ if self._is_internal_selector_message(content):
219
+ return True
220
+
131
221
  # Skip termination messages
132
222
  if content_str == "TERMINATE" or "TERMINATE" in content_str:
133
223
  return True
@@ -183,13 +273,66 @@ Generate only the progress update:"""
183
273
 
184
274
  return cleaned
185
275
 
276
+ def _is_internal_selector_message(self, content: str) -> bool:
277
+ """Detect AutoGen selector prompts and bare role selections to avoid surfacing them."""
278
+ if not content:
279
+ return False
280
+ text = str(content).strip()
281
+ selector_markers = [
282
+ "You are in a role play game.",
283
+ "select the next role",
284
+ "Only return the role.",
285
+ ]
286
+ for marker in selector_markers:
287
+ if marker.lower() in text.lower():
288
+ return True
289
+
290
+ role_names = {
291
+ "planner_agent", "coder_agent", "code_executor", "terminator_agent",
292
+ "presenter_agent", "file_cloud_uploader_agent", "aj_sql_agent",
293
+ "aj_article_writer_agent", "cognitive_search_agent", "web_search_agent"
294
+ }
295
+ # If the entire content is just a role name, treat as internal
296
+ if text in role_names:
297
+ return True
298
+
299
+ # Treat provider schema errors about tool_calls/MultiMessage as internal noise
300
+ try:
301
+ lowered = text.lower()
302
+ if ("tool_calls" in lowered) and ("multimessage" in lowered) and ("field" in lowered or "variable" in lowered):
303
+ return True
304
+ except Exception:
305
+ pass
306
+ return False
307
+
186
308
  async def handle_progress_update(self, task_id: str, percentage: float, content: str, message_type: str = None, source: str = None):
187
- """Handle progress updates with intelligent summarization."""
188
- summarized_content = await self.summarize_progress(content, message_type, source)
189
-
190
- # Only publish if we have meaningful content
191
- if summarized_content:
192
- await self.progress_tracker.publish_progress(task_id, percentage, summarized_content)
309
+ """Enqueue progress updates for the background worker to process (non-blocking)."""
310
+ try:
311
+ if self._progress_queue is None:
312
+ await self._ensure_progress_worker()
313
+ event = {
314
+ "task_id": task_id,
315
+ "percentage": percentage,
316
+ "content": content,
317
+ "message_type": message_type,
318
+ "source": source,
319
+ }
320
+ # Prefer non-blocking put; if full, drop the oldest and retry once
321
+ try:
322
+ self._progress_queue.put_nowait(event)
323
+ except asyncio.QueueFull:
324
+ try:
325
+ # Drop one item to make room
326
+ _ = self._progress_queue.get_nowait()
327
+ self._progress_queue.task_done()
328
+ except Exception:
329
+ pass
330
+ try:
331
+ self._progress_queue.put_nowait(event)
332
+ except Exception:
333
+ pass
334
+ except Exception as e:
335
+ logger.debug(f"handle_progress_update enqueue error: {e}")
193
336
 
194
337
  async def publish_final(self, task_id: str, message: str, data: Any = None) -> None:
195
338
  """Publish a final 1.0 progress message once."""
@@ -209,26 +352,49 @@ Generate only the progress update:"""
209
352
  task_completed_percentage = 0.05
210
353
  task = task_content
211
354
 
212
- # Send initial progress update
213
- await self.progress_tracker.publish_progress(task_id, 0.05, "🚀 Starting your task...")
355
+ # Per-request working directory: isolate artifacts under /tmp/coding/<task_id>
356
+ try:
357
+ base_wd = os.getenv("CORTEX_WORK_DIR", "/tmp/coding")
358
+ # In Azure Functions, force /tmp for write access
359
+ if os.getenv("WEBSITE_INSTANCE_ID") and base_wd.startswith("/app/"):
360
+ base_wd = "/tmp/coding"
361
+ import time
362
+ req_dir_name = f"req_{task_id}" if task_id else f"req_{int(time.time())}"
363
+ request_work_dir = os.path.join(base_wd, req_dir_name)
364
+ os.makedirs(request_work_dir, exist_ok=True)
365
+ os.environ["CORTEX_WORK_DIR"] = request_work_dir
366
+ # pass to get_agents so all tools use this dir
367
+ request_work_dir_for_agents = request_work_dir
368
+ except Exception:
369
+ # Fallback to base directory if per-request directory cannot be created
370
+ try:
371
+ os.makedirs(os.getenv("CORTEX_WORK_DIR", "/tmp/coding"), exist_ok=True)
372
+ except Exception:
373
+ pass
374
+
375
+ # Send initial progress update (transient only)
376
+ await self.progress_tracker.set_transient_update(task_id, 0.05, "🚀 Starting your task...")
214
377
 
215
378
  termination = HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
216
379
 
217
- agents, presenter_agent = await get_agents(
380
+ agents, presenter_agent, terminator_agent = await get_agents(
218
381
  self.gpt41_model_client,
219
382
  self.o3_model_client,
220
- self.gpt41_model_client
383
+ self.gpt41_model_client,
384
+ request_work_dir=request_work_dir_for_agents if 'request_work_dir_for_agents' in locals() else None
221
385
  )
222
386
 
223
387
  team = SelectorGroupChat(
224
388
  participants=agents,
225
389
  model_client=self.gpt41_model_client,
226
390
  termination_condition=termination,
227
- max_turns=10000
391
+ max_turns=200
228
392
  )
229
393
 
230
394
  messages = []
231
395
  uploaded_file_urls = {}
396
+ uploaded_files_list: List[Dict[str, Any]] = []
397
+ external_media_urls: List[str] = []
232
398
  final_result_content = []
233
399
 
234
400
  detailed_task = f"""
@@ -238,6 +404,9 @@ Generate only the progress update:"""
238
404
  """
239
405
 
240
406
  stream = team.run_stream(task=task)
407
+ # Loop guard for repeating provider schema errors (e.g., tool_calls/MultiMessage)
408
+ repeated_schema_error_count = 0
409
+ last_schema_error_seen = False
241
410
  async for message in stream:
242
411
  messages.append(message)
243
412
  source = message.source if hasattr(message, 'source') else None
@@ -249,7 +418,27 @@ Generate only the progress update:"""
249
418
  if task_completed_percentage >= 1.0:
250
419
  task_completed_percentage = 0.99
251
420
 
252
- if content:
421
+ # Loop-guard detection: break early if the same schema error repeats
422
+ try:
423
+ ctext = str(content) if content is not None else ""
424
+ is_schema_err = ("tool_calls" in ctext) and ("MultiMessage" in ctext)
425
+ if is_schema_err:
426
+ if last_schema_error_seen:
427
+ repeated_schema_error_count += 1
428
+ else:
429
+ repeated_schema_error_count = 1
430
+ last_schema_error_seen = True
431
+ # If schema error repeats too many times, stop the loop to avoid getting stuck
432
+ if repeated_schema_error_count >= 3:
433
+ logger.warning("Breaking team.run_stream due to repeated MultiMessage/tool_calls schema errors.")
434
+ break
435
+ else:
436
+ last_schema_error_seen = False
437
+ repeated_schema_error_count = 0
438
+ except Exception:
439
+ pass
440
+
441
+ if content and not self._is_internal_selector_message(content):
253
442
  processed_content_for_progress = content
254
443
  if message.type == "ToolCallExecutionEvent" and hasattr(message, 'content') and isinstance(message.content, list):
255
444
  error_contents = [res.content for res in message.content if hasattr(res, 'is_error') and res.is_error]
@@ -264,105 +453,187 @@ Generate only the progress update:"""
264
453
  if isinstance(json_content, dict):
265
454
  if "download_url" in json_content and "blob_name" in json_content:
266
455
  uploaded_file_urls[json_content["blob_name"]] = json_content["download_url"]
267
- final_result_content.append(f"Uploaded file: [{json_content['blob_name']}]({json_content['download_url']})")
456
+ # collect external media from known keys
457
+ for k in ("images", "image_urls", "media", "videos", "thumbnails", "assets"):
458
+ try:
459
+ vals = json_content.get(k)
460
+ if isinstance(vals, list):
461
+ for v in vals:
462
+ if isinstance(v, str) and v.startswith("http"):
463
+ external_media_urls.append(v)
464
+ elif isinstance(vals, dict):
465
+ for v in vals.values():
466
+ if isinstance(v, str) and v.startswith("http"):
467
+ external_media_urls.append(v)
468
+ except Exception:
469
+ pass
268
470
  elif isinstance(json_content, list):
269
471
  for item in json_content:
270
472
  if isinstance(item, dict) and "download_url" in item and "blob_name" in item:
271
473
  uploaded_file_urls[item["blob_name"]] = item["download_url"]
272
- final_result_content.append(f"Uploaded file: [{item['blob_name']}]({item['download_url']})")
474
+ # look for url-like fields
475
+ if isinstance(item, dict):
476
+ for key in ("url", "image", "thumbnail", "video", "download_url"):
477
+ try:
478
+ val = item.get(key)
479
+ if isinstance(val, str) and val.startswith("http"):
480
+ external_media_urls.append(val)
481
+ except Exception:
482
+ pass
273
483
  # otherwise, ignore scalars like numbers/strings
274
484
  except json.JSONDecodeError:
275
- pass
485
+ # best-effort regex scrape of http(s) URLs that look like media
486
+ try:
487
+ import re
488
+ for m in re.findall(r"https?://[^\s)\]}]+", content):
489
+ if any(m.lower().endswith(ext) for ext in (".png", ".jpg", ".jpeg", ".webp", ".gif", ".mp4", ".webm", ".mov")):
490
+ external_media_urls.append(m)
491
+ except Exception:
492
+ pass
276
493
 
277
494
  final_result_content.append(str(content))
495
+ # Enqueue progress update for background processing (non-blocking)
278
496
  asyncio.create_task(self.handle_progress_update(task_id, task_completed_percentage, processed_content_for_progress, message.type, source))
279
497
 
280
- await self.progress_tracker.publish_progress(task_id, 0.95, "✨ Finalizing your results...")
281
-
282
- # Targeted auto-upload: if no URLs yet, opportunistically upload recent deliverables created in this run.
283
- # Fast, non-recursive, and limited to known dirs and extensions.
284
498
  try:
285
- if not uploaded_file_urls:
286
- import time
287
- now = time.time()
288
- max_age_seconds = 15 * 60 # last 15 minutes
289
- deliverable_exts = {".pptx", ".ppt", ".csv", ".png", ".jpg", ".jpeg", ".pdf"}
290
- candidate_dirs: List[str] = []
291
- try:
292
- wd = os.getenv("CORTEX_WORK_DIR", "/tmp/coding")
293
- # In Azure Functions, prefer /tmp for write access
294
- if os.getenv("WEBSITE_INSTANCE_ID") and wd.startswith("/app/"):
295
- wd = "/tmp/coding"
296
- candidate_dirs.append(wd)
297
- except Exception:
298
- pass
299
- candidate_dirs.append("/tmp/coding")
499
+ # Finalizing update (transient only)
500
+ await self.progress_tracker.set_transient_update(task_id, 0.95, "✨ Finalizing your results...")
501
+ except Exception:
502
+ pass
300
503
 
301
- recent_files: List[str] = []
302
- for d in candidate_dirs:
303
- if not d:
304
- continue
305
- # Ensure directory exists if possible
306
- try:
307
- os.makedirs(d, exist_ok=True)
308
- except Exception:
309
- pass
310
- if not os.path.isdir(d):
311
- continue
312
- try:
313
- for name in os.listdir(d):
314
- fp = os.path.join(d, name)
315
- if not os.path.isfile(fp):
316
- continue
504
+ # No fallback file generation: if required assets are missing, allow termination to report inability
505
+ except Exception:
506
+ # Catch-all for the outer deliverables-referencing try block
507
+ pass
508
+
509
+ # Per-request auto-upload: select best deliverables (avoid multiple near-identical PPTX)
510
+ try:
511
+ deliverable_exts = {".pptx", ".ppt", ".csv", ".png", ".jpg", ".jpeg", ".pdf", ".zip"}
512
+ req_dir = os.getenv("CORTEX_WORK_DIR", "/tmp/coding")
513
+ selected_paths: List[str] = []
514
+ if os.path.isdir(req_dir):
515
+ # Gather candidates by extension
516
+ candidates_by_ext: Dict[str, List[Dict[str, Any]]] = {}
517
+ for root, _, files in os.walk(req_dir):
518
+ for name in files:
519
+ try:
317
520
  _, ext = os.path.splitext(name)
318
- if ext.lower() not in deliverable_exts:
521
+ ext = ext.lower()
522
+ if ext not in deliverable_exts:
319
523
  continue
524
+ fp = os.path.join(root, name)
525
+ size = 0
526
+ mtime = 0.0
320
527
  try:
321
- mtime = os.path.getmtime(fp)
322
- if now - mtime <= max_age_seconds:
323
- recent_files.append(fp)
528
+ st = os.stat(fp)
529
+ size = int(getattr(st, 'st_size', 0))
530
+ mtime = float(getattr(st, 'st_mtime', 0.0))
324
531
  except Exception:
325
- continue
326
- except Exception:
327
- continue
532
+ pass
533
+ lst = candidates_by_ext.setdefault(ext, [])
534
+ lst.append({"path": fp, "size": size, "mtime": mtime})
535
+ except Exception:
536
+ continue
537
+
538
+ # Selection policy:
539
+ # - For .pptx and .ppt: choose the single largest file (assume most complete)
540
+ # - For other ext: include all
541
+ for ext, items in candidates_by_ext.items():
542
+ if ext in (".pptx", ".ppt"):
543
+ if items:
544
+ best = max(items, key=lambda x: (x.get("size", 0), x.get("mtime", 0.0)))
545
+ selected_paths.append(best["path"])
546
+ else:
547
+ for it in items:
548
+ selected_paths.append(it["path"])
328
549
 
329
- # Sort newest first and cap to a few uploads to keep fast
330
- recent_files.sort(key=lambda p: os.path.getmtime(p), reverse=True)
331
- recent_files = recent_files[:5]
550
+ # Upload only selected paths
551
+ for fp in selected_paths:
552
+ try:
553
+ up_json = upload_file_to_azure_blob(fp, blob_name=None)
554
+ up = json.loads(up_json)
555
+ if "download_url" in up and "blob_name" in up:
556
+ uploaded_file_urls[up["blob_name"]] = up["download_url"]
557
+ try:
558
+ bname = os.path.basename(str(up.get("blob_name") or ""))
559
+ extl = os.path.splitext(bname)[1].lower()
560
+ is_img = extl in (".png", ".jpg", ".jpeg", ".webp", ".gif")
561
+ uploaded_files_list.append({
562
+ "file_name": bname,
563
+ "url": up["download_url"],
564
+ "ext": extl,
565
+ "is_image": is_img,
566
+ })
567
+ if is_img:
568
+ external_media_urls.append(up["download_url"])
569
+ except Exception:
570
+ pass
571
+ except Exception:
572
+ continue
573
+ except Exception:
574
+ pass
332
575
 
333
- for fp in recent_files:
334
- try:
335
- up_json = upload_file_to_azure_blob(fp, blob_name=None)
336
- up = json.loads(up_json)
337
- if "download_url" in up and "blob_name" in up:
338
- uploaded_file_urls[up["blob_name"]] = up["download_url"]
339
- final_result_content.append(f"Uploaded file: [{up['blob_name']}]({up['download_url']})")
340
- except Exception:
341
- continue
576
+ # Deduplicate and cap external media to a reasonable number
577
+ try:
578
+ dedup_media = []
579
+ seen = set()
580
+ for u in external_media_urls:
581
+ if u in seen:
582
+ continue
583
+ seen.add(u)
584
+ dedup_media.append(u)
585
+ external_media_urls = dedup_media[:24]
342
586
  except Exception:
343
587
  pass
344
588
 
345
589
  result_limited_to_fit = "\n".join(final_result_content)
346
590
 
347
- presenter_task = f"""
348
- Present the task result in a great way, Markdown, it'll be shown in a React app that supports markdown that doesn't have access to your local files.
349
- Make sure to use all the info you have, do not miss any info.
350
- Make sure to have images, videos, etc. users love them.
351
- UI must be professional that is really important.
591
+ # Provide the presenter with explicit file list to avoid duplication and downloads sections
592
+ uploaded_files_list = []
593
+ try:
594
+ for blob_name, url in (uploaded_file_urls.items() if isinstance(uploaded_file_urls, dict) else []):
595
+ try:
596
+ fname = os.path.basename(str(blob_name))
597
+ except Exception:
598
+ fname = str(blob_name)
599
+ extl = os.path.splitext(fname)[1].lower()
600
+ is_image = extl in (".png", ".jpg", ".jpeg", ".webp", ".gif")
601
+ uploaded_files_list.append({"file_name": fname, "url": url, "ext": extl, "is_image": is_image})
602
+ except Exception:
603
+ pass
352
604
 
353
- TASK:
605
+ presenter_task = f"""
606
+ Present the task result in a clean, professional Markdown/HTML that contains ONLY what the task requested. This will be shown in a React app.
607
+ Use only the information provided.
354
608
 
609
+ TASK:
355
610
  {task}
356
611
 
357
612
  RAW_AGENT_COMMUNICATIONS:
358
-
359
613
  {result_limited_to_fit}
360
614
 
361
615
  UPLOADED_FILES_SAS_URLS:
362
-
363
616
  {json.dumps(uploaded_file_urls, indent=2)}
364
617
 
365
- **CRITICAL INSTRUCTION: Analyze the RAW_AGENT_COMMUNICATIONS above. Your ONLY goal is to extract and present the final, user-facing result requested in the TASK. Absolutely DO NOT include any code, internal agent thought processes, tool calls, technical logs, or descriptions of how the task was accomplished. Focus solely on delivering the ANSWER to the user's original request in a clear, professional, and visually appealing Markdown format. If the task was to create a file, you MUST ONLY use download URLs found in UPLOADED_FILES_SAS_URLS. DO NOT fabricate, guess, or link to any external or placeholder URLs. If no uploaded URLs exist, say so and present the results without a download link. Remove all extraneous information.**
618
+ EXTERNAL_MEDIA_URLS:
619
+ {json.dumps(external_media_urls, indent=2)}
620
+
621
+ UPLOADED_FILES_LIST:
622
+ {json.dumps(uploaded_files_list, indent=2)}
623
+
624
+ STRICT OUTPUT RULES:
625
+ - Use UPLOADED_FILES_LIST (SAS URLs) and EXTERNAL_MEDIA_URLS to present assets. Always use the SAS URL provided in UPLOADED_FILES_LIST for any uploaded file.
626
+ - Images (png, jpg, jpeg, webp, gif): embed inline in a Visuals section using <figure><img/></figure> with captions. Do NOT provide links for images.
627
+ - Non-image files (pptx, pdf, csv): insert a SINGLE inline anchor (<a href=\"...\">filename</a>) at the first natural mention; do NOT create a 'Downloads' section; do NOT repeat links.
628
+ - For media: do NOT use grid or containers.
629
+ - SINGLE media: wrap in <figure style=\"margin: 12px 0;\"> with <img style=\"display:block;width:100%;max-width:960px;height:auto;margin:0 auto;border-radius:8px;box-shadow:0 1px 3px rgba(0,0,0,0.12)\"> and a <figcaption style=\"margin-top:8px;font-size:0.92em;color:inherit;opacity:0.8;text-align:center;\">.
630
+ - MULTIPLE media: output consecutive <figure> elements, one per row; no wrapping <div>.
631
+ - Avoid framework classes in HTML; rely on inline styles only. Do NOT include any class attributes. Use color: inherit for captions to respect dark/light mode.
632
+ - Never fabricate URLs, images, or content; use only links present in UPLOADED_FILES_LIST or EXTERNAL_MEDIA_URLS.
633
+ - Present each uploaded non-image file ONCE only (no duplicate links), using its filename as the link text.
634
+ - For links, prefer HTML anchor tags: <a href=\"URL\" target=\"_blank\" rel=\"noopener noreferrer\" download>FILENAME</a>.
635
+ - Do NOT include code, tool usage, or internal logs.
636
+ - Be detailed and user-facing. Include Overview, Visuals, Key Takeaways, and Next Actions sections. Do not create a Downloads section.
366
637
  """
367
638
 
368
639
  presenter_stream = presenter_agent.run_stream(task=presenter_task)
@@ -375,20 +646,50 @@ Generate only the progress update:"""
375
646
  last_message = task_result.messages[-1]
376
647
  text_result = last_message.content if hasattr(last_message, 'content') else None
377
648
 
378
- # Safety check: if presenter fabricated an external link while uploaded_file_urls is empty, replace with explicit notice
649
+ # No presenter normalization or auto-upload based on text; rely on strict prompts
379
650
  try:
380
- if not uploaded_file_urls and isinstance(text_result, str):
381
- # naive pattern for http links
382
- import re
383
- if re.search(r"https?://", text_result):
384
- logger.warning("Presenter output contains a link but no uploaded URLs exist. Rewriting to prevent hallucinated links.")
385
- text_result = re.sub(r"\(https?://[^)]+\)", "(Download not available)", text_result)
651
+ pass
386
652
  except Exception:
387
653
  pass
388
654
 
655
+ # No post-sanitization here; enforce via presenter prompt only per user request
656
+
389
657
  logger.info(f"🔍 TASK RESULT:\n{text_result}")
658
+
659
+ # Run terminator agent once presenter has produced final text
660
+ try:
661
+ term_messages = []
662
+ term_task = f"""
663
+ Check if the task is completed and output TERMINATE if and only if done.
664
+ Latest presenter output:
665
+ {text_result}
666
+
667
+ Uploaded files (SAS URLs):
668
+ {json.dumps(uploaded_file_urls, indent=2)}
669
+
670
+ TASK:
671
+ {task}
672
+
673
+ Reminder:
674
+ - If the TASK explicitly requires downloadable files, ensure at least one clickable download URL is present.
675
+ - If the TASK does not require files (e.g., simple answer, calculation, summary, troubleshooting), terminate when the presenter has clearly delivered the requested content. Do not require downloads in that case.
676
+ """
677
+ term_stream = terminator_agent.run_stream(task=term_task)
678
+ async for message in term_stream:
679
+ term_messages.append(message)
680
+ if term_messages:
681
+ t_last = term_messages[-1].messages[-1]
682
+ t_text = t_last.content if hasattr(t_last, 'content') else ''
683
+ logger.info(f"🛑 TERMINATOR: {t_text}")
684
+ # If it didn't say TERMINATE but we already have presenter output, proceed anyway
685
+ except Exception as e:
686
+ logger.warning(f"⚠️ Terminator agent failed or unavailable: {e}")
390
687
  final_data = text_result or "🎉 Your task is complete!"
391
688
  await self.progress_tracker.publish_progress(task_id, 1.0, "🎉 Your task is complete!", data=final_data)
689
+ try:
690
+ await self.progress_tracker.mark_final(task_id)
691
+ except Exception:
692
+ pass
392
693
  self.final_progress_sent = True
393
694
 
394
695
  return text_result
@@ -399,6 +700,21 @@ Generate only the progress update:"""
399
700
 
400
701
  async def close(self):
401
702
  """Close all connections gracefully."""
703
+ # Stop background progress worker first to avoid pending task destruction
704
+ try:
705
+ if self._progress_worker_task is not None:
706
+ try:
707
+ self._progress_worker_task.cancel()
708
+ try:
709
+ await self._progress_worker_task
710
+ except asyncio.CancelledError:
711
+ pass
712
+ finally:
713
+ self._progress_worker_task = None
714
+ # Allow GC of the queue
715
+ self._progress_queue = None
716
+ except Exception as e:
717
+ logger.debug(f"Error stopping progress worker: {e}")
402
718
  clients_to_close = [
403
719
  self.o3_model_client,
404
720
  self.o4_mini_model_client,