agno-2.1.10-py3-none-any.whl → agno-2.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. agno/agent/agent.py +1594 -1248
  2. agno/knowledge/knowledge.py +11 -0
  3. agno/knowledge/reader/pptx_reader.py +101 -0
  4. agno/knowledge/reader/reader_factory.py +14 -0
  5. agno/knowledge/types.py +1 -0
  6. agno/models/anthropic/claude.py +2 -2
  7. agno/models/base.py +4 -4
  8. agno/models/ollama/chat.py +7 -2
  9. agno/os/app.py +1 -1
  10. agno/os/interfaces/a2a/router.py +2 -2
  11. agno/os/interfaces/agui/router.py +2 -2
  12. agno/os/router.py +7 -7
  13. agno/os/routers/evals/schemas.py +31 -31
  14. agno/os/routers/health.py +6 -2
  15. agno/os/routers/knowledge/schemas.py +49 -47
  16. agno/os/routers/memory/schemas.py +16 -16
  17. agno/os/routers/metrics/schemas.py +16 -16
  18. agno/os/routers/session/session.py +382 -7
  19. agno/os/schema.py +254 -231
  20. agno/os/utils.py +1 -1
  21. agno/run/agent.py +54 -1
  22. agno/run/team.py +48 -0
  23. agno/run/workflow.py +15 -5
  24. agno/session/summary.py +45 -13
  25. agno/session/team.py +90 -5
  26. agno/team/team.py +1130 -849
  27. agno/utils/agent.py +372 -0
  28. agno/utils/events.py +144 -2
  29. agno/utils/message.py +60 -0
  30. agno/utils/print_response/agent.py +10 -6
  31. agno/utils/print_response/team.py +6 -4
  32. agno/utils/print_response/workflow.py +7 -5
  33. agno/utils/team.py +9 -8
  34. agno/workflow/condition.py +17 -9
  35. agno/workflow/loop.py +18 -10
  36. agno/workflow/parallel.py +14 -6
  37. agno/workflow/router.py +16 -8
  38. agno/workflow/step.py +14 -6
  39. agno/workflow/steps.py +14 -6
  40. agno/workflow/workflow.py +331 -123
  41. {agno-2.1.10.dist-info → agno-2.2.1.dist-info}/METADATA +63 -23
  42. {agno-2.1.10.dist-info → agno-2.2.1.dist-info}/RECORD +45 -43
  43. {agno-2.1.10.dist-info → agno-2.2.1.dist-info}/WHEEL +0 -0
  44. {agno-2.1.10.dist-info → agno-2.2.1.dist-info}/licenses/LICENSE +0 -0
  45. {agno-2.1.10.dist-info → agno-2.2.1.dist-info}/top_level.txt +0 -0
agno/agent/agent.py CHANGED
@@ -64,15 +64,28 @@ from agno.run.cancel import (
  )
  from agno.run.messages import RunMessages
  from agno.run.team import TeamRunOutputEvent
- from agno.session import AgentSession, SessionSummaryManager
+ from agno.session import AgentSession, SessionSummaryManager, TeamSession, WorkflowSession
  from agno.tools import Toolkit
  from agno.tools.function import Function
+ from agno.utils.agent import (
+ await_for_background_tasks,
+ await_for_background_tasks_stream,
+ collect_joint_audios,
+ collect_joint_files,
+ collect_joint_images,
+ collect_joint_videos,
+ scrub_history_messages_from_run_output,
+ scrub_media_from_run_output,
+ scrub_tool_results_from_run_output,
+ wait_for_background_tasks,
+ wait_for_background_tasks_stream,
+ )
  from agno.utils.common import is_typed_dict, validate_typed_dict
  from agno.utils.events import (
- create_memory_update_completed_event,
- create_memory_update_started_event,
  create_parser_model_response_completed_event,
  create_parser_model_response_started_event,
+ create_post_hook_completed_event,
+ create_post_hook_started_event,
  create_pre_hook_completed_event,
  create_pre_hook_started_event,
  create_reasoning_completed_event,
@@ -80,13 +93,17 @@ from agno.utils.events import (
  create_reasoning_step_event,
  create_run_cancelled_event,
  create_run_completed_event,
+ create_run_content_completed_event,
  create_run_continued_event,
  create_run_error_event,
  create_run_output_content_event,
  create_run_paused_event,
  create_run_started_event,
+ create_session_summary_completed_event,
+ create_session_summary_started_event,
  create_tool_call_completed_event,
  create_tool_call_started_event,
+ handle_event,
  )
  from agno.utils.hooks import filter_hook_args, normalize_hooks
  from agno.utils.knowledge import get_agentic_or_user_search_filters
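The event-factory imports added above mean a streamed run now emits extra lifecycle events (run content completed, session summary started/completed, post-hook started/completed) alongside content chunks. A minimal sketch of observing them from the caller side; the OpenAIChat model class and printing event type names are illustrative assumptions, not taken from this diff:

    from agno.agent import Agent
    from agno.models.openai import OpenAIChat

    agent = Agent(model=OpenAIChat(id="gpt-4o-mini"))

    # stream_events=True asks the agent to yield lifecycle events, not just content chunks
    for event in agent.run("Summarise this repo", stream=True, stream_events=True):
        # each item is either a content event or one of the events built by the
        # create_*_event factories imported above
        print(type(event).__name__)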
@@ -100,7 +117,7 @@ from agno.utils.log import (
  set_log_level_to_info,
  )
  from agno.utils.merge_dict import merge_dictionaries
- from agno.utils.message import get_text_from_message
+ from agno.utils.message import filter_tool_calls, get_text_from_message
  from agno.utils.print_response.agent import (
  aprint_response,
  aprint_response_stream,
@@ -186,6 +203,8 @@ class Agent:
  add_history_to_context: bool = False
  # Number of historical runs to include in the messages
  num_history_runs: int = 3
+ # Maximum number of tool calls to include from history (None = no limit)
+ max_tool_calls_from_history: Optional[int] = None

  # --- Knowledge ---
  knowledge: Optional[Knowledge] = None
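max_tool_calls_from_history caps how many historical tool calls are carried into the prompt when history is enabled. A hedged configuration sketch; the SqliteDb import path and file name are assumptions based on common agno usage, not part of this diff:

    from agno.agent import Agent
    from agno.db.sqlite import SqliteDb
    from agno.models.openai import OpenAIChat

    agent = Agent(
        model=OpenAIChat(id="gpt-4o-mini"),
        db=SqliteDb(db_file="agent.db"),   # history requires a database
        add_history_to_context=True,       # include previous runs in the context
        num_history_runs=3,                # number of past runs to include
        max_tool_calls_from_history=5,     # new in 2.2.x: keep at most 5 tool calls from history (None = no limit)
    )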
@@ -332,7 +351,9 @@ class Agent:
  # Stream the response from the Agent
  stream: Optional[bool] = None
  # Stream the intermediate steps from the Agent
- stream_intermediate_steps: bool = False
+ stream_events: Optional[bool] = None
+ # [Deprecated] Stream the intermediate steps from the Agent
+ stream_intermediate_steps: Optional[bool] = None

  # Persist the events on the run response
  store_events: bool = False
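stream_events replaces stream_intermediate_steps, which is kept only as a deprecated alias and folded into stream_events. A small usage sketch of the two spellings:

    from agno.agent import Agent
    from agno.models.openai import OpenAIChat

    agent = Agent(model=OpenAIChat(id="gpt-4o-mini"))

    # Preferred spelling in 2.2.x
    for event in agent.run("hello", stream=True, stream_events=True):
        ...

    # Deprecated spelling, still accepted: internally it is merged into stream_events
    for event in agent.run("hello", stream=True, stream_intermediate_steps=True):
        ...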
@@ -400,6 +421,7 @@ class Agent:
  session_summary_manager: Optional[SessionSummaryManager] = None,
  add_history_to_context: bool = False,
  num_history_runs: int = 3,
+ max_tool_calls_from_history: Optional[int] = None,
  store_media: bool = True,
  store_tool_messages: bool = True,
  store_history_messages: bool = True,
@@ -456,7 +478,8 @@ class Agent:
  use_json_mode: bool = False,
  save_response_to_file: Optional[str] = None,
  stream: Optional[bool] = None,
- stream_intermediate_steps: bool = False,
+ stream_events: Optional[bool] = None,
+ stream_intermediate_steps: Optional[bool] = None,
  store_events: bool = False,
  events_to_skip: Optional[List[RunEvent]] = None,
  role: Optional[str] = None,
@@ -500,11 +523,7 @@ class Agent:

  self.add_history_to_context = add_history_to_context
  self.num_history_runs = num_history_runs
-
- if add_history_to_context and not db:
- log_warning(
- "add_history_to_context is True, but no database has been assigned to the agent. History will not be added to the context."
- )
+ self.max_tool_calls_from_history = max_tool_calls_from_history

  self.store_media = store_media
  self.store_tool_messages = store_tool_messages
@@ -574,7 +593,7 @@ class Agent:
  self.save_response_to_file = save_response_to_file

  self.stream = stream
- self.stream_intermediate_steps = stream_intermediate_steps
+ self.stream_events = stream_events or stream_intermediate_steps

  self.store_events = store_events
  self.role = role
@@ -607,6 +626,22 @@ class Agent:

  self._hooks_normalised = False

+ # Lazy-initialized shared thread pool executor for background tasks (memory, cultural knowledge, etc.)
+ self._background_executor: Optional[Any] = None
+
+ @property
+ def background_executor(self) -> Any:
+ """Lazy initialization of shared thread pool executor for background tasks.
+
+ Handles both memory creation and cultural knowledge updates concurrently.
+ Initialized only on first use (runtime, not instantiation) and reused across runs.
+ """
+ if self._background_executor is None:
+ from concurrent.futures import ThreadPoolExecutor
+
+ self._background_executor = ThreadPoolExecutor(max_workers=3, thread_name_prefix="agno-bg")
+ return self._background_executor
+
  def set_id(self) -> None:
  if self.id is None:
  self.id = generate_id_from_name(self.name)
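The run methods below submit memory and cultural-knowledge updates to this executor before calling the model and join them near the end of the run. A rough sketch of that pattern, with stand-in functions in place of the private agno helpers:

    from concurrent.futures import Future, ThreadPoolExecutor
    from typing import Optional

    executor = ThreadPoolExecutor(max_workers=3, thread_name_prefix="agno-bg")

    def make_memories() -> None:            # stand-in for Agent._make_memories
        ...

    def make_cultural_knowledge() -> None:  # stand-in for Agent._make_cultural_knowledge
        ...

    # Kick off the slow bookkeeping before the model call...
    memory_future: Optional[Future] = executor.submit(make_memories)
    culture_future: Optional[Future] = executor.submit(make_cultural_knowledge)

    # ...run the model and tools here...

    # ...then join both futures before finishing the run, which is roughly what
    # wait_for_background_tasks(memory_future=..., cultural_knowledge_future=...) does.
    for future in (memory_future, culture_future):
        if future is not None:
            future.result()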
@@ -779,11 +814,9 @@ class Agent:

  def _initialize_session(
  self,
- run_id: str,
  session_id: Optional[str] = None,
  user_id: Optional[str] = None,
- session_state: Optional[Dict[str, Any]] = None,
- ) -> Tuple[str, Optional[str], Dict[str, Any]]:
+ ) -> Tuple[str, Optional[str]]:
  """Initialize the session for the agent."""

  if session_id is None:
@@ -800,26 +833,23 @@ class Agent:
  if user_id is None or user_id == "":
  user_id = self.user_id

- # Determine the session_state
- if session_state is None:
- session_state = self.session_state or {}
- else:
- # If run session_state is provided, merge agent defaults under it
- # This ensures run state takes precedence over agent defaults
- if self.session_state:
- base_state = self.session_state.copy()
- merge_dictionaries(base_state, session_state)
- session_state.clear()
- session_state.update(base_state)
+ return session_id, user_id

- if user_id is not None:
+ def _initialize_session_state(
+ self,
+ session_state: Dict[str, Any],
+ user_id: Optional[str] = None,
+ session_id: Optional[str] = None,
+ run_id: Optional[str] = None,
+ ) -> Dict[str, Any]:
+ """Initialize the session state for the agent."""
+ if user_id:
  session_state["current_user_id"] = user_id
  if session_id is not None:
  session_state["current_session_id"] = session_id
  if run_id is not None:
  session_state["current_run_id"] = run_id
-
- return session_id, user_id, session_state # type: ignore
+ return session_state

  def _run(
  self,
@@ -841,16 +871,18 @@

  Steps:
  1. Execute pre-hooks
- 2. Prepare run messages
- 3. Reason about the task if reasoning is enabled
- 4. Generate a response from the Model (includes running function calls)
- 5. Update the RunOutput with the model response
- 6. Execute post-hooks
- 7. Calculate session metrics
- 8. Optional: Save output to file if save_response_to_file is set
- 9. Add RunOutput to Agent Session
- 10. Update Agent Memory, Cultural Knowledge and Summaries
- 11. Save session to storage
+ 2. Determine tools for model
+ 3. Prepare run messages
+ 4. Start memory creation in background thread
+ 5. Reason about the task if reasoning is enabled
+ 6. Generate a response from the Model (includes running function calls)
+ 7. Update the RunOutput with the model response
+ 8. Store media if enabled
+ 9. Convert the response to the structured format if needed
+ 10. Execute post-hooks
+ 11. Wait for background memory creation and cultural knowledge creation
+ 12. Create session summary
+ 13. Cleanup and store the run response and session
  """

  # Register run for cancellation tracking
@@ -876,6 +908,7 @@ class Agent:
  # Consume the generator without yielding
  deque(pre_hook_iterator, maxlen=0)

+ # 2. Determine tools for model
  self._determine_tools_for_model(
  model=self.model,
  run_response=run_response,
@@ -887,7 +920,7 @@ class Agent:
  knowledge_filters=knowledge_filters,
  )

- # 2. Prepare run messages
+ # 3. Prepare run messages
  run_messages: RunMessages = self._get_run_messages(
  run_response=run_response,
  input=run_input.input_content,
@@ -911,114 +944,132 @@

  log_debug(f"Agent Run Start: {run_response.run_id}", center=True)

- # 3. Reason about the task if reasoning is enabled
- self._handle_reasoning(run_response=run_response, run_messages=run_messages)
+ # Start memory creation on a separate thread (runs concurrently with the main execution loop)
+ memory_future = None
+ # 4. Start memory creation in background thread if memory manager is enabled and agentic memory is disabled
+ if run_messages.user_message is not None and self.memory_manager is not None and not self.enable_agentic_memory:
+ log_debug("Starting memory creation in background thread.")
+ memory_future = self.background_executor.submit(
+ self._make_memories, run_messages=run_messages, user_id=user_id
+ )

- # Check for cancellation before model call
- raise_if_cancelled(run_response.run_id) # type: ignore
+ # Start cultural knowledge creation on a separate thread (runs concurrently with the main execution loop)
+ cultural_knowledge_future = None
+ if (
+ run_messages.user_message is not None
+ and self.culture_manager is not None
+ and self.update_cultural_knowledge
+ ):
+ log_debug("Starting cultural knowledge creation in background thread.")
+ cultural_knowledge_future = self.background_executor.submit(
+ self._make_cultural_knowledge, run_messages=run_messages
+ )

- # 4. Generate a response from the Model (includes running function calls)
- self.model = cast(Model, self.model)
- model_response: ModelResponse = self.model.response(
- messages=run_messages.messages,
- tools=self._tools_for_model,
- functions=self._functions_for_model,
- tool_choice=self.tool_choice,
- tool_call_limit=self.tool_call_limit,
- response_format=response_format,
- run_response=run_response,
- send_media_to_model=self.send_media_to_model,
- )
+ try:
+ raise_if_cancelled(run_response.run_id) # type: ignore
+
+ # 5. Reason about the task
+ self._handle_reasoning(run_response=run_response, run_messages=run_messages)

- # Check for cancellation after model call
- raise_if_cancelled(run_response.run_id) # type: ignore
+ # Check for cancellation before model call
+ raise_if_cancelled(run_response.run_id) # type: ignore

- # If an output model is provided, generate output using the output model
- self._generate_response_with_output_model(model_response, run_messages)
+ # 6. Generate a response from the Model (includes running function calls)
+ self.model = cast(Model, self.model)
+ model_response: ModelResponse = self.model.response(
+ messages=run_messages.messages,
+ tools=self._tools_for_model,
+ functions=self._functions_for_model,
+ tool_choice=self.tool_choice,
+ tool_call_limit=self.tool_call_limit,
+ response_format=response_format,
+ run_response=run_response,
+ send_media_to_model=self.send_media_to_model,
+ )

- # If a parser model is provided, structure the response separately
- self._parse_response_with_parser_model(model_response, run_messages)
+ # Check for cancellation after model call
+ raise_if_cancelled(run_response.run_id) # type: ignore

- # 5. Update the RunOutput with the model response
- self._update_run_response(
- model_response=model_response,
- run_response=run_response,
- run_messages=run_messages,
- )
+ # If an output model is provided, generate output using the output model
+ self._generate_response_with_output_model(model_response, run_messages)

- if self.store_media:
- self._store_media(run_response, model_response)
+ # If a parser model is provided, structure the response separately
+ self._parse_response_with_parser_model(model_response, run_messages)

- # We should break out of the run function
- if any(tool_call.is_paused for tool_call in run_response.tools or []):
- return self._handle_agent_run_paused(
- run_response=run_response,
- run_messages=run_messages,
- session=session,
- user_id=user_id,
+ # 7. Update the RunOutput with the model response
+ self._update_run_response(
+ model_response=model_response, run_response=run_response, run_messages=run_messages
  )

- # Convert the response to the structured format if needed
- self._convert_response_to_structured_format(run_response)
+ # We should break out of the run function
+ if any(tool_call.is_paused for tool_call in run_response.tools or []):
+ wait_for_background_tasks(
+ memory_future=memory_future, cultural_knowledge_future=cultural_knowledge_future
+ )

- # Execute post-hooks after output is generated but before response is returned
- if self.post_hooks is not None:
- self._execute_post_hooks(
- hooks=self.post_hooks, # type: ignore
- run_output=run_response,
- session_state=session_state,
- dependencies=dependencies,
- metadata=metadata,
- session=session,
- user_id=user_id,
- debug_mode=debug_mode,
- **kwargs,
- )
- run_response.status = RunStatus.completed
- # Stop the timer for the Run duration
- if run_response.metrics:
- run_response.metrics.stop_timer()
+ return self._handle_agent_run_paused(run_response=run_response, session=session, user_id=user_id)

- # 7. Calculate session metrics
- self._update_session_metrics(session=session, run_response=run_response)
+ # 8. Store media if enabled
+ if self.store_media:
+ self._store_media(run_response, model_response)

- # 8. Optional: Save output to file if save_response_to_file is set
- self.save_run_response_to_file(
- run_response=run_response,
- input=run_messages.user_message,
- session_id=session.session_id,
- user_id=user_id,
- )
+ # 9. Convert the response to the structured format if needed
+ self._convert_response_to_structured_format(run_response)

- # 9. Add the RunOutput to Agent Session
- session.upsert_run(run=run_response)
+ # 10. Execute post-hooks after output is generated but before response is returned
+ if self.post_hooks is not None:
+ post_hook_iterator = self._execute_post_hooks(
+ hooks=self.post_hooks, # type: ignore
+ run_output=run_response,
+ session=session,
+ user_id=user_id,
+ session_state=session_state,
+ dependencies=dependencies,
+ metadata=metadata,
+ debug_mode=debug_mode,
+ **kwargs,
+ )
+ deque(post_hook_iterator, maxlen=0)

- # 10. Update Agent Memory, Cultural Knowledge and Summaries
- response_iterator = self._make_memories_cultural_knowledge_and_summaries(
- run_response=run_response,
- run_messages=run_messages,
- session=session,
- user_id=user_id,
- )
- # Consume the response iterator to ensure the memory is updated before the run is completed
- deque(response_iterator, maxlen=0)
+ # Check for cancellation
+ raise_if_cancelled(run_response.run_id) # type: ignore

- # 11. Scrub the stored run based on storage flags
- if self._scrub_run_output_for_storage(run_response):
- session.upsert_run(run=run_response)
+ # 11. Wait for background memory creation and cultural knowledge creation
+ wait_for_background_tasks(memory_future=memory_future, cultural_knowledge_future=cultural_knowledge_future)

- # 12. Save session to memory
- self.save_session(session=session)
+ # 12. Create session summary
+ if self.session_summary_manager is not None:
+ # Upsert the RunOutput to Agent Session before creating the session summary
+ session.upsert_run(run=run_response)
+ try:
+ self.session_summary_manager.create_session_summary(session=session)
+ except Exception as e:
+ log_warning(f"Error in session summary creation: {str(e)}")
+
+ run_response.status = RunStatus.completed

- # Log Agent Telemetry
- self._log_agent_telemetry(session_id=session.session_id, run_id=run_response.run_id)
+ # 13. Cleanup and store the run response and session
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)

- log_debug(f"Agent Run End: {run_response.run_id}", center=True, symbol="*")
+ # Log Agent Telemetry
+ self._log_agent_telemetry(session_id=session.session_id, run_id=run_response.run_id)

- # Always clean up the run tracking
- cleanup_run(run_response.run_id) # type: ignore
+ log_debug(f"Agent Run End: {run_response.run_id}", center=True, symbol="*")

- return run_response
+ return run_response
+ except RunCancelledException as e:
+ # Handle run cancellation
+ log_info(f"Run {run_response.run_id} was cancelled")
+ run_response.content = str(e)
+ run_response.status = RunStatus.cancelled
+
+ # Cleanup and store the run response and session
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)
+
+ return run_response
+ finally:
+ # Always clean up the run tracking
+ cleanup_run(run_response.run_id) # type: ignore

  def _run_stream(
  self,
@@ -1033,7 +1084,7 @@ class Agent:
  metadata: Optional[Dict[str, Any]] = None,
  dependencies: Optional[Dict[str, Any]] = None,
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
- stream_intermediate_steps: bool = False,
+ stream_events: bool = False,
  yield_run_response: bool = False,
  debug_mode: Optional[bool] = None,
  **kwargs: Any,
@@ -1042,15 +1093,15 @@

  Steps:
  1. Execute pre-hooks
- 2. Prepare run messages
- 3. Reason about the task if reasoning is enabled
- 4. Generate a response from the Model (includes running function calls)
- 5. Calculate session metrics
- 6. Optional: Save output to file if save_response_to_file is set
- 7. Add the RunOutput to the Agent Session
- 8. Update Agent Memory, Cultural Knowledge and Summaries
- 9. Create the run completed event
- 10. Save session to storage
+ 2. Determine tools for model
+ 3. Prepare run messages
+ 4. Start memory creation in background thread
+ 5. Reason about the task if reasoning is enabled
+ 6. Process model response
+ 7. Parse response with parser model if provided
+ 8. Wait for background memory creation and cultural knowledge creation
+ 9. Create session summary
+ 10. Cleanup and store the run response and session
  """

  # Register run for cancellation tracking
@@ -1076,6 +1127,7 @@ class Agent:
  for event in pre_hook_iterator:
  yield event

+ # 2. Determine tools for model
  self._determine_tools_for_model(
  model=self.model,
  run_response=run_response,
@@ -1087,7 +1139,7 @@ class Agent:
  knowledge_filters=knowledge_filters,
  )

- # 2. Prepare run messages
+ # 3. Prepare run messages
  run_messages: RunMessages = self._get_run_messages(
  run_response=run_response,
  input=run_input.input_content,
@@ -1111,25 +1163,55 @@ class Agent:

  log_debug(f"Agent Run Start: {run_response.run_id}", center=True)

+ # Start memory creation on a separate thread (runs concurrently with the main execution loop)
+ memory_future = None
+ # 4. Start memory creation in background thread if memory manager is enabled and agentic memory is disabled
+ if run_messages.user_message is not None and self.memory_manager is not None and not self.enable_agentic_memory:
+ log_debug("Starting memory creation in background thread.")
+ memory_future = self.background_executor.submit(
+ self._make_memories, run_messages=run_messages, user_id=user_id
+ )
+
+ # Start cultural knowledge creation on a separate thread (runs concurrently with the main execution loop)
+ cultural_knowledge_future = None
+ if (
+ run_messages.user_message is not None
+ and self.culture_manager is not None
+ and self.update_cultural_knowledge
+ ):
+ log_debug("Starting cultural knowledge creation in background thread.")
+ cultural_knowledge_future = self.background_executor.submit(
+ self._make_cultural_knowledge, run_messages=run_messages
+ )
+
  try:
  # Start the Run by yielding a RunStarted event
- if stream_intermediate_steps:
- yield self._handle_event(create_run_started_event(run_response), run_response)
+ if stream_events:
+ yield handle_event( # type: ignore
+ create_run_started_event(run_response),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
+ )

- # 3. Reason about the task if reasoning is enabled
- yield from self._handle_reasoning_stream(run_response=run_response, run_messages=run_messages)
+ # 5. Reason about the task if reasoning is enabled
+ yield from self._handle_reasoning_stream(
+ run_response=run_response,
+ run_messages=run_messages,
+ stream_events=stream_events,
+ )

  # Check for cancellation before model processing
  raise_if_cancelled(run_response.run_id) # type: ignore

- # 4. Generate a response from the Model (includes running function calls)
+ # 6. Process model response
  if self.output_model is None:
  for event in self._handle_model_response_stream(
  session=session,
  run_response=run_response,
  run_messages=run_messages,
  response_format=response_format,
- stream_intermediate_steps=stream_intermediate_steps,
+ stream_events=stream_events,
  ):
  raise_if_cancelled(run_response.run_id) # type: ignore
  yield event
@@ -1144,11 +1226,11 @@ class Agent:
  run_response=run_response,
  run_messages=run_messages,
  response_format=response_format,
- stream_intermediate_steps=stream_intermediate_steps,
+ stream_events=stream_events,
  ):
  raise_if_cancelled(run_response.run_id) # type: ignore
  if isinstance(event, RunContentEvent):
- if stream_intermediate_steps:
+ if stream_events:
  yield IntermediateRunContentEvent(
  content=event.content,
  content_type=event.content_type,
@@ -1161,7 +1243,7 @@ class Agent:
  session=session,
  run_response=run_response,
  run_messages=run_messages,
- stream_intermediate_steps=stream_intermediate_steps,
+ stream_events=stream_events,
  ):
  raise_if_cancelled(run_response.run_id) # type: ignore
  yield event
@@ -1169,26 +1251,40 @@ class Agent:
  # Check for cancellation after model processing
  raise_if_cancelled(run_response.run_id) # type: ignore

- # If a parser model is provided, structure the response separately
+ # 7. Parse response with parser model if provided
  yield from self._parse_response_with_parser_model_stream(
- session=session,
- run_response=run_response,
- stream_intermediate_steps=stream_intermediate_steps,
+ session=session, run_response=run_response, stream_events=stream_events
  )

  # We should break out of the run function
  if any(tool_call.is_paused for tool_call in run_response.tools or []):
- yield from self._handle_agent_run_paused_stream(
+ yield from wait_for_background_tasks_stream(
+ memory_future=memory_future,
+ cultural_knowledge_future=cultural_knowledge_future,
+ stream_events=stream_events,
  run_response=run_response,
- run_messages=run_messages,
- session=session,
- user_id=user_id,
+ events_to_skip=self.events_to_skip,
+ store_events=self.store_events,
+ )
+
+ # Handle the paused run
+ yield from self._handle_agent_run_paused_stream(
+ run_response=run_response, session=session, user_id=user_id
  )
  return

+ # Yield RunContentCompletedEvent
+ if stream_events:
+ yield handle_event( # type: ignore
+ create_run_content_completed_event(from_run_response=run_response),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
+ )
+
  # Execute post-hooks after output is generated but before response is returned
  if self.post_hooks is not None:
- self._execute_post_hooks(
+ yield from self._execute_post_hooks(
  hooks=self.post_hooks, # type: ignore
  run_output=run_response,
  session_state=session_state,
@@ -1200,48 +1296,56 @@ class Agent:
  **kwargs,
  )

- run_response.status = RunStatus.completed
-
- # Set the run duration
- if run_response.metrics:
- run_response.metrics.stop_timer()
-
- # 5. Calculate session metrics
- self._update_session_metrics(session=session, run_response=run_response)
-
- # 6. Optional: Save output to file if save_response_to_file is set
- self.save_run_response_to_file(
+ # 8. Wait for background memory creation and cultural knowledge creation
+ yield from wait_for_background_tasks_stream(
+ memory_future=memory_future,
+ cultural_knowledge_future=cultural_knowledge_future,
+ stream_events=stream_events,
  run_response=run_response,
- input=run_messages.user_message,
- session_id=session.session_id,
- user_id=user_id,
  )

- # 7. Add RunOutput to Agent Session
- session.upsert_run(run=run_response)
+ # 9. Create session summary
+ if self.session_summary_manager is not None:
+ # Upsert the RunOutput to Agent Session before creating the session summary
+ session.upsert_run(run=run_response)

- # 8. Update Agent Memory, Cultural Knowledge and Summaries
- yield from self._make_memories_cultural_knowledge_and_summaries(
- run_response=run_response,
- run_messages=run_messages,
- session=session,
- user_id=user_id,
- )
+ if stream_events:
+ yield handle_event( # type: ignore
+ create_session_summary_started_event(from_run_response=run_response),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
+ )
+ try:
+ self.session_summary_manager.create_session_summary(session=session)
+ except Exception as e:
+ log_warning(f"Error in session summary creation: {str(e)}")
+ if stream_events:
+ yield handle_event( # type: ignore
+ create_session_summary_completed_event(
+ from_run_response=run_response, session_summary=session.summary
+ ),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
+ )

- # 9. Create the run completed event
- completed_event = self._handle_event(
- create_run_completed_event(from_run_response=run_response), run_response
+ # Create the run completed event
+ completed_event = handle_event( # type: ignore
+ create_run_completed_event(from_run_response=run_response),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
  )

- # 10. Scrub the stored run based on storage flags
- if self._scrub_run_output_for_storage(run_response):
- session.upsert_run(run=run_response)
+ # Set the run status to completed
+ run_response.status = RunStatus.completed

- # 11. Save session to storage
- self.save_session(session=session)
+ # 10. Cleanup and store the run response and session
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)

- if stream_intermediate_steps:
- yield completed_event
+ if stream_events:
+ yield completed_event # type: ignore

  if yield_run_response:
  yield run_response
@@ -1258,14 +1362,15 @@ class Agent:
  run_response.content = str(e)

  # Yield the cancellation event
- yield self._handle_event(
+ yield handle_event( # type: ignore
  create_run_cancelled_event(from_run_response=run_response, reason=str(e)),
  run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
  )

- # Add the RunOutput to Agent Session even when cancelled
- session.upsert_run(run=run_response)
- self.save_session(session=session)
+ # Cleanup and store the run response and session
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)
  finally:
  # Always clean up the run tracking
  cleanup_run(run_response.run_id) # type: ignore
@@ -1276,6 +1381,7 @@ class Agent:
  input: Union[str, List, Dict, Message, BaseModel, List[Message]],
  *,
  stream: Literal[False] = False,
+ stream_events: Optional[bool] = None,
  stream_intermediate_steps: Optional[bool] = None,
  user_id: Optional[str] = None,
  session_id: Optional[str] = None,
@@ -1301,6 +1407,7 @@ class Agent:
  input: Union[str, List, Dict, Message, BaseModel, List[Message]],
  *,
  stream: Literal[True] = True,
+ stream_events: Optional[bool] = None,
  stream_intermediate_steps: Optional[bool] = None,
  user_id: Optional[str] = None,
  session_id: Optional[str] = None,
@@ -1326,6 +1433,7 @@ class Agent:
  input: Union[str, List, Dict, Message, BaseModel, List[Message]],
  *,
  stream: Optional[bool] = None,
+ stream_events: Optional[bool] = None,
  stream_intermediate_steps: Optional[bool] = None,
  user_id: Optional[str] = None,
  session_id: Optional[str] = None,
@@ -1351,6 +1459,11 @@ class Agent:
  "`run` method is not supported with an async database. Please use `arun` method instead."
  )

+ if (add_history_to_context or self.add_history_to_context) and not self.db and not self.team_id:
+ log_warning(
+ "add_history_to_context is True, but no database has been assigned to the agent. History will not be added to the context."
+ )
+
  # Create a run_id for this specific run
  run_id = str(uuid4())

@@ -1365,12 +1478,7 @@ class Agent:
  self.post_hooks = normalize_hooks(self.post_hooks)
  self._hooks_normalised = True

- session_id, user_id, session_state = self._initialize_session(
- run_id=run_id,
- session_id=session_id,
- user_id=user_id,
- session_state=session_state,
- )
+ session_id, user_id = self._initialize_session(session_id=session_id, user_id=user_id)

  # Initialize the Agent
  self.initialize_agent(debug_mode=debug_mode)
@@ -1392,15 +1500,19 @@ class Agent:
  agent_session = self._read_or_create_session(session_id=session_id, user_id=user_id)
  self._update_metadata(session=agent_session)

+ # Initialize session state
+ session_state = self._initialize_session_state(
+ session_state=session_state or {}, user_id=user_id, session_id=session_id, run_id=run_id
+ )
  # Update session state from DB
  session_state = self._load_session_state(session=agent_session, session_state=session_state)
-
  # Determine runtime dependencies
  run_dependencies = dependencies if dependencies is not None else self.dependencies

  # Resolve dependencies
  if run_dependencies is not None:
  self._resolve_run_dependencies(dependencies=run_dependencies)
+
  add_dependencies = (
  add_dependencies_to_context if add_dependencies_to_context is not None else self.add_dependencies_to_context
  )
@@ -1422,17 +1534,18 @@ class Agent:
  if stream is None:
  stream = False if self.stream is None else self.stream

- if stream_intermediate_steps is None:
- stream_intermediate_steps = (
- False if self.stream_intermediate_steps is None else self.stream_intermediate_steps
- )
+ # Considering both stream_events and stream_intermediate_steps (deprecated)
+ stream_events = stream_events or stream_intermediate_steps

- # Can't have stream_intermediate_steps if stream is False
+ # Can't stream events if streaming is disabled
  if stream is False:
- stream_intermediate_steps = False
+ stream_events = False
+
+ if stream_events is None:
+ stream_events = False if self.stream_events is None else self.stream_events

  self.stream = self.stream or stream
- self.stream_intermediate_steps = self.stream_intermediate_steps or (stream_intermediate_steps and self.stream)
+ self.stream_events = self.stream_events or stream_events

  # Prepare arguments for the model
  response_format = self._get_response_format() if self.parser_model is None else None
@@ -1484,7 +1597,7 @@ class Agent:
  metadata=metadata,
  dependencies=run_dependencies,
  response_format=response_format,
- stream_intermediate_steps=stream_intermediate_steps,
+ stream_events=stream_events,
  yield_run_response=yield_run_response,
  debug_mode=debug_mode,
  **kwargs,
@@ -1523,17 +1636,6 @@ class Agent:
  import time

  time.sleep(delay)
- except RunCancelledException as e:
- # Handle run cancellation
- log_info(f"Run {run_response.run_id} was cancelled")
- run_response.content = str(e)
- run_response.status = RunStatus.cancelled
-
- # Add the RunOutput to Agent Session even when cancelled
- agent_session.upsert_run(run=run_response)
- self.save_session(session=agent_session)
-
- return run_response
  except KeyboardInterrupt:
  run_response.content = "Operation cancelled by user"
  run_response.status = RunStatus.cancelled
@@ -1578,7 +1680,7 @@ class Agent:
  debug_mode: Optional[bool] = None,
  **kwargs: Any,
  ) -> RunOutput:
- """Run the Agent and yield the RunOutput.
+ """Run the Agent and return the RunOutput.

  Steps:
  1. Read or create session
@@ -1587,14 +1689,16 @@ class Agent:
  4. Execute pre-hooks
  5. Determine tools for model
  6. Prepare run messages
- 7. Reason about the task if reasoning is enabled
- 8. Generate a response from the Model (includes running function calls)
- 9. Update the RunOutput with the model response
- 10. Execute post-hooks
- 11. Add RunOutput to Agent Session
- 12. Update Agent Memory
- 13. Scrub the stored run if needed
- 14. Save session to storage
+ 7. Start memory creation in background task
+ 8. Reason about the task if reasoning is enabled
+ 9. Generate a response from the Model (includes running function calls)
+ 10. Update the RunOutput with the model response
+ 11. Convert response to structured format
+ 12. Store media if enabled
+ 13. Execute post-hooks
+ 14. Wait for background memory creation
+ 15. Create session summary
+ 16. Cleanup and store (scrub, stop timer, save to file, add to session, calculate metrics, save session)
  """
  log_debug(f"Agent Run Start: {run_response.run_id}", center=True)

@@ -1602,13 +1706,15 @@ class Agent:
  register_run(run_response.run_id) # type: ignore

  # 1. Read or create session. Reads from the database if provided.
- if self._has_async_db():
- agent_session = await self._aread_or_create_session(session_id=session_id, user_id=user_id)
- else:
- agent_session = self._read_or_create_session(session_id=session_id, user_id=user_id)
+ agent_session = await self._aread_or_create_session(session_id=session_id, user_id=user_id)

  # 2. Update metadata and session state
  self._update_metadata(session=agent_session)
+ # Initialize session state
+ session_state = self._initialize_session_state(
+ session_state=session_state or {}, user_id=user_id, session_id=session_id, run_id=run_response.run_id
+ )
+ # Update session state from DB
  if session_state is not None:
  session_state = self._load_session_state(session=agent_session, session_state=session_state)

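The async paths below start the same background bookkeeping with asyncio.create_task, join it via await_for_background_tasks, and cancel it in the finally block if it is still pending. A stripped-down sketch of that pattern with stand-in coroutines rather than the real agno methods:

    import asyncio
    from typing import Optional

    async def amake_memories() -> None:   # stand-in for Agent._amake_memories
        await asyncio.sleep(0)

    async def call_model() -> str:        # stand-in for the model call
        return "response"

    async def arun() -> str:
        memory_task: Optional[asyncio.Task] = asyncio.create_task(amake_memories())
        try:
            response = await call_model()
            if memory_task is not None:
                await memory_task      # roughly what await_for_background_tasks does
            return response
        finally:
            # mirror the new finally block: cancel the task if it is still running
            if memory_task is not None and not memory_task.done():
                memory_task.cancel()
                try:
                    await memory_task
                except asyncio.CancelledError:
                    pass

    asyncio.run(arun())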
@@ -1672,12 +1778,37 @@ class Agent:
  if len(run_messages.messages) == 0:
  log_error("No messages to be sent to the model.")

+ # 7. Start memory creation as a background task (runs concurrently with the main execution)
+ memory_task = None
+ if run_messages.user_message is not None and self.memory_manager is not None and not self.enable_agentic_memory:
+ import asyncio
+
+ log_debug("Starting memory creation in background task.")
+ memory_task = asyncio.create_task(self._amake_memories(run_messages=run_messages, user_id=user_id))
+
+ # Start cultural knowledge creation on a separate thread (runs concurrently with the main execution loop)
+ cultural_knowledge_task = None
+ if (
+ run_messages.user_message is not None
+ and self.culture_manager is not None
+ and self.update_cultural_knowledge
+ ):
+ import asyncio
+
+ log_debug("Starting cultural knowledge creation in background thread.")
+ cultural_knowledge_task = asyncio.create_task(self._acreate_cultural_knowledge(run_messages=run_messages))
+
  try:
- # 7. Reason about the task if reasoning is enabled
+ # Check for cancellation before model call
+ raise_if_cancelled(run_response.run_id) # type: ignore
+
+ # 8. Reason about the task if reasoning is enabled
  await self._ahandle_reasoning(run_response=run_response, run_messages=run_messages)
+
+ # Check for cancellation before model call
  raise_if_cancelled(run_response.run_id) # type: ignore

- # 8. Generate a response from the Model (includes running function calls)
+ # 9. Generate a response from the Model (includes running function calls)
  model_response: ModelResponse = await self.model.aresponse(
  messages=run_messages.messages,
  tools=self._tools_for_model,
@@ -1687,43 +1818,42 @@ class Agent:
  response_format=response_format,
  send_media_to_model=self.send_media_to_model,
  )
+
+ # Check for cancellation after model call
  raise_if_cancelled(run_response.run_id) # type: ignore

  # If an output model is provided, generate output using the output model
  await self._agenerate_response_with_output_model(model_response=model_response, run_messages=run_messages)
+
  # If a parser model is provided, structure the response separately
  await self._aparse_response_with_parser_model(model_response=model_response, run_messages=run_messages)

- # 9. Update the RunOutput with the model response
+ # 10. Update the RunOutput with the model response
  self._update_run_response(
  model_response=model_response,
  run_response=run_response,
  run_messages=run_messages,
  )

- # Optional: Store media
- if self.store_media:
- self._store_media(run_response, model_response)
-
- # Break out of the run function if a tool call is paused
+ # We should break out of the run function
  if any(tool_call.is_paused for tool_call in run_response.tools or []):
- return self._handle_agent_run_paused(
- run_response=run_response,
- run_messages=run_messages,
- session=agent_session,
- user_id=user_id,
+ await await_for_background_tasks(
+ memory_task=memory_task, cultural_knowledge_task=cultural_knowledge_task
+ )
+ return await self._ahandle_agent_run_paused(
+ run_response=run_response, session=agent_session, user_id=user_id
  )
- raise_if_cancelled(run_response.run_id) # type: ignore
-
- # 10. Calculate session metrics
- self._update_session_metrics(session=agent_session, run_response=run_response)

- # Convert the response to the structured format if needed
+ # 11. Convert the response to the structured format if needed
  self._convert_response_to_structured_format(run_response)

- # Execute post-hooks (after output is generated but before response is returned)
+ # 12. Store media if enabled
+ if self.store_media:
+ self._store_media(run_response, model_response)
+
+ # 13. Execute post-hooks (after output is generated but before response is returned)
  if self.post_hooks is not None:
- await self._aexecute_post_hooks(
+ async for _ in self._aexecute_post_hooks(
  hooks=self.post_hooks, # type: ignore
  run_output=run_response,
  session_state=session_state,
@@ -1733,44 +1863,28 @@ class Agent:
  user_id=user_id,
  debug_mode=debug_mode,
  **kwargs,
- )
+ ):
+ pass

- # Set the run status to completed
- run_response.status = RunStatus.completed
+ # Check for cancellation
+ raise_if_cancelled(run_response.run_id) # type: ignore

- # Set the run duration
- if run_response.metrics:
- run_response.metrics.stop_timer()
+ # 14. Wait for background memory creation
+ await await_for_background_tasks(memory_task=memory_task, cultural_knowledge_task=cultural_knowledge_task)

- # Optional: Save output to file if save_response_to_file is set
- self.save_run_response_to_file(
- run_response=run_response,
- input=run_messages.user_message,
- session_id=agent_session.session_id,
- user_id=user_id,
- )
+ # 15. Create session summary
+ if self.session_summary_manager is not None:
+ # Upsert the RunOutput to Agent Session before creating the session summary
+ agent_session.upsert_run(run=run_response)
+ try:
+ await self.session_summary_manager.acreate_session_summary(session=agent_session)
+ except Exception as e:
+ log_warning(f"Error in session summary creation: {str(e)}")

- # 11. Add RunOutput to Agent Session
- agent_session.upsert_run(run=run_response)
+ run_response.status = RunStatus.completed

- # 12. Update Agent Memory, Cultural Knowledge and Summaries
- async for _ in self._amake_memories_cultural_knowledge_and_summaries(
- run_response=run_response,
- run_messages=run_messages,
- session=agent_session,
- user_id=user_id,
- ):
- pass
-
- # 13. Scrub the stored run based on storage flags
- if self._scrub_run_output_for_storage(run_response):
- agent_session.upsert_run(run=run_response)
-
- # 14. Save session to storage
- if self._has_async_db():
- await self.asave_session(session=agent_session)
- else:
- self.save_session(session=agent_session)
+ # 16. Cleanup and store the run response and session
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)

  # Log Agent Telemetry
  await self._alog_agent_telemetry(session_id=agent_session.session_id, run_id=run_response.run_id)
@@ -1785,16 +1899,30 @@ class Agent:
  run_response.content = str(e)
  run_response.status = RunStatus.cancelled

- # Update the Agent Session before exiting
- agent_session.upsert_run(run=run_response)
- if self._has_async_db():
- await self.asave_session(session=agent_session)
- else:
- self.save_session(session=agent_session)
+ # Cleanup and store the run response and session
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)

  return run_response

  finally:
+ # Cancel the memory task if it's still running
+ if memory_task is not None and not memory_task.done():
+ import asyncio
+
+ memory_task.cancel()
+ try:
+ await memory_task
+ except asyncio.CancelledError:
+ pass
+ # Cancel the cultural knowledge task if it's still running
+ if cultural_knowledge_task is not None and not cultural_knowledge_task.done():
+ import asyncio
+
+ cultural_knowledge_task.cancel()
+ try:
+ await cultural_knowledge_task
+ except asyncio.CancelledError:
+ pass
  # Always clean up the run tracking
  cleanup_run(run_response.run_id) # type: ignore

@@ -1811,7 +1939,7 @@ class Agent:
  add_session_state_to_context: Optional[bool] = None,
  metadata: Optional[Dict[str, Any]] = None,
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
- stream_intermediate_steps: bool = False,
+ stream_events: bool = False,
  yield_run_response: Optional[bool] = None,
  debug_mode: Optional[bool] = None,
  **kwargs: Any,
@@ -1825,28 +1953,35 @@ class Agent:
  4. Execute pre-hooks
  5. Determine tools for model
  6. Prepare run messages
- 7. Reason about the task if reasoning is enabled
- 8. Generate a response from the Model (includes running function calls)
- 9. Calculate session metrics
- 10. Add RunOutput to Agent Session
- 11. Update Agent Memory
- 12. Create the run completed event
- 13. Save session to storage
+ 7. Start memory creation in background task
+ 8. Reason about the task if reasoning is enabled
+ 9. Generate a response from the Model (includes running function calls)
+ 10. Parse response with parser model if provided
+ 11. Wait for background memory creation
+ 12. Create session summary
+ 13. Cleanup and store (scrub, stop timer, save to file, add to session, calculate metrics, save session)
  """
  log_debug(f"Agent Run Start: {run_response.run_id}", center=True)

  # Start the Run by yielding a RunStarted event
- if stream_intermediate_steps:
- yield self._handle_event(create_run_started_event(run_response), run_response)
+ if stream_events:
+ yield handle_event( # type: ignore
+ create_run_started_event(run_response),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
+ )

  # 1. Read or create session. Reads from the database if provided.
- if self._has_async_db():
- agent_session = await self._aread_or_create_session(session_id=session_id, user_id=user_id)
- else:
- agent_session = self._read_or_create_session(session_id=session_id, user_id=user_id)
+ agent_session = await self._aread_or_create_session(session_id=session_id, user_id=user_id)

  # 2. Update metadata and session state
  self._update_metadata(session=agent_session)
+ # Initialize session state
+ session_state = self._initialize_session_state(
+ session_state=session_state or {}, user_id=user_id, session_id=session_id, run_id=run_response.run_id
+ )
+ # Update session state from DB
  if session_state is not None:
  session_state = self._load_session_state(session=agent_session, session_state=session_state)

@@ -1864,6 +1999,9 @@ class Agent:
  run_response=run_response,
  run_input=run_input,
  session=agent_session,
+ session_state=session_state,
+ dependencies=dependencies,
+ metadata=metadata,
  user_id=user_id,
  debug_mode=debug_mode,
  **kwargs,
@@ -1906,24 +2044,49 @@ class Agent:
  if len(run_messages.messages) == 0:
  log_error("No messages to be sent to the model.")

+ # 7. Start memory creation as a background task (runs concurrently with the main execution)
+ memory_task = None
+ if run_messages.user_message is not None and self.memory_manager is not None and not self.enable_agentic_memory:
+ import asyncio
+
+ log_debug("Starting memory creation in background task.")
+ memory_task = asyncio.create_task(self._amake_memories(run_messages=run_messages, user_id=user_id))
+
+ # Start cultural knowledge creation on a separate thread (runs concurrently with the main execution loop)
+ cultural_knowledge_task = None
+ if (
+ run_messages.user_message is not None
+ and self.culture_manager is not None
+ and self.update_cultural_knowledge
+ ):
+ import asyncio
+
+ log_debug("Starting cultural knowledge creation in background task.")
+ cultural_knowledge_task = asyncio.create_task(self._acreate_cultural_knowledge(run_messages=run_messages))
+
  # Register run for cancellation tracking
  register_run(run_response.run_id) # type: ignore

  try:
- # 7. Reason about the task if reasoning is enabled
- async for item in self._ahandle_reasoning_stream(run_response=run_response, run_messages=run_messages):
+ # 8. Reason about the task if reasoning is enabled
+ async for item in self._ahandle_reasoning_stream(
+ run_response=run_response,
+ run_messages=run_messages,
+ stream_events=stream_events,
+ ):
  raise_if_cancelled(run_response.run_id) # type: ignore
  yield item
+
  raise_if_cancelled(run_response.run_id) # type: ignore

- # 8. Generate a response from the Model
+ # 9. Generate a response from the Model
  if self.output_model is None:
  async for event in self._ahandle_model_response_stream(
  session=agent_session,
  run_response=run_response,
  run_messages=run_messages,
  response_format=response_format,
- stream_intermediate_steps=stream_intermediate_steps,
+ stream_events=stream_events,
  ):
  raise_if_cancelled(run_response.run_id) # type: ignore
  yield event
@@ -1938,11 +2101,11 @@ class Agent:
  run_response=run_response,
  run_messages=run_messages,
  response_format=response_format,
- stream_intermediate_steps=stream_intermediate_steps,
+ stream_events=stream_events,
  ):
  raise_if_cancelled(run_response.run_id) # type: ignore
  if isinstance(event, RunContentEvent):
- if stream_intermediate_steps:
+ if stream_events:
  yield IntermediateRunContentEvent(
  content=event.content,
  content_type=event.content_type,
@@ -1955,7 +2118,7 @@ class Agent:
  session=agent_session,
  run_response=run_response,
  run_messages=run_messages,
- stream_intermediate_steps=stream_intermediate_steps,
+ stream_events=stream_events,
  ):
  raise_if_cancelled(run_response.run_id) # type: ignore
  yield event
@@ -1963,28 +2126,39 @@ class Agent:
  # Check for cancellation after model processing
  raise_if_cancelled(run_response.run_id) # type: ignore

- # If a parser model is provided, structure the response separately
+ # 10. Parse response with parser model if provided
  async for event in self._aparse_response_with_parser_model_stream(
- session=agent_session,
- run_response=run_response,
- stream_intermediate_steps=stream_intermediate_steps,
+ session=agent_session, run_response=run_response, stream_events=stream_events
  ):
  yield event

+ if stream_events:
+ yield handle_event( # type: ignore
+ create_run_content_completed_event(from_run_response=run_response),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
+ )
+
  # Break out of the run function if a tool call is paused
  if any(tool_call.is_paused for tool_call in run_response.tools or []):
- for item in self._handle_agent_run_paused_stream(
+ async for item in await_for_background_tasks_stream(
+ memory_task=memory_task,
+ cultural_knowledge_task=cultural_knowledge_task,
+ stream_events=stream_events,
  run_response=run_response,
- run_messages=run_messages,
- session=agent_session,
- user_id=user_id,
+ ):
+ yield item
+
+ async for item in self._ahandle_agent_run_paused_stream(
+ run_response=run_response, session=agent_session, user_id=user_id
  ):
  yield item
  return

  # Execute post-hooks (after output is generated but before response is returned)
  if self.post_hooks is not None:
- await self._aexecute_post_hooks(
+ async for event in self._aexecute_post_hooks(
  hooks=self.post_hooks, # type: ignore
  run_output=run_response,
  session_state=session_state,
@@ -1994,55 +2168,62 @@ class Agent:
1994
2168
  user_id=user_id,
1995
2169
  debug_mode=debug_mode,
1996
2170
  **kwargs,
1997
- )
1998
-
1999
- # Set the run status to completed
2000
- run_response.status = RunStatus.completed
2001
-
2002
- # Set the run duration
2003
- if run_response.metrics:
2004
- run_response.metrics.stop_timer()
2005
-
2006
- # 9. Calculate session metrics
2007
- self._update_session_metrics(session=agent_session, run_response=run_response)
2171
+ ):
2172
+ yield event
2008
2173
 
2009
- # Optional: Save output to file if save_response_to_file is set
2010
- self.save_run_response_to_file(
2174
+ # 11. Wait for background memory and cultural knowledge creation
2175
+ async for item in await_for_background_tasks_stream(
2176
+ memory_task=memory_task,
2177
+ cultural_knowledge_task=cultural_knowledge_task,
2178
+ stream_events=stream_events,
2011
2179
  run_response=run_response,
2012
- input=run_messages.user_message,
2013
- session_id=agent_session.session_id,
2014
- user_id=user_id,
2015
- )
2180
+ events_to_skip=self.events_to_skip,
2181
+ store_events=self.store_events,
2182
+ ):
2183
+ yield item
2016
2184
 
2017
- # 10. Add RunOutput to Agent Session
2018
- agent_session.upsert_run(run=run_response)
2185
+ # 12. Create session summary
2186
+ if self.session_summary_manager is not None:
2187
+ # Upsert the RunOutput to Agent Session before creating the session summary
2188
+ agent_session.upsert_run(run=run_response)
2019
2189
 
2020
- # 11. Update Agent Memory
2021
- async for event in self._amake_memories_cultural_knowledge_and_summaries(
2022
- run_response=run_response,
2023
- run_messages=run_messages,
2024
- session=agent_session,
2025
- user_id=user_id,
2026
- ):
2027
- yield event
2190
+ if stream_events:
2191
+ yield handle_event( # type: ignore
2192
+ create_session_summary_started_event(from_run_response=run_response),
2193
+ run_response,
2194
+ events_to_skip=self.events_to_skip, # type: ignore
2195
+ store_events=self.store_events,
2196
+ )
2197
+ try:
2198
+ await self.session_summary_manager.acreate_session_summary(session=agent_session)
2199
+ except Exception as e:
2200
+ log_warning(f"Error in session summary creation: {str(e)}")
2201
+ if stream_events:
2202
+ yield handle_event( # type: ignore
2203
+ create_session_summary_completed_event(
2204
+ from_run_response=run_response, session_summary=agent_session.summary
2205
+ ),
2206
+ run_response,
2207
+ events_to_skip=self.events_to_skip, # type: ignore
2208
+ store_events=self.store_events,
2209
+ )
2028
2210
 
2029
- # 12. Create the run completed event
2030
- completed_event = self._handle_event(
2031
- create_run_completed_event(from_run_response=run_response), run_response
2211
+ # Create the run completed event
2212
+ completed_event = handle_event(
2213
+ create_run_completed_event(from_run_response=run_response),
2214
+ run_response,
2215
+ events_to_skip=self.events_to_skip, # type: ignore
2216
+ store_events=self.store_events,
2032
2217
  )
2033
2218
 
2034
- # 13. Scrub the stored run based on storage flags
2035
- if self._scrub_run_output_for_storage(run_response):
2036
- agent_session.upsert_run(run=run_response)
2219
+ # Set the run status to completed
2220
+ run_response.status = RunStatus.completed
2037
2221
 
2038
- # 14. Save session to storage
2039
- if self._has_async_db():
2040
- await self.asave_session(session=agent_session)
2041
- else:
2042
- self.save_session(session=agent_session)
2222
+ # 13. Cleanup and store the run response and session
2223
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)
2043
2224
 
2044
- if stream_intermediate_steps:
2045
- yield completed_event
2225
+ if stream_events:
2226
+ yield completed_event # type: ignore
2046
2227
 
2047
2228
  if yield_run_response:
2048
2229
  yield run_response
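
Session-summary creation in the streaming path above follows the same event envelope as the other steps: when stream_events is on, a started event is yielded, the summary is created inside a try/except that only logs a warning on failure, and a completed event is yielded with whatever summary the session ends up holding. A stripped-down sketch of that envelope; the event strings here are placeholders, not agno event objects:

    import logging
    from typing import Iterator

    logger = logging.getLogger(__name__)

    def create_summary(session: dict) -> None:
        # Stand-in for the summary manager; may raise.
        session["summary"] = f"{len(session['runs'])} runs so far"

    def summarize_with_events(session: dict, stream_events: bool) -> Iterator[str]:
        if stream_events:
            yield "SessionSummaryStarted"
        try:
            create_summary(session)
        except Exception as exc:
            # Failures are logged, not raised, so the run can still complete.
            logger.warning("Error in session summary creation: %s", exc)
        if stream_events:
            yield f"SessionSummaryCompleted: {session.get('summary')}"

    for event in summarize_with_events({"runs": [1, 2]}, stream_events=True):
        print(event)
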
@@ -2059,18 +2240,31 @@ class Agent:
2059
2240
  run_response.content = str(e)
2060
2241
 
2061
2242
  # Yield the cancellation event
2062
- yield self._handle_event(
2243
+ yield handle_event( # type: ignore
2063
2244
  create_run_cancelled_event(from_run_response=run_response, reason=str(e)),
2064
2245
  run_response,
2246
+ events_to_skip=self.events_to_skip, # type: ignore
2247
+ store_events=self.store_events,
2065
2248
  )
2066
2249
 
2067
- # Add the RunOutput to Agent Session even when cancelled
2068
- agent_session.upsert_run(run=run_response)
2069
- if self._has_async_db():
2070
- await self.asave_session(session=agent_session)
2071
- else:
2072
- self.save_session(session=agent_session)
2250
+ # Cleanup and store the run response and session
2251
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)
2073
2252
  finally:
2253
+ # Cancel the memory task if it's still running
2254
+ if memory_task is not None and not memory_task.done():
2255
+ memory_task.cancel()
2256
+ try:
2257
+ await memory_task
2258
+ except asyncio.CancelledError:
2259
+ pass
2260
+
2261
+ if cultural_knowledge_task is not None and not cultural_knowledge_task.done():
2262
+ cultural_knowledge_task.cancel()
2263
+ try:
2264
+ await cultural_knowledge_task
2265
+ except asyncio.CancelledError:
2266
+ pass
2267
+
2074
2268
  # Always clean up the run tracking
2075
2269
  cleanup_run(run_response.run_id) # type: ignore
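
The streaming path above and the synchronous paths below now share one cancellation protocol: the run id is registered up front, raise_if_cancelled() is checked between expensive steps, RunCancelledException marks the run as cancelled while still persisting it, and a finally block always unregisters the run. A small sketch of that register/check/cleanup shape, using a module-level set in place of agno's run registry (the registry and exception here are stand-ins):

    _active_runs: set[str] = set()

    class RunCancelled(Exception):
        pass

    def register_run(run_id: str) -> None:
        _active_runs.add(run_id)

    def cancel_run(run_id: str) -> None:
        # Cancelling simply drops the id, so later checks fail fast.
        _active_runs.discard(run_id)

    def raise_if_cancelled(run_id: str) -> None:
        if run_id not in _active_runs:
            raise RunCancelled(f"run {run_id} was cancelled")

    def execute(run_id: str) -> str:
        register_run(run_id)
        try:
            raise_if_cancelled(run_id)   # before the model call
            result = "model output"
            raise_if_cancelled(run_id)   # after the model call
            return result
        except RunCancelled as exc:
            return f"cancelled: {exc}"   # persist a cancelled run instead of losing it
        finally:
            _active_runs.discard(run_id)  # always drop the tracking entry

    print(execute("run-1"))
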
2076
2270
 
@@ -2087,6 +2281,7 @@ class Agent:
2087
2281
  images: Optional[Sequence[Image]] = None,
2088
2282
  videos: Optional[Sequence[Video]] = None,
2089
2283
  files: Optional[Sequence[File]] = None,
2284
+ stream_events: Optional[bool] = None,
2090
2285
  stream_intermediate_steps: Optional[bool] = None,
2091
2286
  retries: Optional[int] = None,
2092
2287
  knowledge_filters: Optional[Dict[str, Any]] = None,
@@ -2111,6 +2306,7 @@ class Agent:
2111
2306
  images: Optional[Sequence[Image]] = None,
2112
2307
  videos: Optional[Sequence[Video]] = None,
2113
2308
  files: Optional[Sequence[File]] = None,
2309
+ stream_events: Optional[bool] = None,
2114
2310
  stream_intermediate_steps: Optional[bool] = None,
2115
2311
  retries: Optional[int] = None,
2116
2312
  knowledge_filters: Optional[Dict[str, Any]] = None,
@@ -2136,6 +2332,7 @@ class Agent:
2136
2332
  images: Optional[Sequence[Image]] = None,
2137
2333
  videos: Optional[Sequence[Video]] = None,
2138
2334
  files: Optional[Sequence[File]] = None,
2335
+ stream_events: Optional[bool] = None,
2139
2336
  stream_intermediate_steps: Optional[bool] = None,
2140
2337
  retries: Optional[int] = None,
2141
2338
  knowledge_filters: Optional[Dict[str, Any]] = None,
@@ -2150,6 +2347,11 @@ class Agent:
2150
2347
  ) -> Union[RunOutput, AsyncIterator[RunOutputEvent]]:
2151
2348
  """Async Run the Agent and return the response."""
2152
2349
 
2350
+ if (add_history_to_context or self.add_history_to_context) and not self.db and not self.team_id:
2351
+ log_warning(
2352
+ "add_history_to_context is True, but no database has been assigned to the agent. History will not be added to the context."
2353
+ )
2354
+
2153
2355
  # Create a run_id for this specific run
2154
2356
  run_id = str(uuid4())
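
The new warning above fires when add_history_to_context is requested without a database, because chat history is read back from stored sessions. A hedged sketch of the intended setup; the SqliteDb import path, its db_file argument, and the OpenAIChat model are assumptions for illustration and may differ in your installed version:

    # Assumed imports; adjust to your installed agno version.
    from agno.agent import Agent
    from agno.db.sqlite import SqliteDb          # assumption: bundled SQLite-backed db
    from agno.models.openai import OpenAIChat    # assumption: any configured model works

    agent = Agent(
        model=OpenAIChat(id="gpt-4o-mini"),
        db=SqliteDb(db_file="agent_sessions.db"),  # without a db, history cannot be loaded
        add_history_to_context=True,               # prior runs from the session are added to the prompt
        session_id="support-thread-42",
    )

    # Each run in the same session now sees the earlier exchanges.
    agent.print_response("My name is Ada.")
    agent.print_response("What is my name?")
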
2155
2357
 
@@ -2165,12 +2367,7 @@ class Agent:
2165
2367
  self._hooks_normalised = True
2166
2368
 
2167
2369
  # Initialize session
2168
- session_id, user_id, session_state = self._initialize_session(
2169
- run_id=run_id,
2170
- session_id=session_id,
2171
- user_id=user_id,
2172
- session_state=session_state,
2173
- )
2370
+ session_id, user_id = self._initialize_session(session_id=session_id, user_id=user_id)
2174
2371
 
2175
2372
  # Initialize the Agent
2176
2373
  self.initialize_agent(debug_mode=debug_mode)
@@ -2204,17 +2401,18 @@ class Agent:
2204
2401
  if stream is None:
2205
2402
  stream = False if self.stream is None else self.stream
2206
2403
 
2207
- if stream_intermediate_steps is None:
2208
- stream_intermediate_steps = (
2209
- False if self.stream_intermediate_steps is None else self.stream_intermediate_steps
2210
- )
2404
+ # Considering both stream_events and stream_intermediate_steps (deprecated)
2405
+ stream_events = stream_events or stream_intermediate_steps
2211
2406
 
2212
- # Can't have stream_intermediate_steps if stream is False
2407
+ # Can't stream events if streaming is disabled
2213
2408
  if stream is False:
2214
- stream_intermediate_steps = False
2409
+ stream_events = False
2410
+
2411
+ if stream_events is None:
2412
+ stream_events = False if self.stream_events is None else self.stream_events
2215
2413
 
2216
2414
  self.stream = self.stream or stream
2217
- self.stream_intermediate_steps = self.stream_intermediate_steps or (stream_intermediate_steps and self.stream)
2415
+ self.stream_events = self.stream_events or stream_events
2218
2416
 
2219
2417
  # Prepare arguments for the model
2220
2418
  response_format = self._get_response_format() if self.parser_model is None else None
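
The block above folds the deprecated stream_intermediate_steps flag into stream_events: an explicit argument wins, the deprecated flag is honoured as an alias, event streaming is forced off when stream itself is off, and otherwise the agent-level default applies. The same precedence, distilled into a standalone helper (a sketch of the logic, not agno's code):

    from typing import Optional

    def resolve_stream_flags(
        stream: Optional[bool],
        stream_events: Optional[bool],
        stream_intermediate_steps: Optional[bool],
        default_stream: Optional[bool],
        default_stream_events: Optional[bool],
    ) -> tuple[bool, bool]:
        # Fall back to the agent-level default for plain streaming.
        if stream is None:
            stream = bool(default_stream)
        # The deprecated flag is treated as an alias for stream_events.
        stream_events = stream_events or stream_intermediate_steps
        # Events can only be streamed when streaming is enabled at all.
        if stream is False:
            stream_events = False
        # Otherwise fall back to the agent-level default for events.
        if stream_events is None:
            stream_events = bool(default_stream_events)
        return stream, stream_events

    print(resolve_stream_flags(None, None, True, default_stream=True, default_stream_events=None))
    # -> (True, True): the deprecated flag still turns event streaming on
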
@@ -2264,7 +2462,7 @@ class Agent:
2264
2462
  run_response=run_response,
2265
2463
  user_id=user_id,
2266
2464
  response_format=response_format,
2267
- stream_intermediate_steps=stream_intermediate_steps,
2465
+ stream_events=stream_events,
2268
2466
  yield_run_response=yield_run_response,
2269
2467
  dependencies=run_dependencies,
2270
2468
  session_id=session_id,
@@ -2346,6 +2544,7 @@ class Agent:
2346
2544
  run_id: Optional[str] = None,
2347
2545
  updated_tools: Optional[List[ToolExecution]] = None,
2348
2546
  stream: Literal[False] = False,
2547
+ stream_events: Optional[bool] = None,
2349
2548
  stream_intermediate_steps: Optional[bool] = None,
2350
2549
  user_id: Optional[str] = None,
2351
2550
  session_id: Optional[str] = None,
@@ -2364,6 +2563,7 @@ class Agent:
2364
2563
  run_id: Optional[str] = None,
2365
2564
  updated_tools: Optional[List[ToolExecution]] = None,
2366
2565
  stream: Literal[True] = True,
2566
+ stream_events: Optional[bool] = False,
2367
2567
  stream_intermediate_steps: Optional[bool] = None,
2368
2568
  user_id: Optional[str] = None,
2369
2569
  session_id: Optional[str] = None,
@@ -2381,6 +2581,7 @@ class Agent:
2381
2581
  run_id: Optional[str] = None,
2382
2582
  updated_tools: Optional[List[ToolExecution]] = None,
2383
2583
  stream: Optional[bool] = None,
2584
+ stream_events: Optional[bool] = False,
2384
2585
  stream_intermediate_steps: Optional[bool] = None,
2385
2586
  user_id: Optional[str] = None,
2386
2587
  session_id: Optional[str] = None,
@@ -2398,13 +2599,15 @@ class Agent:
2398
2599
  run_id: The run id to continue. Alternative to passing run_response.
2399
2600
  updated_tools: The updated tools to use for the run. Required to be used with `run_id`.
2400
2601
  stream: Whether to stream the response.
2401
- stream_intermediate_steps: Whether to stream the intermediate steps.
2602
+ stream_events: Whether to stream all events.
2402
2603
  user_id: The user id to continue the run for.
2403
2604
  session_id: The session id to continue the run for.
2404
2605
  retries: The number of retries to continue the run for.
2405
2606
  knowledge_filters: The knowledge filters to use for the run.
2406
2607
  dependencies: The dependencies to use for the run.
2608
+ metadata: The metadata to use for the run.
2407
2609
  debug_mode: Whether to enable debug mode.
2610
+ stream_intermediate_steps: (deprecated) Whether to stream intermediate step events. Use stream_events instead.
2408
2611
  """
2409
2612
  if run_response is None and run_id is None:
2410
2613
  raise ValueError("Either run_response or run_id must be provided.")
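
continue_run picks up a run that paused for tool confirmation, either from the paused RunOutput itself or from a run_id plus session_id. A hedged usage sketch of the human-in-the-loop flow; field names such as is_paused, requires_confirmation and confirmed follow agno's documented flow and the tool fields used in this diff, but should be checked against your version, and the agent and its confirmation-requiring tool are assumed to exist:

    # agent: Agent = ...  (constructed elsewhere with a tool that requires confirmation)

    run_output = agent.run("Delete the temporary files")

    if run_output.is_paused:
        for tool in run_output.tools or []:
            if tool.requires_confirmation:
                print(f"Tool {tool.tool_name} wants to run with {tool.tool_args}")
                tool.confirmed = True  # record the human decision

        # Resume from the paused output; stream=False returns the final RunOutput.
        run_output = agent.continue_run(run_response=run_output, stream=False)

    print(run_output.content)
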
@@ -2417,10 +2620,9 @@ class Agent:
2417
2620
 
2418
2621
  session_id = run_response.session_id if run_response else session_id
2419
2622
 
2420
- session_id, user_id, session_state = self._initialize_session(
2421
- run_id=run_id, # type: ignore
2623
+ session_id, user_id = self._initialize_session(
2422
2624
  session_id=session_id,
2423
- user_id=user_id, # type: ignore
2625
+ user_id=user_id,
2424
2626
  )
2425
2627
  # Initialize the Agent
2426
2628
  self.initialize_agent(debug_mode=debug_mode)
@@ -2429,6 +2631,10 @@ class Agent:
2429
2631
  agent_session = self._read_or_create_session(session_id=session_id, user_id=user_id)
2430
2632
  self._update_metadata(session=agent_session)
2431
2633
 
2634
+ # Initialize session state
2635
+ session_state = self._initialize_session_state(
2636
+ session_state={}, user_id=user_id, session_id=session_id, run_id=run_id
2637
+ )
2432
2638
  # Update session state from DB
2433
2639
  session_state = self._load_session_state(session=agent_session, session_state=session_state)
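
Session state is now built the same way in every entry point: start from a fresh dict seeded for the current run, then merge whatever was persisted for the session on top of it. A small sketch of that seed-then-merge behaviour; the key names and the precedence of stored values are illustrative assumptions, with plain dicts standing in for agno's session objects:

    from typing import Any, Dict, Optional

    def initialize_session_state(user_id: Optional[str], session_id: str, run_id: str) -> Dict[str, Any]:
        # Seed the state with the identifiers every run needs.
        state: Dict[str, Any] = {"current_session_id": session_id, "current_run_id": run_id}
        if user_id is not None:
            state["current_user_id"] = user_id
        return state

    def load_session_state(stored_state: Dict[str, Any], session_state: Dict[str, Any]) -> Dict[str, Any]:
        # Values already persisted for the session win over the fresh seed.
        merged = dict(session_state)
        merged.update(stored_state)
        return merged

    state = initialize_session_state(user_id="u1", session_id="s1", run_id="r1")
    state = load_session_state({"cart": ["apple"]}, state)
    print(state)
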
2434
2640
 
@@ -2458,17 +2664,22 @@ class Agent:
2458
2664
  if stream is None:
2459
2665
  stream = False if self.stream is None else self.stream
2460
2666
 
2461
- if stream_intermediate_steps is None:
2462
- stream_intermediate_steps = (
2463
- False if self.stream_intermediate_steps is None else self.stream_intermediate_steps
2464
- )
2667
+ # Considering both stream_events and stream_intermediate_steps (deprecated)
2668
+ stream_events = stream_events or stream_intermediate_steps
2465
2669
 
2466
- # Can't have stream_intermediate_steps if stream is False
2670
+ # Can't stream events if streaming is disabled
2467
2671
  if stream is False:
2468
- stream_intermediate_steps = False
2672
+ stream_events = False
2673
+
2674
+ if stream_events is None:
2675
+ stream_events = False if self.stream_events is None else self.stream_events
2676
+
2677
+ # Can't stream events if streaming is disabled
2678
+ if stream is False:
2679
+ stream_events = False
2469
2680
 
2470
2681
  self.stream = self.stream or stream
2471
- self.stream_intermediate_steps = self.stream_intermediate_steps or (stream_intermediate_steps and self.stream)
2682
+ self.stream_events = self.stream_events or stream_events
2472
2683
 
2473
2684
  # Run can be continued from previous run response or from passed run_response context
2474
2685
  if run_response is not None:
@@ -2530,7 +2741,7 @@ class Agent:
2530
2741
  dependencies=run_dependencies,
2531
2742
  metadata=metadata,
2532
2743
  response_format=response_format,
2533
- stream_intermediate_steps=stream_intermediate_steps,
2744
+ stream_events=stream_events,
2534
2745
  debug_mode=debug_mode,
2535
2746
  **kwargs,
2536
2747
  )
@@ -2604,95 +2815,105 @@ class Agent:
2604
2815
  Steps:
2605
2816
  1. Handle any updated tools
2606
2817
  2. Generate a response from the Model
2607
- 3. Update Agent Memory
2608
- 4. Calculate session metrics
2609
- 5. Save output to file if save_response_to_file is set
2610
- 6. Add RunOutput to Agent Session
2611
- 7. Save session to storage
2818
+ 3. Update the RunOutput with the model response
2819
+ 4. Convert response to structured format
2820
+ 5. Store media if enabled
2821
+ 6. Execute post-hooks
2822
+ 7. Create session summary
2823
+ 8. Cleanup and store (scrub, stop timer, save to file, add to session, calculate metrics, save session)
2612
2824
  """
2825
+ # Register run for cancellation tracking
2826
+ register_run(run_response.run_id) # type: ignore
2827
+
2613
2828
  self.model = cast(Model, self.model)
2614
2829
 
2615
2830
  # 1. Handle the updated tools
2616
2831
  self._handle_tool_call_updates(run_response=run_response, run_messages=run_messages)
2617
2832
 
2618
- # 2. Generate a response from the Model (includes running function calls)
2619
- self.model = cast(Model, self.model)
2620
- model_response: ModelResponse = self.model.response(
2621
- messages=run_messages.messages,
2622
- response_format=response_format,
2623
- tools=self._tools_for_model,
2624
- functions=self._functions_for_model,
2625
- tool_choice=self.tool_choice,
2626
- tool_call_limit=self.tool_call_limit,
2627
- )
2833
+ try:
2834
+ # Check for cancellation before model call
2835
+ raise_if_cancelled(run_response.run_id) # type: ignore
2628
2836
 
2629
- self._update_run_response(
2630
- model_response=model_response,
2631
- run_response=run_response,
2632
- run_messages=run_messages,
2633
- )
2837
+ # 2. Generate a response from the Model (includes running function calls)
2838
+ self.model = cast(Model, self.model)
2839
+ model_response: ModelResponse = self.model.response(
2840
+ messages=run_messages.messages,
2841
+ response_format=response_format,
2842
+ tools=self._tools_for_model,
2843
+ functions=self._functions_for_model,
2844
+ tool_choice=self.tool_choice,
2845
+ tool_call_limit=self.tool_call_limit,
2846
+ )
2634
2847
 
2635
- # We should break out of the run function
2636
- if any(tool_call.is_paused for tool_call in run_response.tools or []):
2637
- return self._handle_agent_run_paused(
2638
- run_response=run_response,
2639
- run_messages=run_messages,
2640
- session=session,
2641
- user_id=user_id,
2848
+ # Check for cancellation after model processing
2849
+ raise_if_cancelled(run_response.run_id) # type: ignore
2850
+
2851
+ # 3. Update the RunOutput with the model response
2852
+ self._update_run_response(
2853
+ model_response=model_response, run_response=run_response, run_messages=run_messages
2642
2854
  )
2643
2855
 
2644
- # 3. Calculate session metrics
2645
- self._update_session_metrics(session=session, run_response=run_response)
2856
+ # We should break out of the run function
2857
+ if any(tool_call.is_paused for tool_call in run_response.tools or []):
2858
+ return self._handle_agent_run_paused(run_response=run_response, session=session, user_id=user_id)
2646
2859
 
2647
- # Convert the response to the structured format if needed
2648
- self._convert_response_to_structured_format(run_response)
2860
+ # 4. Convert the response to the structured format if needed
2861
+ self._convert_response_to_structured_format(run_response)
2649
2862
 
2650
- if self.post_hooks is not None:
2651
- self._execute_post_hooks(
2652
- hooks=self.post_hooks, # type: ignore
2653
- run_output=run_response,
2654
- session_state=session_state,
2655
- dependencies=dependencies,
2656
- metadata=metadata,
2657
- session=session,
2658
- user_id=user_id,
2659
- debug_mode=debug_mode,
2660
- **kwargs,
2661
- )
2863
+ # 5. Store media if enabled
2864
+ if self.store_media:
2865
+ self._store_media(run_response, model_response)
2662
2866
 
2663
- run_response.status = RunStatus.completed
2664
- # Set the run duration
2665
- if run_response.metrics:
2666
- run_response.metrics.stop_timer()
2867
+ # 6. Execute post-hooks
2868
+ if self.post_hooks is not None:
2869
+ post_hook_iterator = self._execute_post_hooks(
2870
+ hooks=self.post_hooks, # type: ignore
2871
+ run_output=run_response,
2872
+ session=session,
2873
+ user_id=user_id,
2874
+ session_state=session_state,
2875
+ dependencies=dependencies,
2876
+ metadata=metadata,
2877
+ debug_mode=debug_mode,
2878
+ **kwargs,
2879
+ )
2880
+ deque(post_hook_iterator, maxlen=0)
2881
+ # Check for cancellation
2882
+ raise_if_cancelled(run_response.run_id) # type: ignore
2667
2883
 
2668
- # 4. Save output to file if save_response_to_file is set
2669
- self.save_run_response_to_file(
2670
- run_response=run_response,
2671
- input=run_messages.user_message,
2672
- session_id=session.session_id,
2673
- user_id=user_id,
2674
- )
2884
+ # 7. Create session summary
2885
+ if self.session_summary_manager is not None:
2886
+ # Upsert the RunOutput to Agent Session before creating the session summary
2887
+ session.upsert_run(run=run_response)
2675
2888
 
2676
- # 5. Add the run to memory
2677
- session.upsert_run(run=run_response)
2889
+ try:
2890
+ self.session_summary_manager.create_session_summary(session=session)
2891
+ except Exception as e:
2892
+ log_warning(f"Error in session summary creation: {str(e)}")
2678
2893
 
2679
- # 6. Update Agent Memory, Cultural Knowledge and Summaries
2680
- response_iterator = self._make_memories_cultural_knowledge_and_summaries(
2681
- run_response=run_response,
2682
- run_messages=run_messages,
2683
- session=session,
2684
- user_id=user_id,
2685
- )
2686
- # Consume the response iterator to ensure the memory is updated before the run is completed
2687
- deque(response_iterator, maxlen=0)
2894
+ # Set the run status to completed
2895
+ run_response.status = RunStatus.completed
2688
2896
 
2689
- # 7. Save session to storage
2690
- self.save_session(session=session)
2897
+ # 8. Cleanup and store the run response and session
2898
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)
2691
2899
 
2692
- # Log Agent Telemetry
2693
- self._log_agent_telemetry(session_id=session.session_id, run_id=run_response.run_id)
2900
+ # Log Agent Telemetry
2901
+ self._log_agent_telemetry(session_id=session.session_id, run_id=run_response.run_id)
2694
2902
 
2695
- return run_response
2903
+ return run_response
2904
+ except RunCancelledException as e:
2905
+ # Handle run cancellation during async streaming
2906
+ log_info(f"Run {run_response.run_id} was cancelled")
2907
+ run_response.status = RunStatus.cancelled
2908
+ run_response.content = str(e)
2909
+
2910
+ # Cleanup and store the run response and session
2911
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)
2912
+
2913
+ return run_response
2914
+ finally:
2915
+ # Always clean up the run tracking
2916
+ cleanup_run(run_response.run_id) # type: ignore
2696
2917
 
2697
2918
  def _continue_run_stream(
2698
2919
  self,
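
In the non-streaming path above, _execute_post_hooks is now a generator that yields hook events, so the synchronous code drains it with deque(iterator, maxlen=0): the generator runs to completion, its side effects happen, and the yielded events are discarded without building a list. A tiny illustration of why that idiom works:

    from collections import deque
    from typing import Iterator

    def post_hooks() -> Iterator[str]:
        yield "PostHookStarted"
        print("hook side effect runs here")   # side effects still happen when drained
        yield "PostHookCompleted"

    # maxlen=0 means every yielded item is immediately dropped,
    # so this consumes the generator without keeping anything in memory.
    deque(post_hooks(), maxlen=0)
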
@@ -2703,7 +2924,7 @@ class Agent:
2703
2924
  metadata: Optional[Dict[str, Any]] = None,
2704
2925
  user_id: Optional[str] = None,
2705
2926
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
2706
- stream_intermediate_steps: bool = False,
2927
+ stream_events: bool = False,
2707
2928
  dependencies: Optional[Dict[str, Any]] = None,
2708
2929
  debug_mode: Optional[bool] = None,
2709
2930
  **kwargs,
@@ -2711,100 +2932,149 @@ class Agent:
2711
2932
  """Continue a previous run.
2712
2933
 
2713
2934
  Steps:
2714
- 1. Handle any updated tools
2715
- 2. Generate a response from the Model
2716
- 3. Calculate session metrics
2717
- 4. Save output to file if save_response_to_file is set
2718
- 5. Add the run to memory
2719
- 6. Update Agent Memory
2720
- 7. Create the run completed event
2721
- 8. Save session to storage
2935
+ 1. Resolve dependencies
2936
+ 2. Handle any updated tools
2937
+ 3. Process model response
2938
+ 4. Execute post-hooks
2939
+ 5. Create session summary
2940
+ 6. Cleanup and store the run response and session
2722
2941
  """
2723
2942
 
2943
+ # 1. Resolve dependencies
2724
2944
  if dependencies is not None:
2725
2945
  self._resolve_run_dependencies(dependencies=dependencies)
2726
2946
 
2727
2947
  # Start the Run by yielding a RunContinued event
2728
- if stream_intermediate_steps:
2729
- yield self._handle_event(create_run_continued_event(run_response), run_response)
2730
-
2731
- # 1. Handle the updated tools
2732
- yield from self._handle_tool_call_updates_stream(run_response=run_response, run_messages=run_messages)
2948
+ if stream_events:
2949
+ yield handle_event( # type: ignore
2950
+ create_run_continued_event(run_response),
2951
+ run_response,
2952
+ events_to_skip=self.events_to_skip, # type: ignore
2953
+ store_events=self.store_events,
2954
+ )
2733
2955
 
2734
- # 2. Process model response
2735
- for event in self._handle_model_response_stream(
2736
- session=session,
2737
- run_response=run_response,
2738
- run_messages=run_messages,
2739
- response_format=response_format,
2740
- stream_intermediate_steps=stream_intermediate_steps,
2741
- ):
2742
- yield event
2956
+ # 2. Handle the updated tools
2957
+ yield from self._handle_tool_call_updates_stream(
2958
+ run_response=run_response, run_messages=run_messages, stream_events=stream_events
2959
+ )
2743
2960
 
2744
- # We should break out of the run function
2745
- if any(tool_call.is_paused for tool_call in run_response.tools or []):
2746
- yield from self._handle_agent_run_paused_stream(
2961
+ try:
2962
+ # 3. Process model response
2963
+ for event in self._handle_model_response_stream(
2964
+ session=session,
2747
2965
  run_response=run_response,
2748
2966
  run_messages=run_messages,
2749
- session=session,
2750
- user_id=user_id,
2751
- )
2752
- return
2967
+ response_format=response_format,
2968
+ stream_events=stream_events,
2969
+ ):
2970
+ yield event
2753
2971
 
2754
- if self.post_hooks is not None:
2755
- self._execute_post_hooks(
2756
- hooks=self.post_hooks, # type: ignore
2757
- run_output=run_response,
2758
- session_state=session_state,
2759
- dependencies=dependencies,
2760
- metadata=metadata,
2761
- session=session,
2762
- user_id=user_id,
2763
- debug_mode=debug_mode,
2764
- **kwargs,
2972
+ # Parse response with parser model if provided
2973
+ yield from self._parse_response_with_parser_model_stream(
2974
+ session=session, run_response=run_response, stream_events=stream_events
2765
2975
  )
2766
2976
 
2767
- run_response.status = RunStatus.completed
2977
+ # Yield RunContentCompletedEvent
2978
+ if stream_events:
2979
+ yield handle_event( # type: ignore
2980
+ create_run_content_completed_event(from_run_response=run_response),
2981
+ run_response,
2982
+ events_to_skip=self.events_to_skip, # type: ignore
2983
+ store_events=self.store_events,
2984
+ )
2768
2985
 
2769
- # Set the run duration
2770
- if run_response.metrics:
2771
- run_response.metrics.stop_timer()
2986
+ # We should break out of the run function
2987
+ if any(tool_call.is_paused for tool_call in run_response.tools or []):
2988
+ yield from self._handle_agent_run_paused_stream(
2989
+ run_response=run_response, session=session, user_id=user_id
2990
+ )
2991
+ return
2772
2992
 
2773
- # 3. Calculate session metrics
2774
- self._update_session_metrics(session=session, run_response=run_response)
2993
+ # Execute post-hooks
2994
+ if self.post_hooks is not None:
2995
+ yield from self._execute_post_hooks(
2996
+ hooks=self.post_hooks, # type: ignore
2997
+ run_output=run_response,
2998
+ session=session,
2999
+ session_state=session_state,
3000
+ dependencies=dependencies,
3001
+ metadata=metadata,
3002
+ user_id=user_id,
3003
+ debug_mode=debug_mode,
3004
+ **kwargs,
3005
+ )
2775
3006
 
2776
- # 4. Save output to file if save_response_to_file is set
2777
- self.save_run_response_to_file(
2778
- run_response=run_response,
2779
- input=run_messages.user_message,
2780
- session_id=session.session_id,
2781
- user_id=user_id,
2782
- )
3007
+ # Check for cancellation before model call
3008
+ raise_if_cancelled(run_response.run_id) # type: ignore
2783
3009
 
2784
- # 5. Add the run to memory
2785
- session.upsert_run(run=run_response)
3010
+ # 4. Create session summary
3011
+ if self.session_summary_manager is not None:
3012
+ # Upsert the RunOutput to Agent Session before creating the session summary
3013
+ session.upsert_run(run=run_response)
2786
3014
 
2787
- # 6. Update Agent Memory
2788
- yield from self._make_memories_cultural_knowledge_and_summaries(
2789
- run_response=run_response,
2790
- run_messages=run_messages,
2791
- session=session,
2792
- user_id=user_id,
2793
- )
3015
+ if stream_events:
3016
+ yield handle_event( # type: ignore
3017
+ create_session_summary_started_event(from_run_response=run_response),
3018
+ run_response,
3019
+ events_to_skip=self.events_to_skip, # type: ignore
3020
+ store_events=self.store_events,
3021
+ )
3022
+ try:
3023
+ self.session_summary_manager.create_session_summary(session=session)
3024
+ except Exception as e:
3025
+ log_warning(f"Error in session summary creation: {str(e)}")
2794
3026
 
2795
- # 7. Create the run completed event
2796
- completed_event = self._handle_event(create_run_completed_event(run_response), run_response)
3027
+ if stream_events:
3028
+ yield handle_event( # type: ignore
3029
+ create_session_summary_completed_event(
3030
+ from_run_response=run_response, session_summary=session.summary
3031
+ ),
3032
+ run_response,
3033
+ events_to_skip=self.events_to_skip, # type: ignore
3034
+ store_events=self.store_events,
3035
+ )
2797
3036
 
2798
- # 8. Save session to storage
2799
- self.save_session(session=session)
3037
+ # Create the run completed event
3038
+ completed_event = handle_event(
3039
+ create_run_completed_event(run_response),
3040
+ run_response,
3041
+ events_to_skip=self.events_to_skip, # type: ignore
3042
+ store_events=self.store_events,
3043
+ )
3044
+
3045
+ # Set the run status to completed
3046
+ run_response.status = RunStatus.completed
3047
+
3048
+ # 5. Cleanup and store the run response and session
3049
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)
3050
+
3051
+ if stream_events:
3052
+ yield completed_event # type: ignore
3053
+
3054
+ # Log Agent Telemetry
3055
+ self._log_agent_telemetry(session_id=session.session_id, run_id=run_response.run_id)
2800
3056
 
2801
- if stream_intermediate_steps:
2802
- yield completed_event
3057
+ log_debug(f"Agent Run End: {run_response.run_id}", center=True, symbol="*")
2803
3058
 
2804
- # Log Agent Telemetry
2805
- self._log_agent_telemetry(session_id=session.session_id, run_id=run_response.run_id)
3059
+ except RunCancelledException as e:
3060
+ # Handle run cancellation during async streaming
3061
+ log_info(f"Run {run_response.run_id} was cancelled during streaming")
3062
+ run_response.status = RunStatus.cancelled
3063
+ run_response.content = str(e)
2806
3064
 
2807
- log_debug(f"Agent Run End: {run_response.run_id}", center=True, symbol="*")
3065
+ # Yield the cancellation event
3066
+ yield handle_event( # type: ignore
3067
+ create_run_cancelled_event(from_run_response=run_response, reason=str(e)),
3068
+ run_response,
3069
+ events_to_skip=self.events_to_skip, # type: ignore
3070
+ store_events=self.store_events,
3071
+ )
3072
+
3073
+ # Cleanup and store the run response and session
3074
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)
3075
+ finally:
3076
+ # Always clean up the run tracking
3077
+ cleanup_run(run_response.run_id) # type: ignore
2808
3078
 
2809
3079
  @overload
2810
3080
  async def acontinue_run(
@@ -2812,6 +3082,7 @@ class Agent:
2812
3082
  run_response: Optional[RunOutput] = None,
2813
3083
  *,
2814
3084
  stream: Literal[False] = False,
3085
+ stream_events: Optional[bool] = None,
2815
3086
  stream_intermediate_steps: Optional[bool] = None,
2816
3087
  run_id: Optional[str] = None,
2817
3088
  updated_tools: Optional[List[ToolExecution]] = None,
@@ -2830,6 +3101,7 @@ class Agent:
2830
3101
  run_response: Optional[RunOutput] = None,
2831
3102
  *,
2832
3103
  stream: Literal[True] = True,
3104
+ stream_events: Optional[bool] = None,
2833
3105
  stream_intermediate_steps: Optional[bool] = None,
2834
3106
  run_id: Optional[str] = None,
2835
3107
  updated_tools: Optional[List[ToolExecution]] = None,
@@ -2849,6 +3121,7 @@ class Agent:
2849
3121
  run_id: Optional[str] = None,
2850
3122
  updated_tools: Optional[List[ToolExecution]] = None,
2851
3123
  stream: Optional[bool] = None,
3124
+ stream_events: Optional[bool] = None,
2852
3125
  stream_intermediate_steps: Optional[bool] = None,
2853
3126
  user_id: Optional[str] = None,
2854
3127
  session_id: Optional[str] = None,
@@ -2867,13 +3140,16 @@ class Agent:
2867
3140
  run_id: The run id to continue. Alternative to passing run_response.
2868
3141
  updated_tools: The updated tools to use for the run. Required to be used with `run_id`.
2869
3142
  stream: Whether to stream the response.
2870
- stream_intermediate_steps: Whether to stream the intermediate steps.
3143
+ stream_events: Whether to stream all events.
2871
3144
  user_id: The user id to continue the run for.
2872
3145
  session_id: The session id to continue the run for.
2873
3146
  retries: The number of retries to continue the run for.
2874
3147
  knowledge_filters: The knowledge filters to use for the run.
2875
3148
  dependencies: The dependencies to use for continuing the run.
3149
+ metadata: The metadata to use for continuing the run.
2876
3150
  debug_mode: Whether to enable debug mode.
3151
+ yield_run_response: Whether to yield the run response.
3152
+ stream_intermediate_steps: (deprecated) Whether to stream intermediate step events. Use stream_events instead.
2877
3153
  """
2878
3154
  if run_response is None and run_id is None:
2879
3155
  raise ValueError("Either run_response or run_id must be provided.")
@@ -2881,10 +3157,9 @@ class Agent:
2881
3157
  if run_response is None and (run_id is not None and (session_id is None and self.session_id is None)):
2882
3158
  raise ValueError("Session ID is required to continue a run from a run_id.")
2883
3159
 
2884
- session_id, user_id, session_state = self._initialize_session(
2885
- run_id=run_id, # type: ignore
3160
+ session_id, user_id = self._initialize_session(
2886
3161
  session_id=session_id,
2887
- user_id=user_id, # type: ignore
3162
+ user_id=user_id,
2888
3163
  )
2889
3164
 
2890
3165
  # Initialize the Agent
@@ -2899,17 +3174,22 @@ class Agent:
2899
3174
  if stream is None:
2900
3175
  stream = False if self.stream is None else self.stream
2901
3176
 
2902
- if stream_intermediate_steps is None:
2903
- stream_intermediate_steps = (
2904
- False if self.stream_intermediate_steps is None else self.stream_intermediate_steps
2905
- )
3177
+ # Considering both stream_events and stream_intermediate_steps (deprecated)
3178
+ stream_events = stream_events or stream_intermediate_steps
3179
+
3180
+ # Can't stream events if streaming is disabled
3181
+ if stream is False:
3182
+ stream_events = False
3183
+
3184
+ if stream_events is None:
3185
+ stream_events = False if self.stream_events is None else self.stream_events
2906
3186
 
2907
3187
  # Can't have stream_intermediate_steps if stream is False
2908
3188
  if stream is False:
2909
- stream_intermediate_steps = False
3189
+ stream_events = False
2910
3190
 
2911
3191
  self.stream = self.stream or stream
2912
- self.stream_intermediate_steps = self.stream_intermediate_steps or (stream_intermediate_steps and self.stream)
3192
+ self.stream_events = self.stream_events or stream_events
2913
3193
 
2914
3194
  # Get knowledge filters
2915
3195
  effective_filters = knowledge_filters
@@ -2936,14 +3216,13 @@ class Agent:
2936
3216
  run_response=run_response,
2937
3217
  updated_tools=updated_tools,
2938
3218
  knowledge_filters=effective_filters,
2939
- session_state=session_state,
2940
3219
  run_id=run_id,
2941
3220
  user_id=user_id,
2942
3221
  session_id=session_id,
2943
3222
  response_format=response_format,
2944
3223
  dependencies=run_dependencies,
3224
+ stream_events=stream_events,
2945
3225
  metadata=metadata,
2946
- stream_intermediate_steps=stream_intermediate_steps,
2947
3226
  yield_run_response=yield_run_response,
2948
3227
  debug_mode=debug_mode,
2949
3228
  **kwargs,
@@ -2954,7 +3233,6 @@ class Agent:
2954
3233
  run_response=run_response,
2955
3234
  updated_tools=updated_tools,
2956
3235
  knowledge_filters=effective_filters,
2957
- session_state=session_state,
2958
3236
  run_id=run_id,
2959
3237
  user_id=user_id,
2960
3238
  response_format=response_format,
@@ -3006,7 +3284,6 @@ class Agent:
3006
3284
  run_response: Optional[RunOutput] = None,
3007
3285
  updated_tools: Optional[List[ToolExecution]] = None,
3008
3286
  knowledge_filters: Optional[Dict[str, Any]] = None,
3009
- session_state: Optional[Dict[str, Any]] = None,
3010
3287
  run_id: Optional[str] = None,
3011
3288
  user_id: Optional[str] = None,
3012
3289
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
@@ -3027,18 +3304,16 @@ class Agent:
3027
3304
  7. Handle the updated tools
3028
3305
  8. Get model response
3029
3306
  9. Update the RunOutput with the model response
3030
- 10. Calculate session metrics
3031
- 11. Execute post-hooks
3032
- 12. Update Agent Memory
3033
- 13. Save session to storage
3307
+ 10. Convert response to structured format
3308
+ 11. Store media if enabled
3309
+ 12. Execute post-hooks
3310
+ 13. Create session summary
3311
+ 14. Cleanup and store (scrub, stop timer, save to file, add to session, calculate metrics, save session)
3034
3312
  """
3035
3313
  log_debug(f"Agent Run Continue: {run_response.run_id if run_response else run_id}", center=True) # type: ignore
3036
3314
 
3037
3315
  # 1. Read existing session from db
3038
- if self._has_async_db():
3039
- agent_session = await self._aread_or_create_session(session_id=session_id, user_id=user_id)
3040
- else:
3041
- agent_session = self._read_or_create_session(session_id=session_id, user_id=user_id)
3316
+ agent_session = await self._aread_or_create_session(session_id=session_id, user_id=user_id)
3042
3317
 
3043
3318
  # 2. Resolve dependencies
3044
3319
  if dependencies is not None:
@@ -3046,6 +3321,11 @@ class Agent:
3046
3321
 
3047
3322
  # 3. Update metadata and session state
3048
3323
  self._update_metadata(session=agent_session)
3324
+ # Initialize session state
3325
+ session_state = self._initialize_session_state(
3326
+ session_state={}, user_id=user_id, session_id=session_id, run_id=run_id
3327
+ )
3328
+ # Update session state from DB
3049
3329
  if session_state is not None:
3050
3330
  session_state = self._load_session_state(session=agent_session, session_state=session_state)
3051
3331
 
@@ -3120,30 +3400,24 @@ class Agent:
3120
3400
  run_messages=run_messages,
3121
3401
  )
3122
3402
 
3123
- if self.store_media:
3124
- self._store_media(run_response, model_response)
3125
- else:
3126
- self._scrub_media_from_run_output(run_response)
3127
-
3128
3403
  # Break out of the run function if a tool call is paused
3129
3404
  if any(tool_call.is_paused for tool_call in run_response.tools or []):
3130
- return self._handle_agent_run_paused(
3131
- run_response=run_response,
3132
- run_messages=run_messages,
3133
- session=agent_session,
3134
- user_id=user_id,
3405
+ return await self._ahandle_agent_run_paused(
3406
+ run_response=run_response, session=agent_session, user_id=user_id
3135
3407
  )
3136
- raise_if_cancelled(run_response.run_id) # type: ignore
3137
3408
 
3138
- # 10. Calculate session metrics
3139
- self._update_session_metrics(session=agent_session, run_response=run_response)
3140
-
3141
- # Convert the response to the structured format if needed
3409
+ # 10. Convert the response to the structured format if needed
3142
3410
  self._convert_response_to_structured_format(run_response)
3143
3411
 
3144
- # 11. Execute post-hooks
3412
+ # 11. Store media if enabled
3413
+ if self.store_media:
3414
+ self._store_media(run_response, model_response)
3415
+
3416
+ raise_if_cancelled(run_response.run_id) # type: ignore
3417
+
3418
+ # 12. Execute post-hooks
3145
3419
  if self.post_hooks is not None:
3146
- await self._aexecute_post_hooks(
3420
+ async for _ in self._aexecute_post_hooks(
3147
3421
  hooks=self.post_hooks, # type: ignore
3148
3422
  run_output=run_response,
3149
3423
  session=agent_session,
@@ -3153,37 +3427,27 @@ class Agent:
3153
3427
  dependencies=dependencies,
3154
3428
  metadata=metadata,
3155
3429
  **kwargs,
3156
- )
3157
-
3158
- run_response.status = RunStatus.completed
3430
+ ):
3431
+ pass
3159
3432
 
3160
- if run_response.metrics:
3161
- run_response.metrics.stop_timer()
3433
+ # Check for cancellation
3434
+ raise_if_cancelled(run_response.run_id) # type: ignore
3162
3435
 
3163
- # 12. Update Agent Memory
3164
- async for _ in self._amake_memories_cultural_knowledge_and_summaries(
3165
- run_response=run_response,
3166
- run_messages=run_messages,
3167
- session=agent_session,
3168
- user_id=user_id,
3169
- ):
3170
- pass
3436
+ # 13. Create session summary
3437
+ if self.session_summary_manager is not None:
3438
+ # Upsert the RunOutput to Agent Session before creating the session summary
3439
+ agent_session.upsert_run(run=run_response)
3171
3440
 
3172
- # Optional: Save output to file if save_response_to_file is set
3173
- self.save_run_response_to_file(
3174
- run_response=run_response,
3175
- input=run_messages.user_message,
3176
- session_id=agent_session.session_id,
3177
- user_id=user_id,
3178
- )
3441
+ try:
3442
+ await self.session_summary_manager.acreate_session_summary(session=agent_session)
3443
+ except Exception as e:
3444
+ log_warning(f"Error in session summary creation: {str(e)}")
3179
3445
 
3180
- agent_session.upsert_run(run=run_response)
3446
+ # Set the run status to completed
3447
+ run_response.status = RunStatus.completed
3181
3448
 
3182
- # 13. Save session to storage
3183
- if self._has_async_db():
3184
- await self.asave_session(session=agent_session)
3185
- else:
3186
- self.save_session(session=agent_session)
3449
+ # 14. Cleanup and store the run response and session
3450
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)
3187
3451
 
3188
3452
  # Log Agent Telemetry
3189
3453
  await self._alog_agent_telemetry(session_id=agent_session.session_id, run_id=run_response.run_id)
@@ -3198,12 +3462,8 @@ class Agent:
3198
3462
  run_response.content = str(e)
3199
3463
  run_response.status = RunStatus.cancelled
3200
3464
 
3201
- # Update the Agent Session before exiting
3202
- agent_session.upsert_run(run=run_response)
3203
- if self._has_async_db():
3204
- await self.asave_session(session=agent_session)
3205
- else:
3206
- self.save_session(session=agent_session)
3465
+ # Cleanup and store the run response and session
3466
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)
3207
3467
 
3208
3468
  return run_response
3209
3469
  finally:
@@ -3216,11 +3476,10 @@ class Agent:
3216
3476
  run_response: Optional[RunOutput] = None,
3217
3477
  updated_tools: Optional[List[ToolExecution]] = None,
3218
3478
  knowledge_filters: Optional[Dict[str, Any]] = None,
3219
- session_state: Optional[Dict[str, Any]] = None,
3220
3479
  run_id: Optional[str] = None,
3221
3480
  user_id: Optional[str] = None,
3222
3481
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
3223
- stream_intermediate_steps: bool = False,
3482
+ stream_events: bool = False,
3224
3483
  yield_run_response: Optional[bool] = None,
3225
3484
  dependencies: Optional[Dict[str, Any]] = None,
3226
3485
  metadata: Optional[Dict[str, Any]] = None,
@@ -3238,12 +3497,9 @@ class Agent:
3238
3497
  6. Prepare run messages
3239
3498
  7. Handle the updated tools
3240
3499
  8. Process model response
3241
- 9. Add the run to memory
3242
- 10. Update Agent Memory
3243
- 11. Calculate session metrics
3244
- 12. Create the run completed event
3245
- 13. Add the RunOutput to Agent Session
3246
- 14. Save session to storage
3500
+ 9. Execute post-hooks
3501
+ 10. Create session summary
3502
+ 11. Cleanup and store the run response and session
3247
3503
  """
3248
3504
  log_debug(f"Agent Run Continue: {run_response.run_id if run_response else run_id}", center=True) # type: ignore
3249
3505
 
@@ -3256,6 +3512,11 @@ class Agent:
3256
3512
 
3257
3513
  # 3. Update session state and metadata
3258
3514
  self._update_metadata(session=agent_session)
3515
+ # Initialize session state
3516
+ session_state = self._initialize_session_state(
3517
+ session_state={}, user_id=user_id, session_id=session_id, run_id=run_id
3518
+ )
3519
+ # Update session state from DB
3259
3520
  if session_state is not None:
3260
3521
  session_state = self._load_session_state(session=agent_session, session_state=session_state)
3261
3522
 
@@ -3303,8 +3564,13 @@ class Agent:
3303
3564
 
3304
3565
  try:
3305
3566
  # Start the Run by yielding a RunContinued event
3306
- if stream_intermediate_steps:
3307
- yield self._handle_event(create_run_continued_event(run_response), run_response)
3567
+ if stream_events:
3568
+ yield handle_event( # type: ignore
3569
+ create_run_continued_event(run_response),
3570
+ run_response,
3571
+ events_to_skip=self.events_to_skip, # type: ignore
3572
+ store_events=self.store_events,
3573
+ )
3308
3574
 
3309
3575
  # 7. Handle the updated tools
3310
3576
  async for event in self._ahandle_tool_call_updates_stream(
@@ -3320,7 +3586,7 @@ class Agent:
3320
3586
  run_response=run_response,
3321
3587
  run_messages=run_messages,
3322
3588
  response_format=response_format,
3323
- stream_intermediate_steps=stream_intermediate_steps,
3589
+ stream_events=stream_events,
3324
3590
  ):
3325
3591
  raise_if_cancelled(run_response.run_id) # type: ignore
3326
3592
  yield event
@@ -3335,11 +3601,11 @@ class Agent:
3335
3601
  run_response=run_response,
3336
3602
  run_messages=run_messages,
3337
3603
  response_format=response_format,
3338
- stream_intermediate_steps=stream_intermediate_steps,
3604
+ stream_events=stream_events,
3339
3605
  ):
3340
3606
  raise_if_cancelled(run_response.run_id) # type: ignore
3341
3607
  if isinstance(event, RunContentEvent):
3342
- if stream_intermediate_steps:
3608
+ if stream_events:
3343
3609
  yield IntermediateRunContentEvent(
3344
3610
  content=event.content,
3345
3611
  content_type=event.content_type,
@@ -3352,7 +3618,7 @@ class Agent:
3352
3618
  session=agent_session,
3353
3619
  run_response=run_response,
3354
3620
  run_messages=run_messages,
3355
- stream_intermediate_steps=stream_intermediate_steps,
3621
+ stream_events=stream_events,
3356
3622
  ):
3357
3623
  raise_if_cancelled(run_response.run_id) # type: ignore
3358
3624
  yield event
@@ -3360,71 +3626,88 @@ class Agent:
3360
3626
  # Check for cancellation after model processing
3361
3627
  raise_if_cancelled(run_response.run_id) # type: ignore
3362
3628
 
3629
+ # Parse response with parser model if provided
3630
+ async for event in self._aparse_response_with_parser_model_stream(
3631
+ session=agent_session, run_response=run_response, stream_events=stream_events
3632
+ ):
3633
+ yield event
3634
+
3635
+ # Yield RunContentCompletedEvent
3636
+ if stream_events:
3637
+ yield handle_event( # type: ignore
3638
+ create_run_content_completed_event(from_run_response=run_response),
3639
+ run_response,
3640
+ events_to_skip=self.events_to_skip, # type: ignore
3641
+ store_events=self.store_events,
3642
+ )
3643
+
3363
3644
  # Break out of the run function if a tool call is paused
3364
3645
  if any(tool_call.is_paused for tool_call in run_response.tools or []):
3365
- for item in self._handle_agent_run_paused_stream(
3366
- run_response=run_response,
3367
- run_messages=run_messages,
3368
- session=agent_session,
3369
- user_id=user_id,
3646
+ async for item in self._ahandle_agent_run_paused_stream(
3647
+ run_response=run_response, session=agent_session, user_id=user_id
3370
3648
  ):
3371
3649
  yield item
3372
3650
  return
3373
3651
 
3374
- # 9. Create the run completed event
3375
- completed_event = self._handle_event(create_run_completed_event(run_response), run_response)
3376
-
3377
- # 10. Execute post-hooks
3652
+ # 8. Execute post-hooks
3378
3653
  if self.post_hooks is not None:
3379
- await self._aexecute_post_hooks(
3654
+ async for event in self._aexecute_post_hooks(
3380
3655
  hooks=self.post_hooks, # type: ignore
3381
3656
  run_output=run_response,
3382
3657
  session=agent_session,
3383
3658
  user_id=user_id,
3384
- debug_mode=debug_mode,
3385
3659
  session_state=session_state,
3386
3660
  dependencies=dependencies,
3387
3661
  metadata=metadata,
3662
+ debug_mode=debug_mode,
3388
3663
  **kwargs,
3389
- )
3390
-
3391
- run_response.status = RunStatus.completed
3664
+ ):
3665
+ yield event
3666
+ # Check for cancellation before model call
3667
+ raise_if_cancelled(run_response.run_id) # type: ignore
3392
3668
 
3393
- # Set the run duration
3394
- if run_response.metrics:
3395
- run_response.metrics.stop_timer()
3669
+ # 9. Create session summary
3670
+ if self.session_summary_manager is not None:
3671
+ # Upsert the RunOutput to Agent Session before creating the session summary
3672
+ agent_session.upsert_run(run=run_response)
3396
3673
 
3397
- # 11. Add the run to memory
3398
- agent_session.upsert_run(run=run_response)
3674
+ if stream_events:
3675
+ yield handle_event( # type: ignore
3676
+ create_session_summary_started_event(from_run_response=run_response),
3677
+ run_response,
3678
+ events_to_skip=self.events_to_skip, # type: ignore
3679
+ store_events=self.store_events,
3680
+ )
3681
+ try:
3682
+ await self.session_summary_manager.acreate_session_summary(session=agent_session)
3683
+ except Exception as e:
3684
+ log_warning(f"Error in session summary creation: {str(e)}")
3685
+ if stream_events:
3686
+ yield handle_event( # type: ignore
3687
+ create_session_summary_completed_event(
3688
+ from_run_response=run_response, session_summary=agent_session.summary
3689
+ ),
3690
+ run_response,
3691
+ events_to_skip=self.events_to_skip, # type: ignore
3692
+ store_events=self.store_events,
3693
+ )
3399
3694
 
3400
- # Optional: Save output to file if save_response_to_file is set
3401
- self.save_run_response_to_file(
3402
- run_response=run_response,
3403
- input=run_messages.user_message,
3404
- session_id=agent_session.session_id,
3405
- user_id=user_id,
3695
+ # Create the run completed event
3696
+ completed_event = handle_event(
3697
+ create_run_completed_event(run_response),
3698
+ run_response,
3699
+ events_to_skip=self.events_to_skip, # type: ignore
3700
+ store_events=self.store_events,
3406
3701
  )
3407
3702
 
3408
- # 11. Calculate session metrics
3409
- self._update_session_metrics(session=agent_session, run_response=run_response)
3410
-
3411
- # 12. Update Agent Memory
3412
- async for event in self._amake_memories_cultural_knowledge_and_summaries(
3413
- run_response=run_response,
3414
- run_messages=run_messages,
3415
- session=agent_session,
3416
- user_id=user_id,
3417
- ):
3418
- yield event
3703
+ # Set the run status to completed
3704
+ run_response.status = RunStatus.completed
3419
3705
 
3420
- # 13. Save session to storage
3421
- if self._has_async_db():
3422
- await self.asave_session(session=agent_session)
3423
- else:
3424
- self.save_session(session=agent_session)
3706
+ # 10. Cleanup and store the run response and session
3707
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)
3425
3708
 
3426
- if stream_intermediate_steps:
3427
- yield completed_event
3709
+ if stream_events:
3710
+ yield completed_event # type: ignore
3428
3711
 
3429
3712
  if yield_run_response:
3430
3713
  yield run_response
@@ -3440,17 +3723,15 @@ class Agent:
3440
3723
  run_response.content = str(e)
3441
3724
 
3442
3725
  # Yield the cancellation event
3443
- yield self._handle_event(
3726
+ yield handle_event( # type: ignore
3444
3727
  create_run_cancelled_event(from_run_response=run_response, reason=str(e)),
3445
3728
  run_response,
3729
+ events_to_skip=self.events_to_skip, # type: ignore
3730
+ store_events=self.store_events,
3446
3731
  )
3447
3732
 
3448
- # Add the RunOutput to Agent Session even when cancelled
3449
- agent_session.upsert_run(run=run_response)
3450
- if self._has_async_db():
3451
- await self.asave_session(session=agent_session)
3452
- else:
3453
- self.save_session(session=agent_session)
3733
+ # Cleanup and store the run response and session
3734
+ await self._acleanup_and_store(run_response=run_response, session=agent_session, user_id=user_id)
3454
3735
  finally:
3455
3736
  # Always clean up the run tracking
3456
3737
  cleanup_run(run_response.run_id) # type: ignore
@@ -3486,13 +3767,15 @@ class Agent:
3486
3767
  all_args.update(kwargs)
3487
3768
 
3488
3769
  for i, hook in enumerate(hooks):
3489
- yield self._handle_event(
3770
+ yield handle_event( # type: ignore
3490
3771
  run_response=run_response,
3491
3772
  event=create_pre_hook_started_event(
3492
3773
  from_run_response=run_response,
3493
3774
  run_input=run_input,
3494
3775
  pre_hook_name=hook.__name__,
3495
3776
  ),
3777
+ events_to_skip=self.events_to_skip, # type: ignore
3778
+ store_events=self.store_events,
3496
3779
  )
3497
3780
  try:
3498
3781
  # Filter arguments to only include those that the hook accepts
@@ -3500,13 +3783,15 @@ class Agent:
3500
3783
 
3501
3784
  hook(**filtered_args)
3502
3785
 
3503
- yield self._handle_event(
3786
+ yield handle_event( # type: ignore
3504
3787
  run_response=run_response,
3505
3788
  event=create_pre_hook_completed_event(
3506
3789
  from_run_response=run_response,
3507
3790
  run_input=run_input,
3508
3791
  pre_hook_name=hook.__name__,
3509
3792
  ),
3793
+ events_to_skip=self.events_to_skip, # type: ignore
3794
+ store_events=self.store_events,
3510
3795
  )
3511
3796
 
3512
3797
  except (InputCheckError, OutputCheckError) as e:
@@ -3552,13 +3837,15 @@ class Agent:
3552
3837
  all_args.update(kwargs)
3553
3838
 
3554
3839
  for i, hook in enumerate(hooks):
3555
- yield self._handle_event(
3840
+ yield handle_event( # type: ignore
3556
3841
  run_response=run_response,
3557
3842
  event=create_pre_hook_started_event(
3558
3843
  from_run_response=run_response,
3559
3844
  run_input=run_input,
3560
3845
  pre_hook_name=hook.__name__,
3561
3846
  ),
3847
+ events_to_skip=self.events_to_skip, # type: ignore
3848
+ store_events=self.store_events,
3562
3849
  )
3563
3850
  try:
3564
3851
  # Filter arguments to only include those that the hook accepts
@@ -3570,13 +3857,15 @@ class Agent:
3570
3857
  # Synchronous function
3571
3858
  hook(**filtered_args)
3572
3859
 
3573
- yield self._handle_event(
3860
+ yield handle_event( # type: ignore
3574
3861
  run_response=run_response,
3575
3862
  event=create_pre_hook_completed_event(
3576
3863
  from_run_response=run_response,
3577
3864
  run_input=run_input,
3578
3865
  pre_hook_name=hook.__name__,
3579
3866
  ),
3867
+ events_to_skip=self.events_to_skip, # type: ignore
3868
+ store_events=self.store_events,
3580
3869
  )
3581
3870
 
3582
3871
  except (InputCheckError, OutputCheckError) as e:
@@ -3602,7 +3891,7 @@ class Agent:
3602
3891
  user_id: Optional[str] = None,
3603
3892
  debug_mode: Optional[bool] = None,
3604
3893
  **kwargs: Any,
3605
- ) -> None:
3894
+ ) -> Iterator[RunOutputEvent]:
3606
3895
  """Execute multiple post-hook functions in succession."""
3607
3896
  if hooks is None:
3608
3897
  return
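
With the change above, post-hook execution is wrapped in started/completed events, and each hook still only receives the arguments its signature declares (filter_hook_args drops the rest). A hedged sketch of a post-hook that inspects the final output: the run_output parameter name matches the keyword passed in this diff, while the exception raised for a failed check is a stand-in for agno's own output-check error:

    def review_output(run_output) -> None:
        # Post-hooks run after the model output exists but before it is returned.
        content = run_output.content or ""
        if "password" in content.lower():
            # In agno this would be an OutputCheckError; ValueError keeps the sketch dependency-free.
            raise ValueError("Response leaked a credential and was rejected.")

    # Passed at construction time; the agent calls it with only the arguments it accepts:
    # agent = Agent(model=..., post_hooks=[review_output])
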
@@ -3621,11 +3910,30 @@ class Agent:
3621
3910
  all_args.update(kwargs)
3622
3911
 
3623
3912
  for i, hook in enumerate(hooks):
3913
+ yield handle_event( # type: ignore
3914
+ run_response=run_output,
3915
+ event=create_post_hook_started_event(
3916
+ from_run_response=run_output,
3917
+ post_hook_name=hook.__name__,
3918
+ ),
3919
+ events_to_skip=self.events_to_skip, # type: ignore
3920
+ store_events=self.store_events,
3921
+ )
3624
3922
  try:
3625
3923
  # Filter arguments to only include those that the hook accepts
3626
3924
  filtered_args = filter_hook_args(hook, all_args)
3627
3925
 
3628
3926
  hook(**filtered_args)
3927
+
3928
+ yield handle_event( # type: ignore
3929
+ run_response=run_output,
3930
+ event=create_post_hook_completed_event(
3931
+ from_run_response=run_output,
3932
+ post_hook_name=hook.__name__,
3933
+ ),
3934
+ events_to_skip=self.events_to_skip, # type: ignore
3935
+ store_events=self.store_events,
3936
+ )
3629
3937
  except (InputCheckError, OutputCheckError) as e:
3630
3938
  raise e
3631
3939
  except Exception as e:
@@ -3646,7 +3954,7 @@ class Agent:
3646
3954
  user_id: Optional[str] = None,
3647
3955
  debug_mode: Optional[bool] = None,
3648
3956
  **kwargs: Any,
3649
- ) -> None:
3957
+ ) -> AsyncIterator[RunOutputEvent]:
3650
3958
  """Execute multiple post-hook functions in succession (async version)."""
3651
3959
  if hooks is None:
3652
3960
  return
@@ -3665,6 +3973,15 @@ class Agent:
3665
3973
  all_args.update(kwargs)
3666
3974
 
3667
3975
  for i, hook in enumerate(hooks):
3976
+ yield handle_event( # type: ignore
3977
+ run_response=run_output,
3978
+ event=create_post_hook_started_event(
3979
+ from_run_response=run_output,
3980
+ post_hook_name=hook.__name__,
3981
+ ),
3982
+ events_to_skip=self.events_to_skip, # type: ignore
3983
+ store_events=self.store_events,
3984
+ )
3668
3985
  try:
3669
3986
  # Filter arguments to only include those that the hook accepts
3670
3987
  filtered_args = filter_hook_args(hook, all_args)
@@ -3674,6 +3991,16 @@ class Agent:
3674
3991
  else:
3675
3992
  hook(**filtered_args)
3676
3993
 
3994
+ yield handle_event( # type: ignore
3995
+ run_response=run_output,
3996
+ event=create_post_hook_completed_event(
3997
+ from_run_response=run_output,
3998
+ post_hook_name=hook.__name__,
3999
+ ),
4000
+ events_to_skip=self.events_to_skip, # type: ignore
4001
+ store_events=self.store_events,
4002
+ )
4003
+
3677
4004
  except (InputCheckError, OutputCheckError) as e:
3678
4005
  raise e
3679
4006
  except Exception as e:
@@ -3686,7 +4013,6 @@ class Agent:
  def _handle_agent_run_paused(
  self,
  run_response: RunOutput,
- run_messages: RunMessages,
  session: AgentSession,
  user_id: Optional[str] = None,
  ) -> RunOutput:
@@ -3696,18 +4022,7 @@ class Agent:
  if not run_response.content:
  run_response.content = get_paused_content(run_response)

- # Save output to file if save_response_to_file is set
- self.save_run_response_to_file(
- run_response=run_response,
- input=run_messages.user_message,
- session_id=session.session_id,
- user_id=user_id,
- )
-
- session.upsert_run(run=run_response)
-
- # Save session to storage
- self.save_session(session=session)
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)

  log_debug(f"Agent Run Paused: {run_response.run_id}", center=True, symbol="*")

@@ -3717,7 +4032,6 @@ class Agent:
  def _handle_agent_run_paused_stream(
  self,
  run_response: RunOutput,
- run_messages: RunMessages,
  session: AgentSession,
  user_id: Optional[str] = None,
  ) -> Iterator[RunOutputEvent]:
@@ -3728,26 +4042,67 @@ class Agent:
  run_response.content = get_paused_content(run_response)

  # We return and await confirmation/completion for the tools that require it
- pause_event = self._handle_event(
+ pause_event = handle_event(
  create_run_paused_event(
  from_run_response=run_response,
  tools=run_response.tools,
  ),
  run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
  )

- # Save output to file if save_response_to_file is set
- self.save_run_response_to_file(
- run_response=run_response,
- input=run_messages.user_message,
- session_id=session.session_id,
- user_id=user_id,
+ self._cleanup_and_store(run_response=run_response, session=session, user_id=user_id)
+
+ yield pause_event # type: ignore
+
+ log_debug(f"Agent Run Paused: {run_response.run_id}", center=True, symbol="*")
+
+ async def _ahandle_agent_run_paused(
+ self,
+ run_response: RunOutput,
+ session: AgentSession,
+ user_id: Optional[str] = None,
+ ) -> RunOutput:
+ # Set the run response to paused
+
+ run_response.status = RunStatus.paused
+ if not run_response.content:
+ run_response.content = get_paused_content(run_response)
+
+ await self._acleanup_and_store(run_response=run_response, session=session, user_id=user_id)
+
+ log_debug(f"Agent Run Paused: {run_response.run_id}", center=True, symbol="*")
+
+ # We return and await confirmation/completion for the tools that require it
+ return run_response
+
+ async def _ahandle_agent_run_paused_stream(
+ self,
+ run_response: RunOutput,
+ session: AgentSession,
+ user_id: Optional[str] = None,
+ ) -> AsyncIterator[RunOutputEvent]:
+ # Set the run response to paused
+
+ run_response.status = RunStatus.paused
+ if not run_response.content:
+ run_response.content = get_paused_content(run_response)
+
+ # We return and await confirmation/completion for the tools that require it
+ pause_event = handle_event(
+ create_run_paused_event(
+ from_run_response=run_response,
+ tools=run_response.tools,
+ ),
+ run_response,
+ events_to_skip=self.events_to_skip, # type: ignore
+ store_events=self.store_events,
  )
- session.upsert_run(run=run_response)
- # Save session to storage
- self.save_session(session=session)

- yield pause_event
+ await self._acleanup_and_store(run_response=run_response, session=session, user_id=user_id)
+
+ yield pause_event # type: ignore

  log_debug(f"Agent Run Paused: {run_response.run_id}", center=True, symbol="*")

@@ -3824,7 +4179,11 @@ class Agent:
3824
4179
  )
3825
4180
 
3826
4181
  def _run_tool(
3827
- self, run_response: RunOutput, run_messages: RunMessages, tool: ToolExecution
4182
+ self,
4183
+ run_response: RunOutput,
4184
+ run_messages: RunMessages,
4185
+ tool: ToolExecution,
4186
+ stream_events: bool = False,
3828
4187
  ) -> Iterator[RunOutputEvent]:
3829
4188
  self.model = cast(Model, self.model)
3830
4189
  # Execute the tool
@@ -3837,23 +4196,27 @@ class Agent:
3837
4196
  ):
3838
4197
  if isinstance(call_result, ModelResponse):
3839
4198
  if call_result.event == ModelResponseEvent.tool_call_started.value:
3840
- yield self._handle_event(
3841
- create_tool_call_started_event(from_run_response=run_response, tool=tool),
3842
- run_response,
3843
- )
4199
+ if stream_events:
4200
+ yield handle_event( # type: ignore
4201
+ create_tool_call_started_event(from_run_response=run_response, tool=tool),
4202
+ run_response,
4203
+ events_to_skip=self.events_to_skip, # type: ignore
4204
+ store_events=self.store_events,
4205
+ )
3844
4206
 
3845
4207
  if call_result.event == ModelResponseEvent.tool_call_completed.value and call_result.tool_executions:
3846
4208
  tool_execution = call_result.tool_executions[0]
3847
4209
  tool.result = tool_execution.result
3848
4210
  tool.tool_call_error = tool_execution.tool_call_error
3849
- yield self._handle_event(
3850
- create_tool_call_completed_event(
3851
- from_run_response=run_response,
3852
- tool=tool,
3853
- content=call_result.content,
3854
- ),
3855
- run_response,
3856
- )
4211
+ if stream_events:
4212
+ yield handle_event( # type: ignore
4213
+ create_tool_call_completed_event(
4214
+ from_run_response=run_response, tool=tool, content=call_result.content
4215
+ ),
4216
+ run_response,
4217
+ events_to_skip=self.events_to_skip, # type: ignore
4218
+ store_events=self.store_events,
4219
+ )
3857
4220
 
3858
4221
  if len(function_call_results) > 0:
3859
4222
  run_messages.messages.extend(function_call_results)
@@ -3873,6 +4236,7 @@ class Agent:
3873
4236
  run_response: RunOutput,
3874
4237
  run_messages: RunMessages,
3875
4238
  tool: ToolExecution,
4239
+ stream_events: bool = False,
3876
4240
  ) -> AsyncIterator[RunOutputEvent]:
3877
4241
  self.model = cast(Model, self.model)
3878
4242
 
@@ -3887,22 +4251,26 @@ class Agent:
3887
4251
  ):
3888
4252
  if isinstance(call_result, ModelResponse):
3889
4253
  if call_result.event == ModelResponseEvent.tool_call_started.value:
3890
- yield self._handle_event(
3891
- create_tool_call_started_event(from_run_response=run_response, tool=tool),
3892
- run_response,
3893
- )
4254
+ if stream_events:
4255
+ yield handle_event( # type: ignore
4256
+ create_tool_call_started_event(from_run_response=run_response, tool=tool),
4257
+ run_response,
4258
+ events_to_skip=self.events_to_skip, # type: ignore
4259
+ store_events=self.store_events,
4260
+ )
3894
4261
  if call_result.event == ModelResponseEvent.tool_call_completed.value and call_result.tool_executions:
3895
4262
  tool_execution = call_result.tool_executions[0]
3896
4263
  tool.result = tool_execution.result
3897
4264
  tool.tool_call_error = tool_execution.tool_call_error
3898
- yield self._handle_event(
3899
- create_tool_call_completed_event(
3900
- from_run_response=run_response,
3901
- tool=tool,
3902
- content=call_result.content,
3903
- ),
3904
- run_response,
3905
- )
4265
+ if stream_events:
4266
+ yield handle_event( # type: ignore
4267
+ create_tool_call_completed_event(
4268
+ from_run_response=run_response, tool=tool, content=call_result.content
4269
+ ),
4270
+ run_response,
4271
+ events_to_skip=self.events_to_skip, # type: ignore
4272
+ store_events=self.store_events,
4273
+ )
3906
4274
  if len(function_call_results) > 0:
3907
4275
  run_messages.messages.extend(function_call_results)
3908
4276
 
@@ -3944,7 +4312,7 @@ class Agent:
3944
4312
  deque(self._run_tool(run_response, run_messages, _t), maxlen=0)
3945
4313
 
3946
4314
  def _handle_tool_call_updates_stream(
3947
- self, run_response: RunOutput, run_messages: RunMessages
4315
+ self, run_response: RunOutput, run_messages: RunMessages, stream_events: bool = False
3948
4316
  ) -> Iterator[RunOutputEvent]:
3949
4317
  self.model = cast(Model, self.model)
3950
4318
  for _t in run_response.tools or []:
@@ -3952,7 +4320,7 @@ class Agent:
3952
4320
  if _t.requires_confirmation is not None and _t.requires_confirmation is True and self._functions_for_model:
3953
4321
  # Tool is confirmed and hasn't been run before
3954
4322
  if _t.confirmed is not None and _t.confirmed is True and _t.result is None:
3955
- yield from self._run_tool(run_response, run_messages, _t)
4323
+ yield from self._run_tool(run_response, run_messages, _t, stream_events=stream_events)
3956
4324
  else:
3957
4325
  self._reject_tool_call(run_messages, _t)
3958
4326
  _t.confirmed = False
@@ -3977,7 +4345,7 @@ class Agent:
3977
4345
  # Case 4: Handle user input required tools
3978
4346
  elif _t.requires_user_input is not None and _t.requires_user_input is True:
3979
4347
  self._handle_user_input_update(tool=_t)
3980
- yield from self._run_tool(run_response, run_messages, _t)
4348
+ yield from self._run_tool(run_response, run_messages, _t, stream_events=stream_events)
3981
4349
  _t.requires_user_input = False
3982
4350
  _t.answered = True
3983
4351
 
@@ -4018,7 +4386,7 @@ class Agent:
4018
4386
  _t.answered = True
4019
4387
 
4020
4388
  async def _ahandle_tool_call_updates_stream(
4021
- self, run_response: RunOutput, run_messages: RunMessages
4389
+ self, run_response: RunOutput, run_messages: RunMessages, stream_events: bool = False
4022
4390
  ) -> AsyncIterator[RunOutputEvent]:
4023
4391
  self.model = cast(Model, self.model)
4024
4392
  for _t in run_response.tools or []:
@@ -4026,7 +4394,7 @@ class Agent:
4026
4394
  if _t.requires_confirmation is not None and _t.requires_confirmation is True and self._functions_for_model:
4027
4395
  # Tool is confirmed and hasn't been run before
4028
4396
  if _t.confirmed is not None and _t.confirmed is True and _t.result is None:
4029
- async for event in self._arun_tool(run_response, run_messages, _t):
4397
+ async for event in self._arun_tool(run_response, run_messages, _t, stream_events=stream_events):
4030
4398
  yield event
4031
4399
  else:
4032
4400
  self._reject_tool_call(run_messages, _t)
@@ -4050,7 +4418,7 @@ class Agent:
4050
4418
  # # Case 4: Handle user input required tools
4051
4419
  elif _t.requires_user_input is not None and _t.requires_user_input is True:
4052
4420
  self._handle_user_input_update(tool=_t)
4053
- async for event in self._arun_tool(run_response, run_messages, _t):
4421
+ async for event in self._arun_tool(run_response, run_messages, _t, stream_events=stream_events):
4054
4422
  yield event
4055
4423
  _t.requires_user_input = False
4056
4424
  _t.answered = True
@@ -4157,7 +4525,7 @@ class Agent:
4157
4525
  run_response: RunOutput,
4158
4526
  run_messages: RunMessages,
4159
4527
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
4160
- stream_intermediate_steps: bool = False,
4528
+ stream_events: bool = False,
4161
4529
  ) -> Iterator[RunOutputEvent]:
4162
4530
  self.model = cast(Model, self.model)
4163
4531
 
@@ -4190,11 +4558,11 @@ class Agent:
4190
4558
  model_response_event=model_response_event,
4191
4559
  reasoning_state=reasoning_state,
4192
4560
  parse_structured_output=self.should_parse_structured_output,
4193
- stream_intermediate_steps=stream_intermediate_steps,
4561
+ stream_events=stream_events,
4194
4562
  )
4195
4563
 
4196
4564
  # Determine reasoning completed
4197
- if stream_intermediate_steps and reasoning_state["reasoning_started"]:
4565
+ if stream_events and reasoning_state["reasoning_started"]:
4198
4566
  all_reasoning_steps: List[ReasoningStep] = []
4199
4567
  if run_response and run_response.reasoning_steps:
4200
4568
  all_reasoning_steps = cast(List[ReasoningStep], run_response.reasoning_steps)
@@ -4204,13 +4572,15 @@ class Agent:
4204
4572
  run_response=run_response,
4205
4573
  reasoning_time_taken=reasoning_state["reasoning_time_taken"],
4206
4574
  )
4207
- yield self._handle_event(
4575
+ yield handle_event( # type: ignore
4208
4576
  create_reasoning_completed_event(
4209
4577
  from_run_response=run_response,
4210
4578
  content=ReasoningSteps(reasoning_steps=all_reasoning_steps),
4211
4579
  content_type=ReasoningSteps.__name__,
4212
4580
  ),
4213
4581
  run_response,
4582
+ events_to_skip=self.events_to_skip, # type: ignore
4583
+ store_events=self.store_events,
4214
4584
  )
4215
4585
 
4216
4586
  # Update RunOutput
@@ -4233,7 +4603,7 @@ class Agent:
4233
4603
  run_response: RunOutput,
4234
4604
  run_messages: RunMessages,
4235
4605
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
4236
- stream_intermediate_steps: bool = False,
4606
+ stream_events: bool = False,
4237
4607
  ) -> AsyncIterator[RunOutputEvent]:
4238
4608
  self.model = cast(Model, self.model)
4239
4609
 
@@ -4268,11 +4638,11 @@ class Agent:
4268
4638
  model_response_event=model_response_event,
4269
4639
  reasoning_state=reasoning_state,
4270
4640
  parse_structured_output=self.should_parse_structured_output,
4271
- stream_intermediate_steps=stream_intermediate_steps,
4641
+ stream_events=stream_events,
4272
4642
  ):
4273
4643
  yield event
4274
4644
 
4275
- if stream_intermediate_steps and reasoning_state["reasoning_started"]:
4645
+ if stream_events and reasoning_state["reasoning_started"]:
4276
4646
  all_reasoning_steps: List[ReasoningStep] = []
4277
4647
  if run_response and run_response.reasoning_steps:
4278
4648
  all_reasoning_steps = cast(List[ReasoningStep], run_response.reasoning_steps)
@@ -4282,13 +4652,15 @@ class Agent:
4282
4652
  run_response=run_response,
4283
4653
  reasoning_time_taken=reasoning_state["reasoning_time_taken"],
4284
4654
  )
4285
- yield self._handle_event(
4655
+ yield handle_event( # type: ignore
4286
4656
  create_reasoning_completed_event(
4287
4657
  from_run_response=run_response,
4288
4658
  content=ReasoningSteps(reasoning_steps=all_reasoning_steps),
4289
4659
  content_type=ReasoningSteps.__name__,
4290
4660
  ),
4291
4661
  run_response,
4662
+ events_to_skip=self.events_to_skip, # type: ignore
4663
+ store_events=self.store_events,
4292
4664
  )
4293
4665
 
4294
4666
  # Update RunOutput
@@ -4313,7 +4685,7 @@ class Agent:
4313
4685
  model_response_event: Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent],
4314
4686
  reasoning_state: Optional[Dict[str, Any]] = None,
4315
4687
  parse_structured_output: bool = False,
4316
- stream_intermediate_steps: bool = False,
4688
+ stream_events: bool = False,
4317
4689
  ) -> Iterator[RunOutputEvent]:
4318
4690
  if isinstance(model_response_event, tuple(get_args(RunOutputEvent))) or isinstance(
4319
4691
  model_response_event, tuple(get_args(TeamRunOutputEvent))
@@ -4325,7 +4697,12 @@ class Agent:
4325
4697
  model_response_event.run_id = run_response.run_id # type: ignore
4326
4698
 
4327
4699
  # We just bubble the event up
4328
- yield self._handle_event(model_response_event, run_response) # type: ignore
4700
+ yield handle_event( # type: ignore
4701
+ model_response_event, # type: ignore
4702
+ run_response,
4703
+ events_to_skip=self.events_to_skip, # type: ignore
4704
+ store_events=self.store_events,
4705
+ )
4329
4706
  else:
4330
4707
  model_response_event = cast(ModelResponse, model_response_event)
4331
4708
  # If the model response is an assistant_response, yield a RunOutput
@@ -4370,13 +4747,15 @@ class Agent:
4370
4747
 
4371
4748
  # Only yield if we have content to show
4372
4749
  if content_type != "str":
4373
- yield self._handle_event(
4750
+ yield handle_event( # type: ignore
4374
4751
  create_run_output_content_event(
4375
4752
  from_run_response=run_response,
4376
4753
  content=model_response.content,
4377
4754
  content_type=content_type,
4378
4755
  ),
4379
4756
  run_response,
4757
+ events_to_skip=self.events_to_skip, # type: ignore
4758
+ store_events=self.store_events,
4380
4759
  )
4381
4760
  elif (
4382
4761
  model_response_event.content is not None
@@ -4385,7 +4764,7 @@ class Agent:
4385
4764
  or model_response_event.citations is not None
4386
4765
  or model_response_event.provider_data is not None
4387
4766
  ):
4388
- yield self._handle_event(
4767
+ yield handle_event( # type: ignore
4389
4768
  create_run_output_content_event(
4390
4769
  from_run_response=run_response,
4391
4770
  content=model_response_event.content,
@@ -4395,6 +4774,8 @@ class Agent:
4395
4774
  model_provider_data=model_response_event.provider_data,
4396
4775
  ),
4397
4776
  run_response,
4777
+ events_to_skip=self.events_to_skip, # type: ignore
4778
+ store_events=self.store_events,
4398
4779
  )
4399
4780
 
4400
4781
  # Process audio
@@ -4449,21 +4830,25 @@ class Agent:
4449
4830
  )
4450
4831
  run_response.created_at = model_response_event.created_at
4451
4832
 
4452
- yield self._handle_event(
4833
+ yield handle_event( # type: ignore
4453
4834
  create_run_output_content_event(
4454
4835
  from_run_response=run_response,
4455
4836
  response_audio=run_response.response_audio,
4456
4837
  ),
4457
4838
  run_response,
4839
+ events_to_skip=self.events_to_skip, # type: ignore
4840
+ store_events=self.store_events,
4458
4841
  )
4459
4842
 
4460
4843
  if model_response_event.images is not None:
4461
- yield self._handle_event(
4844
+ yield handle_event( # type: ignore
4462
4845
  create_run_output_content_event(
4463
4846
  from_run_response=run_response,
4464
4847
  image=model_response_event.images[-1],
4465
4848
  ),
4466
4849
  run_response,
4850
+ events_to_skip=self.events_to_skip, # type: ignore
4851
+ store_events=self.store_events,
4467
4852
  )
4468
4853
 
4469
4854
  if model_response.images is None:
@@ -4498,11 +4883,14 @@ class Agent:
4498
4883
  run_response.tools.extend(tool_executions_list)
4499
4884
 
4500
4885
  # Yield each tool call started event
4501
- for tool in tool_executions_list:
4502
- yield self._handle_event(
4503
- create_tool_call_started_event(from_run_response=run_response, tool=tool),
4504
- run_response,
4505
- )
4886
+ if stream_events:
4887
+ for tool in tool_executions_list:
4888
+ yield handle_event( # type: ignore
4889
+ create_tool_call_started_event(from_run_response=run_response, tool=tool),
4890
+ run_response,
4891
+ events_to_skip=self.events_to_skip, # type: ignore
4892
+ store_events=self.store_events,
4893
+ )
4506
4894
 
4507
4895
  # If the model response is a tool_call_completed, update the existing tool call in the run_response
4508
4896
  elif model_response_event.event == ModelResponseEvent.tool_call_completed.value:
@@ -4566,159 +4954,81 @@ class Agent:
4566
4954
  "reasoning_time_taken"
4567
4955
  ] + float(tool_call_metrics.duration)
4568
4956
 
4569
- yield self._handle_event(
4570
- create_tool_call_completed_event(
4571
- from_run_response=run_response,
4572
- tool=tool_call,
4573
- content=model_response_event.content,
4574
- ),
4575
- run_response,
4576
- )
4957
+ if stream_events:
4958
+ yield handle_event( # type: ignore
4959
+ create_tool_call_completed_event(
4960
+ from_run_response=run_response, tool=tool_call, content=model_response_event.content
4961
+ ),
4962
+ run_response,
4963
+ events_to_skip=self.events_to_skip, # type: ignore
4964
+ store_events=self.store_events,
4965
+ )
4577
4966
 
4578
- if stream_intermediate_steps:
4967
+ if stream_events:
4579
4968
  if reasoning_step is not None:
4580
4969
  if reasoning_state and not reasoning_state["reasoning_started"]:
4581
- yield self._handle_event(
4970
+ yield handle_event( # type: ignore
4582
4971
  create_reasoning_started_event(from_run_response=run_response),
4583
4972
  run_response,
4973
+ events_to_skip=self.events_to_skip, # type: ignore
4974
+ store_events=self.store_events,
4584
4975
  )
4585
4976
  reasoning_state["reasoning_started"] = True
4586
4977
 
4587
- yield self._handle_event(
4978
+ yield handle_event( # type: ignore
4588
4979
  create_reasoning_step_event(
4589
4980
  from_run_response=run_response,
4590
4981
  reasoning_step=reasoning_step,
4591
4982
  reasoning_content=run_response.reasoning_content or "",
4592
4983
  ),
4593
4984
  run_response,
4985
+ events_to_skip=self.events_to_skip, # type: ignore
4986
+ store_events=self.store_events,
4594
4987
  )
4595
4988
 
4596
- def _make_memories_cultural_knowledge_and_summaries(
4989
+ def _make_cultural_knowledge(
4597
4990
  self,
4598
- run_response: RunOutput,
4599
4991
  run_messages: RunMessages,
4600
- session: AgentSession,
4601
- user_id: Optional[str] = None,
4602
- ) -> Iterator[RunOutputEvent]:
4603
- from concurrent.futures import ThreadPoolExecutor, as_completed
4604
-
4605
- with ThreadPoolExecutor(max_workers=3) as executor:
4606
- futures = []
4607
-
4608
- user_message_str = (
4609
- run_messages.user_message.get_content_string() if run_messages.user_message is not None else None
4610
- )
4611
-
4612
- # Create user memories
4613
- if user_message_str is not None and self.memory_manager is not None and not self.enable_agentic_memory:
4614
- log_debug("Creating user memories.")
4615
- futures.append(
4616
- executor.submit(
4617
- self.memory_manager.create_user_memories,
4618
- message=user_message_str,
4619
- user_id=user_id,
4620
- agent_id=self.id,
4621
- )
4622
- )
4623
-
4624
- # Parse messages if provided
4625
- if (
4626
- self.enable_user_memories
4627
- and run_messages.extra_messages is not None
4628
- and len(run_messages.extra_messages) > 0
4629
- ):
4630
- parsed_messages = []
4631
- for _im in run_messages.extra_messages:
4632
- if isinstance(_im, Message):
4633
- parsed_messages.append(_im)
4634
- elif isinstance(_im, dict):
4635
- try:
4636
- parsed_messages.append(Message(**_im))
4637
- except Exception as e:
4638
- log_warning(f"Failed to validate message during memory update: {e}")
4639
- else:
4640
- log_warning(f"Unsupported message type: {type(_im)}")
4641
- continue
4642
-
4643
- if len(parsed_messages) > 0 and self.memory_manager is not None:
4644
- futures.append(
4645
- executor.submit(
4646
- self.memory_manager.create_user_memories,
4647
- messages=parsed_messages,
4648
- user_id=user_id,
4649
- agent_id=self.id,
4650
- )
4651
- )
4652
- else:
4653
- log_warning("Unable to add messages to memory")
4654
-
4655
- # Create cultural knowledge
4656
- if user_message_str is not None and self.culture_manager is not None and self.update_cultural_knowledge:
4657
- log_debug("Creating cultural knowledge.")
4658
- futures.append(
4659
- executor.submit(
4660
- self.culture_manager.create_cultural_knowledge,
4661
- message=user_message_str,
4662
- )
4663
- )
4664
-
4665
- # Create session summary
4666
- if self.session_summary_manager is not None:
4667
- log_debug("Creating session summary.")
4668
- futures.append(
4669
- executor.submit(
4670
- self.session_summary_manager.create_session_summary, # type: ignore
4671
- session=session,
4672
- )
4673
- )
4674
-
4675
- if futures:
4676
- if self.stream_intermediate_steps:
4677
- yield self._handle_event(
4678
- create_memory_update_started_event(from_run_response=run_response),
4679
- run_response,
4680
- )
4681
-
4682
- # Wait for all operations to complete and handle any errors
4683
- for future in as_completed(futures):
4684
- try:
4685
- future.result()
4686
- except Exception as e:
4687
- log_warning(f"Error in memory/summary operation: {str(e)}")
4992
+ ):
4993
+ if (
4994
+ run_messages.user_message is not None
4995
+ and self.culture_manager is not None
4996
+ and self.update_cultural_knowledge
4997
+ ):
4998
+ log_debug("Creating cultural knowledge.")
4999
+ self.culture_manager.create_cultural_knowledge(message=run_messages.user_message.get_content_string())
4688
5000
 
4689
- if self.stream_intermediate_steps:
4690
- yield self._handle_event(
4691
- create_memory_update_completed_event(from_run_response=run_response),
4692
- run_response,
4693
- )
5001
+ async def _acreate_cultural_knowledge(
5002
+ self,
5003
+ run_messages: RunMessages,
5004
+ ):
5005
+ if (
5006
+ run_messages.user_message is not None
5007
+ and self.culture_manager is not None
5008
+ and self.update_cultural_knowledge
5009
+ ):
5010
+ log_debug("Creating cultural knowledge.")
5011
+ await self.culture_manager.acreate_cultural_knowledge(
5012
+ message=run_messages.user_message.get_content_string()
5013
+ )
4694
5014
 
4695
- async def _amake_memories_cultural_knowledge_and_summaries(
5015
+ def _make_memories(
4696
5016
  self,
4697
- run_response: RunOutput,
4698
5017
  run_messages: RunMessages,
4699
- session: AgentSession,
4700
5018
  user_id: Optional[str] = None,
4701
- ) -> AsyncIterator[RunOutputEvent]:
4702
- tasks: List[Any] = []
4703
-
4704
- # Create user memories from single message
4705
- if run_messages.user_message is not None and self.memory_manager is not None and not self.enable_agentic_memory:
5019
+ ):
5020
+ user_message_str = (
5021
+ run_messages.user_message.get_content_string() if run_messages.user_message is not None else None
5022
+ )
5023
+ if user_message_str is not None and user_message_str.strip() != "" and self.memory_manager is not None:
4706
5024
  log_debug("Creating user memories.")
4707
-
4708
- tasks.append(
4709
- self.memory_manager.acreate_user_memories(
4710
- message=run_messages.user_message.get_content_string(),
4711
- user_id=user_id,
4712
- agent_id=self.id,
4713
- )
5025
+ self.memory_manager.create_user_memories( # type: ignore
5026
+ message=user_message_str,
5027
+ user_id=user_id,
5028
+ agent_id=self.id,
4714
5029
  )
4715
5030
 
4716
- # Parse messages if provided
4717
- if (
4718
- self.memory_manager is not None
4719
- and run_messages.extra_messages is not None
4720
- and len(run_messages.extra_messages) > 0
4721
- ):
5031
+ if run_messages.extra_messages is not None and len(run_messages.extra_messages) > 0:
4722
5032
  parsed_messages = []
4723
5033
  for _im in run_messages.extra_messages:
4724
5034
  if isinstance(_im, Message):
@@ -4732,54 +5042,59 @@ class Agent:
4732
5042
  log_warning(f"Unsupported message type: {type(_im)}")
4733
5043
  continue
4734
5044
 
4735
- if len(parsed_messages) > 0:
4736
- tasks.append(
4737
- self.memory_manager.acreate_user_memories(
4738
- messages=parsed_messages, user_id=user_id, agent_id=self.id
4739
- )
4740
- )
5045
+ # Filter out messages with empty content before passing to memory manager
5046
+ non_empty_messages = [
5047
+ msg
5048
+ for msg in parsed_messages
5049
+ if msg.content and (not isinstance(msg.content, str) or msg.content.strip() != "")
5050
+ ]
5051
+ if len(non_empty_messages) > 0 and self.memory_manager is not None:
5052
+ self.memory_manager.create_user_memories(messages=non_empty_messages, user_id=user_id, agent_id=self.id) # type: ignore
4741
5053
  else:
4742
5054
  log_warning("Unable to add messages to memory")
4743
5055
 
4744
- # Create cultural knowledge
4745
- if (
4746
- run_messages.user_message is not None
4747
- and self.culture_manager is not None
4748
- and self.update_cultural_knowledge
4749
- ):
4750
- log_debug("Creating cultural knowledge.")
4751
-
4752
- tasks.append(
4753
- self.culture_manager.acreate_cultural_knowledge(message=run_messages.user_message.get_content_string())
4754
- )
4755
-
4756
- # Create session summary
4757
- if self.session_summary_manager is not None:
4758
- log_debug("Creating session summary.")
4759
- tasks.append(
4760
- self.session_summary_manager.acreate_session_summary(
4761
- session=session,
4762
- )
5056
+ async def _amake_memories(
5057
+ self,
5058
+ run_messages: RunMessages,
5059
+ user_id: Optional[str] = None,
5060
+ ):
5061
+ user_message_str = (
5062
+ run_messages.user_message.get_content_string() if run_messages.user_message is not None else None
5063
+ )
5064
+ if user_message_str is not None and user_message_str.strip() != "" and self.memory_manager is not None:
5065
+ log_debug("Creating user memories.")
5066
+ await self.memory_manager.acreate_user_memories( # type: ignore
5067
+ message=user_message_str,
5068
+ user_id=user_id,
5069
+ agent_id=self.id,
4763
5070
  )
4764
5071
 
4765
- if tasks:
4766
- if self.stream_intermediate_steps:
4767
- yield self._handle_event(
4768
- create_memory_update_started_event(from_run_response=run_response),
4769
- run_response,
4770
- )
4771
-
4772
- # Execute all tasks concurrently and handle any errors
4773
- try:
4774
- await asyncio.gather(*tasks)
4775
- except Exception as e:
4776
- log_warning(f"Error in memory/summary operation: {str(e)}")
5072
+ if run_messages.extra_messages is not None and len(run_messages.extra_messages) > 0:
5073
+ parsed_messages = []
5074
+ for _im in run_messages.extra_messages:
5075
+ if isinstance(_im, Message):
5076
+ parsed_messages.append(_im)
5077
+ elif isinstance(_im, dict):
5078
+ try:
5079
+ parsed_messages.append(Message(**_im))
5080
+ except Exception as e:
5081
+ log_warning(f"Failed to validate message during memory update: {e}")
5082
+ else:
5083
+ log_warning(f"Unsupported message type: {type(_im)}")
5084
+ continue
4777
5085
 
4778
- if self.stream_intermediate_steps:
4779
- yield self._handle_event(
4780
- create_memory_update_completed_event(from_run_response=run_response),
4781
- run_response,
5086
+ # Filter out messages with empty content before passing to memory manager
5087
+ non_empty_messages = [
5088
+ msg
5089
+ for msg in parsed_messages
5090
+ if msg.content and (not isinstance(msg.content, str) or msg.content.strip() != "")
5091
+ ]
5092
+ if len(non_empty_messages) > 0 and self.memory_manager is not None:
5093
+ await self.memory_manager.acreate_user_memories( # type: ignore
5094
+ messages=non_empty_messages, user_id=user_id, agent_id=self.id
4782
5095
  )
5096
+ else:
5097
+ log_warning("Unable to add messages to memory")
4783
5098
 
4784
5099
  def _raise_if_async_tools(self) -> None:
4785
5100
  """Raise an exception if any tools contain async functions"""
@@ -4878,7 +5193,7 @@ class Agent:
  self._rebuild_tools = True

  if self.enable_agentic_state:
- agent_tools.append(self.update_session_state)
+ agent_tools.append(Function(name="update_session_state", entrypoint=self._update_session_state_tool))

  # Add tools for accessing knowledge
  if self.knowledge is not None or self.knowledge_retriever is not None:
@@ -4976,7 +5291,7 @@ class Agent:
  self._rebuild_tools = True

  if self.enable_agentic_state:
- agent_tools.append(self.update_session_state)
+ agent_tools.append(Function(name="update_session_state", entrypoint=self._update_session_state_tool))

  # Add tools for accessing knowledge
  if self.knowledge is not None or self.knowledge_retriever is not None:
@@ -5014,137 +5329,6 @@ class Agent:
5014
5329
 
5015
5330
  return agent_tools
5016
5331
 
5017
- def _collect_joint_images(
5018
- self,
5019
- run_input: Optional[RunInput] = None,
5020
- session: Optional[AgentSession] = None,
5021
- ) -> Optional[Sequence[Image]]:
5022
- """Collect images from input, session history, and current run response."""
5023
- joint_images: List[Image] = []
5024
-
5025
- # 1. Add images from current input
5026
- if run_input and run_input.images:
5027
- joint_images.extend(run_input.images)
5028
- log_debug(f"Added {len(run_input.images)} input images to joint list")
5029
-
5030
- # 2. Add images from session history (from both input and generated sources)
5031
- try:
5032
- if session and session.runs:
5033
- for historical_run in session.runs:
5034
- # Add generated images from previous runs
5035
- if historical_run.images:
5036
- joint_images.extend(historical_run.images)
5037
- log_debug(
5038
- f"Added {len(historical_run.images)} generated images from historical run {historical_run.run_id}"
5039
- )
5040
-
5041
- # Add input images from previous runs
5042
- if historical_run.input and historical_run.input.images:
5043
- joint_images.extend(historical_run.input.images)
5044
- log_debug(
5045
- f"Added {len(historical_run.input.images)} input images from historical run {historical_run.run_id}"
5046
- )
5047
- except Exception as e:
5048
- log_debug(f"Could not access session history for images: {e}")
5049
-
5050
- if joint_images:
5051
- log_debug(f"Images Available to Model: {len(joint_images)} images")
5052
- return joint_images if joint_images else None
5053
-
5054
- def _collect_joint_videos(
5055
- self,
5056
- run_input: Optional[RunInput] = None,
5057
- session: Optional[AgentSession] = None,
5058
- ) -> Optional[Sequence[Video]]:
5059
- """Collect videos from input, session history, and current run response."""
5060
- joint_videos: List[Video] = []
5061
-
5062
- # 1. Add videos from current input
5063
- if run_input and run_input.videos:
5064
- joint_videos.extend(run_input.videos)
5065
- log_debug(f"Added {len(run_input.videos)} input videos to joint list")
5066
-
5067
- # 2. Add videos from session history (from both input and generated sources)
5068
- try:
5069
- if session and session.runs:
5070
- for historical_run in session.runs:
5071
- # Add generated videos from previous runs
5072
- if historical_run.videos:
5073
- joint_videos.extend(historical_run.videos)
5074
- log_debug(
5075
- f"Added {len(historical_run.videos)} generated videos from historical run {historical_run.run_id}"
5076
- )
5077
-
5078
- # Add input videos from previous runs
5079
- if historical_run.input and historical_run.input.videos:
5080
- joint_videos.extend(historical_run.input.videos)
5081
- log_debug(
5082
- f"Added {len(historical_run.input.videos)} input videos from historical run {historical_run.run_id}"
5083
- )
5084
- except Exception as e:
5085
- log_debug(f"Could not access session history for videos: {e}")
5086
-
5087
- if joint_videos:
5088
- log_debug(f"Videos Available to Model: {len(joint_videos)} videos")
5089
- return joint_videos if joint_videos else None
5090
-
5091
- def _collect_joint_audios(
5092
- self,
5093
- run_input: Optional[RunInput] = None,
5094
- session: Optional[AgentSession] = None,
5095
- ) -> Optional[Sequence[Audio]]:
5096
- """Collect audios from input, session history, and current run response."""
5097
- joint_audios: List[Audio] = []
5098
-
5099
- # 1. Add audios from current input
5100
- if run_input and run_input.audios:
5101
- joint_audios.extend(run_input.audios)
5102
- log_debug(f"Added {len(run_input.audios)} input audios to joint list")
5103
-
5104
- # 2. Add audios from session history (from both input and generated sources)
5105
- try:
5106
- if session and session.runs:
5107
- for historical_run in session.runs:
5108
- # Add generated audios from previous runs
5109
- if historical_run.audio:
5110
- joint_audios.extend(historical_run.audio)
5111
- log_debug(
5112
- f"Added {len(historical_run.audio)} generated audios from historical run {historical_run.run_id}"
5113
- )
5114
-
5115
- # Add input audios from previous runs
5116
- if historical_run.input and historical_run.input.audios:
5117
- joint_audios.extend(historical_run.input.audios)
5118
- log_debug(
5119
- f"Added {len(historical_run.input.audios)} input audios from historical run {historical_run.run_id}"
5120
- )
5121
- except Exception as e:
5122
- log_debug(f"Could not access session history for audios: {e}")
5123
-
5124
- if joint_audios:
5125
- log_debug(f"Audios Available to Model: {len(joint_audios)} audios")
5126
- return joint_audios if joint_audios else None
5127
-
5128
- def _collect_joint_files(
5129
- self,
5130
- run_input: Optional[RunInput] = None,
5131
- ) -> Optional[Sequence[File]]:
5132
- """Collect files from input and session history."""
5133
- from agno.utils.log import log_debug
5134
-
5135
- joint_files: List[File] = []
5136
-
5137
- # 1. Add files from current input
5138
- if run_input and run_input.files:
5139
- joint_files.extend(run_input.files)
5140
-
5141
- # TODO: Files aren't stored in session history yet and dont have a FileArtifact
5142
-
5143
- if joint_files:
5144
- log_debug(f"Files Available to Model: {len(joint_files)} files")
5145
-
5146
- return joint_files if joint_files else None
5147
-
5148
5332
  def _determine_tools_for_model(
5149
5333
  self,
5150
5334
  model: Model,
@@ -5254,10 +5438,10 @@ class Agent:
5254
5438
  )
5255
5439
 
5256
5440
  # Only collect media if functions actually need them
5257
- joint_images = self._collect_joint_images(run_response.input, session) if needs_media else None
5258
- joint_files = self._collect_joint_files(run_response.input) if needs_media else None
5259
- joint_audios = self._collect_joint_audios(run_response.input, session) if needs_media else None
5260
- joint_videos = self._collect_joint_videos(run_response.input, session) if needs_media else None
5441
+ joint_images = collect_joint_images(run_response.input, session) if needs_media else None
5442
+ joint_files = collect_joint_files(run_response.input) if needs_media else None
5443
+ joint_audios = collect_joint_audios(run_response.input, session) if needs_media else None
5444
+ joint_videos = collect_joint_videos(run_response.input, session) if needs_media else None
5261
5445
 
5262
5446
  for func in self._functions_for_model.values():
5263
5447
  func._session_state = session_state
@@ -5376,10 +5560,10 @@ class Agent:
5376
5560
  )
5377
5561
 
5378
5562
  # Only collect media if functions actually need them
5379
- joint_images = self._collect_joint_images(run_response.input, session) if needs_media else None
5380
- joint_files = self._collect_joint_files(run_response.input) if needs_media else None
5381
- joint_audios = self._collect_joint_audios(run_response.input, session) if needs_media else None
5382
- joint_videos = self._collect_joint_videos(run_response.input, session) if needs_media else None
5563
+ joint_images = collect_joint_images(run_response.input, session) if needs_media else None
5564
+ joint_files = collect_joint_files(run_response.input) if needs_media else None
5565
+ joint_audios = collect_joint_audios(run_response.input, session) if needs_media else None
5566
+ joint_videos = collect_joint_videos(run_response.input, session) if needs_media else None
5383
5567
 
5384
5568
  for func in self._functions_for_model.values():
5385
5569
  func._session_state = session_state
@@ -5488,17 +5672,21 @@ class Agent:
5488
5672
  return agent_data
5489
5673
 
5490
5674
  # -*- Session Database Functions
5491
- def _read_session(self, session_id: str) -> Optional[AgentSession]:
5675
+ def _read_session(
5676
+ self, session_id: str, session_type: SessionType = SessionType.AGENT
5677
+ ) -> Optional[Union[AgentSession, TeamSession, WorkflowSession]]:
5492
5678
  """Get a Session from the database."""
5493
5679
  try:
5494
5680
  if not self.db:
5495
5681
  raise ValueError("Db not initialized")
5496
- return self.db.get_session(session_id=session_id, session_type=SessionType.AGENT) # type: ignore
5682
+ return self.db.get_session(session_id=session_id, session_type=session_type) # type: ignore
5497
5683
  except Exception as e:
5498
5684
  log_warning(f"Error getting session from db: {e}")
5499
5685
  return None
5500
5686
 
5501
- async def _aread_session(self, session_id: str) -> Optional[AgentSession]:
5687
+ async def _aread_session(
5688
+ self, session_id: str, session_type: SessionType = SessionType.AGENT
5689
+ ) -> Optional[Union[AgentSession, TeamSession, WorkflowSession]]:
5502
5690
  """Get a Session from the database."""
5503
5691
  try:
5504
5692
  if not self.db:
@@ -5599,12 +5787,17 @@ class Agent:
5599
5787
  if agent_session is None:
5600
5788
  # Creating new session if none found
5601
5789
  log_debug(f"Creating new AgentSession: {session_id}")
5790
+ session_data = {}
5791
+ if self.session_state is not None:
5792
+ from copy import deepcopy
5793
+
5794
+ session_data["session_state"] = deepcopy(self.session_state)
5602
5795
  agent_session = AgentSession(
5603
5796
  session_id=session_id,
5604
5797
  agent_id=self.id,
5605
5798
  user_id=user_id,
5606
5799
  agent_data=self._get_agent_data(),
5607
- session_data={},
5800
+ session_data=session_data,
5608
5801
  metadata=self.metadata,
5609
5802
  created_at=int(time()),
5610
5803
  )
@@ -5629,18 +5822,25 @@ class Agent:
5629
5822
  agent_session = None
5630
5823
  if self.db is not None and self.team_id is None and self.workflow_id is None:
5631
5824
  log_debug(f"Reading AgentSession: {session_id}")
5632
-
5633
- agent_session = cast(AgentSession, await self._aread_session(session_id=session_id))
5825
+ if self._has_async_db():
5826
+ agent_session = cast(AgentSession, await self._aread_session(session_id=session_id))
5827
+ else:
5828
+ agent_session = cast(AgentSession, self._read_session(session_id=session_id))
5634
5829
 
5635
5830
  if agent_session is None:
5636
5831
  # Creating new session if none found
5637
5832
  log_debug(f"Creating new AgentSession: {session_id}")
5833
+ session_data = {}
5834
+ if self.session_state is not None:
5835
+ from copy import deepcopy
5836
+
5837
+ session_data["session_state"] = deepcopy(self.session_state)
5638
5838
  agent_session = AgentSession(
5639
5839
  session_id=session_id,
5640
5840
  agent_id=self.id,
5641
5841
  user_id=user_id,
5642
5842
  agent_data=self._get_agent_data(),
5643
- session_data={},
5843
+ session_data=session_data,
5644
5844
  metadata=self.metadata,
5645
5845
  created_at=int(time()),
5646
5846
  )
@@ -5666,13 +5866,13 @@ class Agent:
5666
5866
  log_warning(f"RunOutput {run_id} not found in AgentSession {self._agent_session.session_id}")
5667
5867
  return None
5668
5868
  else:
5669
- agent_session = self.get_session(session_id=session_id)
5670
- if agent_session is not None:
5671
- run_response = agent_session.get_run(run_id=run_id)
5869
+ session = self.get_session(session_id=session_id)
5870
+ if session is not None:
5871
+ run_response = session.get_run(run_id=run_id)
5672
5872
  if run_response is not None:
5673
5873
  return run_response
5674
5874
  else:
5675
- log_warning(f"RunOutput {run_id} not found in AgentSession {session_id}")
5875
+ log_warning(f"RunOutput {run_id} not found in Session {session_id}")
5676
5876
  return None
5677
5877
 
5678
5878
  def get_last_run_output(self, session_id: Optional[str] = None) -> Optional[RunOutput]:
@@ -5690,17 +5890,17 @@ class Agent:
5690
5890
  and self._agent_session.runs is not None
5691
5891
  and len(self._agent_session.runs) > 0
5692
5892
  ):
5693
- run_response = self._agent_session.runs[-1]
5694
- if run_response is not None:
5695
- return run_response
5893
+ for run_output in reversed(self._agent_session.runs):
5894
+ if hasattr(run_output, "agent_id") and run_output.agent_id == self.id:
5895
+ return run_output
5696
5896
  else:
5697
- agent_session = self.get_session(session_id=session_id)
5698
- if agent_session is not None and agent_session.runs is not None and len(agent_session.runs) > 0:
5699
- run_response = agent_session.runs[-1]
5700
- if run_response is not None:
5701
- return run_response
5897
+ session = self.get_session(session_id=session_id)
5898
+ if session is not None and session.runs is not None and len(session.runs) > 0:
5899
+ for run_output in reversed(session.runs):
5900
+ if hasattr(run_output, "agent_id") and run_output.agent_id == self.id:
5901
+ return run_output
5702
5902
  else:
5703
- log_warning(f"No run responses found in AgentSession {session_id}")
5903
+ log_warning(f"No run responses found in Session {session_id}")
5704
5904
  return None
5705
5905
 
5706
5906
  def cancel_run(self, run_id: str) -> bool:
@@ -5738,7 +5938,65 @@ class Agent:

  # Load and return the session from the database
  if self.db is not None:
- agent_session = cast(AgentSession, self._read_session(session_id=session_id_to_load)) # type: ignore
+ loaded_session = None
+
+ # We have a standalone agent, so we are loading an AgentSession
+ if self.team_id is None and self.workflow_id is None:
+ loaded_session = cast(
+ AgentSession,
+ self._read_session(session_id=session_id_to_load, session_type=SessionType.AGENT), # type: ignore
+ )
+
+ # We have a team member agent, so we are loading a TeamSession
+ if loaded_session is None and self.team_id is not None:
+ # Load session for team member agents
+ loaded_session = cast(
+ TeamSession,
+ self._read_session(session_id=session_id_to_load, session_type=SessionType.TEAM), # type: ignore
+ )
+
+ # We have a workflow member agent, so we are loading a WorkflowSession
+ if loaded_session is None and self.workflow_id is not None:
+ # Load session for workflow member agents
+ loaded_session = cast(
+ WorkflowSession,
+ self._read_session(session_id=session_id_to_load, session_type=SessionType.WORKFLOW), # type: ignore
+ )
+
+ # Cache the session if relevant
+ if loaded_session is not None and self.cache_session:
+ self._agent_session = loaded_session
+
+ return loaded_session
+
+ log_debug(f"Session {session_id_to_load} not found in db")
+ return None
+
+ async def aget_session(
+ self,
+ session_id: Optional[str] = None,
+ ) -> Optional[AgentSession]:
+ """Load an AgentSession from database or cache.
+
+ Args:
+ session_id: The session_id to load from storage.
+
+ Returns:
+ AgentSession: The AgentSession loaded from the database/cache or None if not found.
+ """
+ if not session_id and not self.session_id:
+ raise Exception("No session_id provided")
+
+ session_id_to_load = session_id or self.session_id
+
+ # If there is a cached session, return it
+ if self.cache_session and hasattr(self, "_agent_session") and self._agent_session is not None:
+ if self._agent_session.session_id == session_id_to_load:
+ return self._agent_session
+
+ # Load and return the session from the database
+ if self.db is not None:
+ agent_session = cast(AgentSession, await self._aread_session(session_id=session_id_to_load)) # type: ignore

  # Cache the session if relevant
  if agent_session is not None and self.cache_session:
@@ -5787,8 +6045,10 @@ class Agent:
5787
6045
  session.session_data["session_state"].pop("current_session_id", None)
5788
6046
  session.session_data["session_state"].pop("current_user_id", None)
5789
6047
  session.session_data["session_state"].pop("current_run_id", None)
5790
-
5791
- await self._aupsert_session(session=session)
6048
+ if self._has_async_db():
6049
+ await self._aupsert_session(session=session)
6050
+ else:
6051
+ self._upsert_session(session=session)
5792
6052
  log_debug(f"Created or updated AgentSession record: {session.session_id}")
5793
6053
 
5794
6054
  def get_chat_history(self, session_id: Optional[str] = None) -> List[Message]:
@@ -5918,6 +6178,61 @@ class Agent:
  raise Exception("Session not found")
  return session.session_data.get("session_state", {}) if session.session_data is not None else {}

+ def update_session_state(self, session_state_updates: Dict[str, Any], session_id: Optional[str] = None) -> str:
+ """
+ Update the session state for the given session ID.
+ Args:
+ session_state_updates: The updates to apply to the session state. Should be a dictionary of key-value pairs.
+ session_id: The session ID to update. If not provided, the current cached session ID is used.
+ Returns:
+ dict: The updated session state.
+ """
+ session_id = session_id or self.session_id
+ if session_id is None:
+ raise Exception("Session ID is not set")
+ session = self.get_session(session_id=session_id) # type: ignore
+ if session is None:
+ raise Exception("Session not found")
+
+ if session.session_data is not None and "session_state" not in session.session_data:
+ session.session_data["session_state"] = {}
+
+ # Overwrite the loaded DB session state with the new session state
+ for key, value in session_state_updates.items():
+ session.session_data["session_state"][key] = value # type: ignore
+
+ self.save_session(session=session)
+
+ return session.session_data["session_state"] # type: ignore
+
+ async def aupdate_session_state(
+ self, session_state_updates: Dict[str, Any], session_id: Optional[str] = None
+ ) -> str:
+ """
+ Update the session state for the given session ID.
+ Args:
+ session_state_updates: The updates to apply to the session state. Should be a dictionary of key-value pairs.
+ session_id: The session ID to update. If not provided, the current cached session ID is used.
+ Returns:
+ dict: The updated session state.
+ """
+ session_id = session_id or self.session_id
+ if session_id is None:
+ raise Exception("Session ID is not set")
+ session = await self.aget_session(session_id=session_id) # type: ignore
+ if session is None:
+ raise Exception("Session not found")
+
+ if session.session_data is not None and "session_state" not in session.session_data:
+ session.session_data["session_state"] = {}
+
+ for key, value in session_state_updates.items():
+ session.session_data["session_state"][key] = value # type: ignore
+
+ await self.asave_session(session=session)
+
+ return session.session_data["session_state"] # type: ignore
+
  def get_session_metrics(self, session_id: Optional[str] = None) -> Optional[Metrics]:
  """Get the session metrics for the given session ID and user ID."""
  session_id = session_id or self.session_id
@@ -6377,7 +6692,7 @@ class Agent:
  system_message_content += f"{get_response_model_format_prompt(self.output_schema)}"

  # 3.3.15 Add the session state to the system message
- if self.add_session_state_to_context and session_state is not None:
+ if add_session_state_to_context and session_state is not None:
  system_message_content += f"\n<session_state>\n{session_state}\n</session_state>\n\n"

  # Return the system message
@@ -7013,6 +7328,10 @@ class Agent:
  for _msg in history_copy:
  _msg.from_history = True

+ # Filter tool calls from history if limit is set (before adding to run_messages)
+ if self.max_tool_calls_from_history is not None:
+ filter_tool_calls(history_copy, self.max_tool_calls_from_history)
+
  log_debug(f"Adding {len(history_copy)} messages from history")

  run_messages.messages += history_copy
@@ -7206,6 +7525,10 @@ class Agent:
  for _msg in history_copy:
  _msg.from_history = True

+ # Filter tool calls from history if limit is set (before adding to run_messages)
+ if self.max_tool_calls_from_history is not None:
+ filter_tool_calls(history_copy, self.max_tool_calls_from_history)
+
  log_debug(f"Adding {len(history_copy)} messages from history")

  run_messages.messages += history_copy
@@ -7750,28 +8073,40 @@ class Agent:
7750
8073
 
7751
8074
  def _handle_reasoning(self, run_response: RunOutput, run_messages: RunMessages) -> None:
7752
8075
  if self.reasoning or self.reasoning_model is not None:
7753
- reasoning_generator = self._reason(run_response=run_response, run_messages=run_messages)
8076
+ reasoning_generator = self._reason(
8077
+ run_response=run_response, run_messages=run_messages, stream_events=False
8078
+ )
7754
8079
 
7755
8080
  # Consume the generator without yielding
7756
8081
  deque(reasoning_generator, maxlen=0)
7757
8082
 
7758
- def _handle_reasoning_stream(self, run_response: RunOutput, run_messages: RunMessages) -> Iterator[RunOutputEvent]:
8083
+ def _handle_reasoning_stream(
8084
+ self, run_response: RunOutput, run_messages: RunMessages, stream_events: Optional[bool] = None
8085
+ ) -> Iterator[RunOutputEvent]:
7759
8086
  if self.reasoning or self.reasoning_model is not None:
7760
- reasoning_generator = self._reason(run_response=run_response, run_messages=run_messages)
8087
+ reasoning_generator = self._reason(
8088
+ run_response=run_response,
8089
+ run_messages=run_messages,
8090
+ stream_events=stream_events,
8091
+ )
7761
8092
  yield from reasoning_generator
7762
8093
 
7763
8094
  async def _ahandle_reasoning(self, run_response: RunOutput, run_messages: RunMessages) -> None:
7764
8095
  if self.reasoning or self.reasoning_model is not None:
7765
- reason_generator = self._areason(run_response=run_response, run_messages=run_messages)
8096
+ reason_generator = self._areason(run_response=run_response, run_messages=run_messages, stream_events=False)
7766
8097
  # Consume the generator without yielding
7767
8098
  async for _ in reason_generator:
7768
8099
  pass
7769
8100
 
7770
8101
  async def _ahandle_reasoning_stream(
7771
- self, run_response: RunOutput, run_messages: RunMessages
8102
+ self, run_response: RunOutput, run_messages: RunMessages, stream_events: Optional[bool] = None
7772
8103
  ) -> AsyncIterator[RunOutputEvent]:
7773
8104
  if self.reasoning or self.reasoning_model is not None:
7774
- reason_generator = self._areason(run_response=run_response, run_messages=run_messages)
8105
+ reason_generator = self._areason(
8106
+ run_response=run_response,
8107
+ run_messages=run_messages,
8108
+ stream_events=stream_events,
8109
+ )
7775
8110
  async for item in reason_generator:
7776
8111
  yield item
7777
8112
 
@@ -7798,12 +8133,16 @@ class Agent:
7798
8133
 
7799
8134
  return updated_reasoning_content
7800
8135
 
7801
- def _reason(self, run_response: RunOutput, run_messages: RunMessages) -> Iterator[RunOutputEvent]:
8136
+ def _reason(
8137
+ self, run_response: RunOutput, run_messages: RunMessages, stream_events: Optional[bool] = None
8138
+ ) -> Iterator[RunOutputEvent]:
7802
8139
  # Yield a reasoning started event
7803
- if self.stream_intermediate_steps:
7804
- yield self._handle_event(
8140
+ if stream_events:
8141
+ yield handle_event( # type: ignore
7805
8142
  create_reasoning_started_event(from_run_response=run_response),
7806
8143
  run_response,
8144
+ events_to_skip=self.events_to_skip, # type: ignore
8145
+ store_events=self.store_events,
7807
8146
  )
7808
8147
 
7809
8148
  use_default_reasoning = False
@@ -7935,14 +8274,16 @@ class Agent:
7935
8274
  reasoning_steps=[ReasoningStep(result=reasoning_message.content)],
7936
8275
  reasoning_agent_messages=[reasoning_message],
7937
8276
  )
7938
- if self.stream_intermediate_steps:
7939
- yield self._handle_event(
8277
+ if stream_events:
8278
+ yield handle_event( # type: ignore
7940
8279
  create_reasoning_completed_event(
7941
8280
  from_run_response=run_response,
7942
8281
  content=ReasoningSteps(reasoning_steps=[ReasoningStep(result=reasoning_message.content)]),
7943
8282
  content_type=ReasoningSteps.__name__,
7944
8283
  ),
7945
8284
  run_response,
8285
+ events_to_skip=self.events_to_skip, # type: ignore
8286
+ store_events=self.store_events,
7946
8287
  )
7947
8288
  else:
7948
8289
  log_warning(
@@ -8011,7 +8352,7 @@ class Agent:
8011
8352
  )
8012
8353
  break
8013
8354
 
8014
- if (
8355
+ if reasoning_agent_response.content is not None and (
8015
8356
  reasoning_agent_response.content.reasoning_steps is None
8016
8357
  or len(reasoning_agent_response.content.reasoning_steps) == 0
8017
8358
  ):
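
The added "reasoning_agent_response.content is not None" check relies on short-circuit evaluation: when the reasoning agent returns no parsed content, the right-hand side is never evaluated, so the previous AttributeError on a None content object can no longer occur. A tiny stand-alone illustration (all names are stand-ins):

    class FakeResponse:
        def __init__(self, content=None):
            self.content = content

    response = FakeResponse(content=None)

    # Short-circuiting: because the first operand is False, Python never
    # touches response.content.reasoning_steps, so no AttributeError is raised.
    needs_default_reasoning = response.content is not None and (
        response.content.reasoning_steps is None
        or len(response.content.reasoning_steps) == 0
    )
    print(needs_default_reasoning)  # False
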
@@ -8021,20 +8362,22 @@ class Agent:
8021
8362
  reasoning_steps: List[ReasoningStep] = reasoning_agent_response.content.reasoning_steps
8022
8363
  all_reasoning_steps.extend(reasoning_steps)
8023
8364
  # Yield reasoning steps
8024
- if self.stream_intermediate_steps:
8365
+ if stream_events:
8025
8366
  for reasoning_step in reasoning_steps:
8026
8367
  updated_reasoning_content = self._format_reasoning_step_content(
8027
8368
  run_response=run_response,
8028
8369
  reasoning_step=reasoning_step,
8029
8370
  )
8030
8371
 
8031
- yield self._handle_event(
8372
+ yield handle_event( # type: ignore
8032
8373
  create_reasoning_step_event(
8033
8374
  from_run_response=run_response,
8034
8375
  reasoning_step=reasoning_step,
8035
8376
  reasoning_content=updated_reasoning_content,
8036
8377
  ),
8037
8378
  run_response,
8379
+ events_to_skip=self.events_to_skip, # type: ignore
8380
+ store_events=self.store_events,
8038
8381
  )
8039
8382
 
8040
8383
  # Find the index of the first assistant message
@@ -8071,22 +8414,28 @@ class Agent:
8071
8414
  )
8072
8415
 
8073
8416
  # Yield the final reasoning completed event
8074
- if self.stream_intermediate_steps:
8075
- yield self._handle_event(
8417
+ if stream_events:
8418
+ yield handle_event( # type: ignore
8076
8419
  create_reasoning_completed_event(
8077
8420
  from_run_response=run_response,
8078
8421
  content=ReasoningSteps(reasoning_steps=all_reasoning_steps),
8079
8422
  content_type=ReasoningSteps.__name__,
8080
8423
  ),
8081
8424
  run_response,
8425
+ events_to_skip=self.events_to_skip, # type: ignore
8426
+ store_events=self.store_events,
8082
8427
  )
8083
8428
 
8084
- async def _areason(self, run_response: RunOutput, run_messages: RunMessages) -> Any:
8429
+ async def _areason(
8430
+ self, run_response: RunOutput, run_messages: RunMessages, stream_events: Optional[bool] = None
8431
+ ) -> Any:
8085
8432
  # Yield a reasoning started event
8086
- if self.stream_intermediate_steps:
8087
- yield self._handle_event(
8433
+ if stream_events:
8434
+ yield handle_event( # type: ignore
8088
8435
  create_reasoning_started_event(from_run_response=run_response),
8089
8436
  run_response,
8437
+ events_to_skip=self.events_to_skip, # type: ignore
8438
+ store_events=self.store_events,
8090
8439
  )
8091
8440
 
8092
8441
  use_default_reasoning = False
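
_ahandle_reasoning, the async counterpart further up, drains its generator with a bare async-for loop rather than deque, since deque cannot consume async generators. A small stand-alone sketch of that pattern (stand-in coroutine, not agno code):

    import asyncio

    async def noisy_async_steps():
        for i in range(3):
            print(f"async reasoning step {i}")
            yield i

    async def drain():
        # The async equivalent of deque(gen, maxlen=0): iterate purely for
        # side effects and discard every yielded value.
        async for _ in noisy_async_steps():
            pass

    asyncio.run(drain())
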
@@ -8218,14 +8567,16 @@ class Agent:
8218
8567
  reasoning_steps=[ReasoningStep(result=reasoning_message.content)],
8219
8568
  reasoning_agent_messages=[reasoning_message],
8220
8569
  )
8221
- if self.stream_intermediate_steps:
8222
- yield self._handle_event(
8570
+ if stream_events:
8571
+ yield handle_event(
8223
8572
  create_reasoning_completed_event(
8224
8573
  from_run_response=run_response,
8225
8574
  content=ReasoningSteps(reasoning_steps=[ReasoningStep(result=reasoning_message.content)]),
8226
8575
  content_type=ReasoningSteps.__name__,
8227
8576
  ),
8228
8577
  run_response,
8578
+ events_to_skip=self.events_to_skip, # type: ignore
8579
+ store_events=self.store_events,
8229
8580
  )
8230
8581
  else:
8231
8582
  log_warning(
@@ -8304,7 +8655,7 @@ class Agent:
8304
8655
  reasoning_steps: List[ReasoningStep] = reasoning_agent_response.content.reasoning_steps
8305
8656
  all_reasoning_steps.extend(reasoning_steps)
8306
8657
  # Yield reasoning steps
8307
- if self.stream_intermediate_steps:
8658
+ if stream_events:
8308
8659
  for reasoning_step in reasoning_steps:
8309
8660
  updated_reasoning_content = self._format_reasoning_step_content(
8310
8661
  run_response=run_response,
@@ -8312,13 +8663,15 @@ class Agent:
8312
8663
  )
8313
8664
 
8314
8665
  # Yield the response with the updated reasoning_content
8315
- yield self._handle_event(
8666
+ yield handle_event(
8316
8667
  create_reasoning_step_event(
8317
8668
  from_run_response=run_response,
8318
8669
  reasoning_step=reasoning_step,
8319
8670
  reasoning_content=updated_reasoning_content,
8320
8671
  ),
8321
8672
  run_response,
8673
+ events_to_skip=self.events_to_skip, # type: ignore
8674
+ store_events=self.store_events,
8322
8675
  )
8323
8676
 
8324
8677
  # Find the index of the first assistant message
@@ -8354,14 +8707,16 @@ class Agent:
8354
8707
  )
8355
8708
 
8356
8709
  # Yield the final reasoning completed event
8357
- if self.stream_intermediate_steps:
8358
- yield self._handle_event(
8710
+ if stream_events:
8711
+ yield handle_event(
8359
8712
  create_reasoning_completed_event(
8360
8713
  from_run_response=run_response,
8361
8714
  content=ReasoningSteps(reasoning_steps=all_reasoning_steps),
8362
8715
  content_type=ReasoningSteps.__name__,
8363
8716
  ),
8364
8717
  run_response,
8718
+ events_to_skip=self.events_to_skip, # type: ignore
8719
+ store_events=self.store_events,
8365
8720
  )
8366
8721
 
8367
8722
  def _process_parser_response(
@@ -8430,18 +8785,17 @@ class Agent:
8430
8785
  log_warning("A response model is required to parse the response with a parser model")
8431
8786
 
8432
8787
  def _parse_response_with_parser_model_stream(
8433
- self,
8434
- session: AgentSession,
8435
- run_response: RunOutput,
8436
- stream_intermediate_steps: bool = True,
8788
+ self, session: AgentSession, run_response: RunOutput, stream_events: bool = True
8437
8789
  ):
8438
8790
  """Parse the model response using the parser model"""
8439
8791
  if self.parser_model is not None:
8440
8792
  if self.output_schema is not None:
8441
- if stream_intermediate_steps:
8442
- yield self._handle_event(
8793
+ if stream_events:
8794
+ yield handle_event(
8443
8795
  create_parser_model_response_started_event(run_response),
8444
8796
  run_response,
8797
+ events_to_skip=self.events_to_skip, # type: ignore
8798
+ store_events=self.store_events,
8445
8799
  )
8446
8800
 
8447
8801
  parser_model_response = ModelResponse(content="")
@@ -8460,7 +8814,7 @@ class Agent:
8460
8814
  model_response=parser_model_response,
8461
8815
  model_response_event=model_response_event,
8462
8816
  parse_structured_output=True,
8463
- stream_intermediate_steps=stream_intermediate_steps,
8817
+ stream_events=stream_events,
8464
8818
  )
8465
8819
 
8466
8820
  parser_model_response_message: Optional[Message] = None
@@ -8474,28 +8828,29 @@ class Agent:
8474
8828
  else:
8475
8829
  log_warning("Unable to parse response with parser model")
8476
8830
 
8477
- if stream_intermediate_steps:
8478
- yield self._handle_event(
8831
+ if stream_events:
8832
+ yield handle_event(
8479
8833
  create_parser_model_response_completed_event(run_response),
8480
8834
  run_response,
8835
+ events_to_skip=self.events_to_skip, # type: ignore
8836
+ store_events=self.store_events,
8481
8837
  )
8482
8838
 
8483
8839
  else:
8484
8840
  log_warning("A response model is required to parse the response with a parser model")
8485
8841
 
8486
8842
  async def _aparse_response_with_parser_model_stream(
8487
- self,
8488
- session: AgentSession,
8489
- run_response: RunOutput,
8490
- stream_intermediate_steps: bool = True,
8843
+ self, session: AgentSession, run_response: RunOutput, stream_events: bool = True
8491
8844
  ):
8492
8845
  """Parse the model response using the parser model stream."""
8493
8846
  if self.parser_model is not None:
8494
8847
  if self.output_schema is not None:
8495
- if stream_intermediate_steps:
8496
- yield self._handle_event(
8848
+ if stream_events:
8849
+ yield handle_event(
8497
8850
  create_parser_model_response_started_event(run_response),
8498
8851
  run_response,
8852
+ events_to_skip=self.events_to_skip, # type: ignore
8853
+ store_events=self.store_events,
8499
8854
  )
8500
8855
 
8501
8856
  parser_model_response = ModelResponse(content="")
@@ -8515,7 +8870,7 @@ class Agent:
8515
8870
  model_response=parser_model_response,
8516
8871
  model_response_event=model_response_event,
8517
8872
  parse_structured_output=True,
8518
- stream_intermediate_steps=stream_intermediate_steps,
8873
+ stream_events=stream_events,
8519
8874
  ):
8520
8875
  yield event
8521
8876
 
@@ -8530,10 +8885,12 @@ class Agent:
8530
8885
  else:
8531
8886
  log_warning("Unable to parse response with parser model")
8532
8887
 
8533
- if stream_intermediate_steps:
8534
- yield self._handle_event(
8888
+ if stream_events:
8889
+ yield handle_event(
8535
8890
  create_parser_model_response_completed_event(run_response),
8536
8891
  run_response,
8892
+ events_to_skip=self.events_to_skip, # type: ignore
8893
+ store_events=self.store_events,
8537
8894
  )
8538
8895
  else:
8539
8896
  log_warning("A response model is required to parse the response with a parser model")
@@ -8552,7 +8909,7 @@ class Agent:
8552
8909
  session: AgentSession,
8553
8910
  run_response: RunOutput,
8554
8911
  run_messages: RunMessages,
8555
- stream_intermediate_steps: bool = False,
8912
+ stream_events: bool = False,
8556
8913
  ):
8557
8914
  """Parse the model response using the output model."""
8558
8915
  from agno.utils.events import (
@@ -8563,8 +8920,13 @@ class Agent:
8563
8920
  if self.output_model is None:
8564
8921
  return
8565
8922
 
8566
- if stream_intermediate_steps:
8567
- yield self._handle_event(create_output_model_response_started_event(run_response), run_response)
8923
+ if stream_events:
8924
+ yield handle_event(
8925
+ create_output_model_response_started_event(run_response),
8926
+ run_response,
8927
+ events_to_skip=self.events_to_skip, # type: ignore
8928
+ store_events=self.store_events,
8929
+ )
8568
8930
 
8569
8931
  messages_for_output_model = self._get_messages_for_output_model(run_messages.messages)
8570
8932
 
@@ -8576,11 +8938,16 @@ class Agent:
8576
8938
  run_response=run_response,
8577
8939
  model_response=model_response,
8578
8940
  model_response_event=model_response_event,
8579
- stream_intermediate_steps=stream_intermediate_steps,
8941
+ stream_events=stream_events,
8580
8942
  )
8581
8943
 
8582
- if stream_intermediate_steps:
8583
- yield self._handle_event(create_output_model_response_completed_event(run_response), run_response)
8944
+ if stream_events:
8945
+ yield handle_event(
8946
+ create_output_model_response_completed_event(run_response),
8947
+ run_response,
8948
+ events_to_skip=self.events_to_skip, # type: ignore
8949
+ store_events=self.store_events,
8950
+ )
8584
8951
 
8585
8952
  # Build a list of messages that should be added to the RunResponse
8586
8953
  messages_for_run_response = [m for m in run_messages.messages if m.add_to_agent_memory]
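
Both output-model paths finish the same way: only messages flagged add_to_agent_memory are copied onto the run response, and the run metrics are recomputed from that filtered list. The selection is a plain attribute-keyed list comprehension; a stand-alone sketch with stand-in data:

    from dataclasses import dataclass

    @dataclass
    class Msg:  # stand-in for agno's Message, not the real class
        role: str
        content: str
        add_to_agent_memory: bool = True

    messages = [
        Msg("system", "You are terse.", add_to_agent_memory=False),
        Msg("user", "Hi"),
        Msg("assistant", "Hello!"),
    ]

    # Keep only the messages that should be persisted with the run
    messages_for_run_response = [m for m in messages if m.add_to_agent_memory]
    print([m.role for m in messages_for_run_response])  # ['user', 'assistant']
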
@@ -8603,7 +8970,7 @@ class Agent:
8603
8970
  session: AgentSession,
8604
8971
  run_response: RunOutput,
8605
8972
  run_messages: RunMessages,
8606
- stream_intermediate_steps: bool = False,
8973
+ stream_events: bool = False,
8607
8974
  ):
8608
8975
  """Parse the model response using the output model."""
8609
8976
  from agno.utils.events import (
@@ -8614,8 +8981,13 @@ class Agent:
8614
8981
  if self.output_model is None:
8615
8982
  return
8616
8983
 
8617
- if stream_intermediate_steps:
8618
- yield self._handle_event(create_output_model_response_started_event(run_response), run_response)
8984
+ if stream_events:
8985
+ yield handle_event(
8986
+ create_output_model_response_started_event(run_response),
8987
+ run_response,
8988
+ events_to_skip=self.events_to_skip, # type: ignore
8989
+ store_events=self.store_events,
8990
+ )
8619
8991
 
8620
8992
  messages_for_output_model = self._get_messages_for_output_model(run_messages.messages)
8621
8993
 
@@ -8629,12 +9001,17 @@ class Agent:
8629
9001
  run_response=run_response,
8630
9002
  model_response=model_response,
8631
9003
  model_response_event=model_response_event,
8632
- stream_intermediate_steps=stream_intermediate_steps,
9004
+ stream_events=stream_events,
8633
9005
  ):
8634
9006
  yield event
8635
9007
 
8636
- if stream_intermediate_steps:
8637
- yield self._handle_event(create_output_model_response_completed_event(run_response), run_response)
9008
+ if stream_events:
9009
+ yield handle_event(
9010
+ create_output_model_response_completed_event(run_response),
9011
+ run_response,
9012
+ events_to_skip=self.events_to_skip, # type: ignore
9013
+ store_events=self.store_events,
9014
+ )
8638
9015
 
8639
9016
  # Build a list of messages that should be added to the RunResponse
8640
9017
  messages_for_run_response = [m for m in run_messages.messages if m.add_to_agent_memory]
@@ -8643,15 +9020,6 @@ class Agent:
8643
9020
  # Update the RunResponse metrics
8644
9021
  run_response.metrics = self._calculate_run_metrics(messages_for_run_response)
8645
9022
 
8646
- def _handle_event(self, event: RunOutputEvent, run_response: RunOutput):
8647
- # We only store events that are not run_response_content events
8648
- events_to_skip = [event.value for event in self.events_to_skip] if self.events_to_skip else []
8649
- if self.store_events and event.event not in events_to_skip:
8650
- if run_response.events is None:
8651
- run_response.events = []
8652
- run_response.events.append(event)
8653
- return event
8654
-
8655
9023
  ###########################################################################
8656
9024
  # Default Tools
8657
9025
  ###########################################################################
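
The per-instance _handle_event helper removed above is replaced at every call site by a shared handle_event function that receives events_to_skip and store_events explicitly, presumably so the same logic can be reused outside this class. Its behaviour can be inferred from the deleted method; a hedged reconstruction (the exact home module and signature are assumptions based on the call sites in this diff):

    from typing import List, Optional

    def handle_event(event, run_response, events_to_skip: Optional[List] = None, store_events: bool = False):
        # Mirror of the removed Agent._handle_event: optionally record the event
        # on the run output, skipping any event types the caller opted out of.
        skip = [e.value for e in events_to_skip] if events_to_skip else []
        if store_events and event.event not in skip:
            if run_response.events is None:
                run_response.events = []
            run_response.events.append(event)
        return event
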
@@ -8779,7 +9147,7 @@ class Agent:
8779
9147
 
8780
9148
  return get_tool_call_history
8781
9149
 
8782
- def update_session_state(self, session_state, session_state_updates: dict) -> str:
9150
+ def _update_session_state_tool(self, session_state, session_state_updates: dict) -> str:
8783
9151
  """
8784
9152
  Update the shared session state. Provide any updates as a dictionary of key-value pairs.
8785
9153
  Example:
@@ -9125,6 +9493,7 @@ class Agent:
9125
9493
  videos: Optional[Sequence[Video]] = None,
9126
9494
  files: Optional[Sequence[File]] = None,
9127
9495
  stream: Optional[bool] = None,
9496
+ stream_events: Optional[bool] = None,
9128
9497
  stream_intermediate_steps: Optional[bool] = None,
9129
9498
  markdown: Optional[bool] = None,
9130
9499
  knowledge_filters: Optional[Dict[str, Any]] = None,
@@ -9156,11 +9525,19 @@ class Agent:
9156
9525
  if self.output_schema is not None:
9157
9526
  markdown = False
9158
9527
 
9528
+ # Use stream override value when necessary
9159
9529
  if stream is None:
9160
- stream = self.stream or False
9530
+ stream = False if self.stream is None else self.stream
9531
+
9532
+ # Considering both stream_events and stream_intermediate_steps (deprecated)
9533
+ stream_events = stream_events or stream_intermediate_steps
9534
+
9535
+ # Can't stream events if streaming is disabled
9536
+ if stream is False:
9537
+ stream_events = False
9161
9538
 
9162
- if stream_intermediate_steps is None:
9163
- stream_intermediate_steps = self.stream_intermediate_steps or False
9539
+ if stream_events is None:
9540
+ stream_events = False if self.stream_events is None else self.stream_events
9164
9541
 
9165
9542
  if stream:
9166
9543
  print_response_stream(
@@ -9173,7 +9550,7 @@ class Agent:
9173
9550
  images=images,
9174
9551
  videos=videos,
9175
9552
  files=files,
9176
- stream_intermediate_steps=stream_intermediate_steps,
9553
+ stream_events=stream_events,
9177
9554
  knowledge_filters=knowledge_filters,
9178
9555
  debug_mode=debug_mode,
9179
9556
  markdown=markdown,
@@ -9201,7 +9578,7 @@ class Agent:
9201
9578
  images=images,
9202
9579
  videos=videos,
9203
9580
  files=files,
9204
- stream_intermediate_steps=stream_intermediate_steps,
9581
+ stream_events=stream_events,
9205
9582
  knowledge_filters=knowledge_filters,
9206
9583
  debug_mode=debug_mode,
9207
9584
  markdown=markdown,
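
Taken together, the print_response changes above resolve three inputs into the effective streaming behaviour: the explicit stream argument (falling back to the instance default), the new stream_events argument (falling back to the deprecated stream_intermediate_steps, then to the instance default), and the rule that event streaming is forced off whenever streaming itself is off. A condensed sketch of that precedence as a free function (the helper itself is illustrative; only the ordering follows the code above):

    from typing import Optional, Tuple

    def resolve_streaming(
        stream: Optional[bool],
        stream_events: Optional[bool],
        stream_intermediate_steps: Optional[bool],
        default_stream: Optional[bool],
        default_stream_events: Optional[bool],
    ) -> Tuple[bool, bool]:
        # 1. Explicit argument wins, then the instance default, then False
        if stream is None:
            stream = False if default_stream is None else default_stream
        # 2. The deprecated flag still counts while callers migrate
        stream_events = stream_events or stream_intermediate_steps
        # 3. No event stream without a stream at all
        if stream is False:
            stream_events = False
        if stream_events is None:
            stream_events = False if default_stream_events is None else default_stream_events
        return stream, stream_events

    print(resolve_streaming(None, None, True, True, None))   # (True, True)
    print(resolve_streaming(False, True, None, True, None))  # (False, False)
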
@@ -9230,6 +9607,7 @@ class Agent:
9230
9607
  videos: Optional[Sequence[Video]] = None,
9231
9608
  files: Optional[Sequence[File]] = None,
9232
9609
  stream: Optional[bool] = None,
9610
+ stream_events: Optional[bool] = None,
9233
9611
  stream_intermediate_steps: Optional[bool] = None,
9234
9612
  markdown: Optional[bool] = None,
9235
9613
  knowledge_filters: Optional[Dict[str, Any]] = None,
@@ -9259,8 +9637,15 @@ class Agent:
9259
9637
  if stream is None:
9260
9638
  stream = self.stream or False
9261
9639
 
9262
- if stream_intermediate_steps is None:
9263
- stream_intermediate_steps = self.stream_intermediate_steps or False
9640
+ # Considering both stream_events and stream_intermediate_steps (deprecated)
9641
+ stream_events = stream_events or stream_intermediate_steps
9642
+
9643
+ # Can't stream events if streaming is disabled
9644
+ if stream is False:
9645
+ stream_events = False
9646
+
9647
+ if stream_events is None:
9648
+ stream_events = False if self.stream_events is None else self.stream_events
9264
9649
 
9265
9650
  if stream:
9266
9651
  await aprint_response_stream(
@@ -9273,7 +9658,7 @@ class Agent:
9273
9658
  images=images,
9274
9659
  videos=videos,
9275
9660
  files=files,
9276
- stream_intermediate_steps=stream_intermediate_steps,
9661
+ stream_events=stream_events,
9277
9662
  knowledge_filters=knowledge_filters,
9278
9663
  debug_mode=debug_mode,
9279
9664
  markdown=markdown,
@@ -9300,7 +9685,6 @@ class Agent:
9300
9685
  images=images,
9301
9686
  videos=videos,
9302
9687
  files=files,
9303
- stream_intermediate_steps=stream_intermediate_steps,
9304
9688
  knowledge_filters=knowledge_filters,
9305
9689
  debug_mode=debug_mode,
9306
9690
  markdown=markdown,
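
From the caller's side the practical change is a rename: print_response and aprint_response now take stream_events, while stream_intermediate_steps keeps working as a deprecated alias through the fallback sketched above. A hedged usage sketch (the model choice and its configuration are illustrative assumptions, not part of this diff):

    from agno.agent import Agent
    from agno.models.openai import OpenAIChat  # any configured model works here

    agent = Agent(model=OpenAIChat(id="gpt-4o-mini"))

    # New spelling: stream the run and emit intermediate events
    agent.print_response("What changed in 2.2?", stream=True, stream_events=True)

    # Old spelling is still honoured during the deprecation window
    agent.print_response("What changed in 2.2?", stream=True, stream_intermediate_steps=True)

    # With stream=False the events flag is forced off before the run starts
    agent.print_response("What changed in 2.2?", stream=False, stream_events=True)
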
@@ -9436,108 +9820,70 @@ class Agent:
9436
9820
 
9437
9821
  return effective_filters
9438
9822
 
9439
- def _scrub_media_from_run_output(self, run_response: RunOutput) -> None:
9440
- """
9441
- Completely remove all media from RunOutput when store_media=False.
9442
- This includes media in input, output artifacts, and all messages.
9443
- """
9444
- # 1. Scrub RunInput media
9445
- if run_response.input is not None:
9446
- run_response.input.images = []
9447
- run_response.input.videos = []
9448
- run_response.input.audios = []
9449
- run_response.input.files = []
9450
-
9451
- # 3. Scrub media from all messages
9452
- if run_response.messages:
9453
- for message in run_response.messages:
9454
- self._scrub_media_from_message(message)
9455
-
9456
- # 4. Scrub media from additional_input messages if any
9457
- if run_response.additional_input:
9458
- for message in run_response.additional_input:
9459
- self._scrub_media_from_message(message)
9460
-
9461
- # 5. Scrub media from reasoning_messages if any
9462
- if run_response.reasoning_messages:
9463
- for message in run_response.reasoning_messages:
9464
- self._scrub_media_from_message(message)
9465
-
9466
- def _scrub_media_from_message(self, message: Message) -> None:
9467
- """Remove all media from a Message object."""
9468
- # Input media
9469
- message.images = None
9470
- message.videos = None
9471
- message.audio = None
9472
- message.files = None
9473
-
9474
- # Output media
9475
- message.audio_output = None
9476
- message.image_output = None
9477
- message.video_output = None
9478
-
9479
- def _scrub_tool_results_from_run_output(self, run_response: RunOutput) -> None:
9480
- """
9481
- Remove all tool-related data from RunOutput when store_tool_messages=False.
9482
- This removes both the tool call and its corresponding result to maintain API consistency.
9483
- """
9484
- if not run_response.messages:
9485
- return
9823
+ def _cleanup_and_store(self, run_response: RunOutput, session: AgentSession, user_id: Optional[str] = None) -> None:
9824
+ # Scrub the stored run based on storage flags
9825
+ self._scrub_run_output_for_storage(run_response)
9486
9826
 
9487
- # Step 1: Collect all tool_call_ids from tool result messages
9488
- tool_call_ids_to_remove = set()
9489
- for message in run_response.messages:
9490
- if message.role == "tool" and message.tool_call_id:
9491
- tool_call_ids_to_remove.add(message.tool_call_id)
9492
-
9493
- # Step 2: Remove tool result messages (role="tool")
9494
- run_response.messages = [msg for msg in run_response.messages if msg.role != "tool"]
9495
-
9496
- # Step 3: Remove the assistant messages related to the scrubbed tool calls
9497
- filtered_messages = []
9498
- for message in run_response.messages:
9499
- # Check if this assistant message made any of the tool calls we're removing
9500
- should_remove = False
9501
- if message.role == "assistant" and message.tool_calls:
9502
- for tool_call in message.tool_calls:
9503
- if tool_call.get("id") in tool_call_ids_to_remove:
9504
- should_remove = True
9505
- break
9827
+ # Stop the timer for the Run duration
9828
+ if run_response.metrics:
9829
+ run_response.metrics.stop_timer()
9506
9830
 
9507
- if not should_remove:
9508
- filtered_messages.append(message)
9831
+ # Optional: Save output to file if save_response_to_file is set
9832
+ self.save_run_response_to_file(
9833
+ run_response=run_response,
9834
+ input=run_response.input.input_content_string() if run_response.input else "",
9835
+ session_id=session.session_id,
9836
+ user_id=user_id,
9837
+ )
9509
9838
 
9510
- run_response.messages = filtered_messages
9839
+ # Add RunOutput to Agent Session
9840
+ session.upsert_run(run=run_response)
9511
9841
 
9512
- def _scrub_history_messages_from_run_output(self, run_response: RunOutput) -> None:
9513
- """
9514
- Remove all history messages from RunOutput when store_history_messages=False.
9515
- This removes messages that were loaded from the agent's memory.
9516
- """
9517
- # Remove messages with from_history=True
9518
- if run_response.messages:
9519
- run_response.messages = [msg for msg in run_response.messages if not msg.from_history]
9842
+ # Calculate session metrics
9843
+ self._update_session_metrics(session=session, run_response=run_response)
9844
+
9845
+ # Save session to memory
9846
+ self.save_session(session=session)
9847
+
9848
+ async def _acleanup_and_store(
9849
+ self, run_response: RunOutput, session: AgentSession, user_id: Optional[str] = None
9850
+ ) -> None:
9851
+ # Scrub the stored run based on storage flags
9852
+ self._scrub_run_output_for_storage(run_response)
9853
+
9854
+ # Stop the timer for the Run duration
9855
+ if run_response.metrics:
9856
+ run_response.metrics.stop_timer()
9857
+
9858
+ # Optional: Save output to file if save_response_to_file is set
9859
+ self.save_run_response_to_file(
9860
+ run_response=run_response,
9861
+ input=run_response.input.input_content_string() if run_response.input else "",
9862
+ session_id=session.session_id,
9863
+ user_id=user_id,
9864
+ )
9520
9865
 
9521
- def _scrub_run_output_for_storage(self, run_response: RunOutput) -> bool:
9866
+ # Add RunOutput to Agent Session
9867
+ session.upsert_run(run=run_response)
9868
+
9869
+ # Calculate session metrics
9870
+ self._update_session_metrics(session=session, run_response=run_response)
9871
+
9872
+ # Save session to storage
9873
+ await self.asave_session(session=session)
9874
+
9875
+ def _scrub_run_output_for_storage(self, run_response: RunOutput) -> None:
9522
9876
  """
9523
9877
  Scrub run output based on storage flags before persisting to database.
9524
- Returns True if any scrubbing was done, False otherwise.
9525
9878
  """
9526
- scrubbed = False
9527
-
9528
9879
  if not self.store_media:
9529
- self._scrub_media_from_run_output(run_response)
9530
- scrubbed = True
9880
+ scrub_media_from_run_output(run_response)
9531
9881
 
9532
9882
  if not self.store_tool_messages:
9533
- self._scrub_tool_results_from_run_output(run_response)
9534
- scrubbed = True
9883
+ scrub_tool_results_from_run_output(run_response)
9535
9884
 
9536
9885
  if not self.store_history_messages:
9537
- self._scrub_history_messages_from_run_output(run_response)
9538
- scrubbed = True
9539
-
9540
- return scrubbed
9886
+ scrub_history_messages_from_run_output(run_response)
9541
9887
 
9542
9888
  def _validate_media_object_id(
9543
9889
  self,