PyPI - agent-framework-core - Versions diffs - 1.2.1__tar.gz → 1.2.2__tar.gz - Mend

agent-framework-core 1.2.1 (tar.gz) → 1.2.2 (tar.gz)

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-framework-core
-Version: 1.2.1
+Version: 1.2.2
 Summary: Microsoft Agent Framework for building AI Agents with Python. This is the core package that has all the core abstractions and implementations.
 Author-email: Microsoft <af-support@microsoft.com>
 Requires-Python: >=3.10

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/_types.py RENAMED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import base64
+import contextlib
 import json
 import logging
 import re
@@ -2890,6 +2891,7 @@ class ResponseStream(AsyncIterable[UpdateT], Generic[UpdateT, FinalT]):
         self._inner_stream_source: ResponseStream[Any, Any] | Awaitable[ResponseStream[Any, Any]] | None = None
         self._wrap_inner: bool = False
         self._map_update: Callable[[Any], UpdateT | Awaitable[UpdateT]] | None = None
+        self._pull_context_manager_factories: list[Callable[[], contextlib.AbstractContextManager[Any]]] = []
     def map(
         self,
@@ -3008,11 +3010,18 @@ class ResponseStream(AsyncIterable[UpdateT], Generic[UpdateT, FinalT]):
         return self
     async def __anext__(self) -> UpdateT:
-        if self._iterator is None:
-            stream = await self._get_stream()
-            self._iterator = stream.__aiter__()
         try:
-            update: UpdateT = await self._iterator.__anext__()
+            with contextlib.ExitStack() as stack:
+                for factory in self._pull_context_manager_factories:
+                    stack.enter_context(factory())
+                # Resolve the underlying stream inside the pull contexts so that any
+                # spans/contexts created during stream resolution (e.g. inner chat
+                # completion spans created on the first pull of a wrapped agent stream)
+                # inherit the active context (e.g. an outer agent invoke span).
+                if self._iterator is None:
+                    stream = await self._get_stream()
+                    self._iterator = stream.__aiter__()
+                update: UpdateT = await self._iterator.__anext__()
         except StopAsyncIteration:
             self._consumed = True
             await self._run_cleanup_hooks()
@@ -3038,9 +3047,25 @@ class ResponseStream(AsyncIterable[UpdateT], Generic[UpdateT, FinalT]):
                 update = hooked
         return update
+    async def _resolve_stream_with_pull_contexts(self) -> AsyncIterable[UpdateT]:
+        """Resolve the underlying stream while activating any registered pull context managers.
+        Used by ``__await__`` and ``get_final_response`` so that any spans/contexts created
+        during stream resolution (e.g. when the source is an Awaitable that internally
+        creates child telemetry spans) inherit the same active context as iterator pulls.
+        ``__anext__`` resolves the stream inside its own ExitStack and so calls ``_get_stream``
+        directly.
+        """
+        if self._stream is not None:
+            return await self._get_stream()
+        with contextlib.ExitStack() as stack:
+            for factory in self._pull_context_manager_factories:
+                stack.enter_context(factory())
+            return await self._get_stream()
     def __await__(self) -> Any:
         async def _wrap() -> ResponseStream[UpdateT, FinalT]:
-            await self._get_stream()
+            await self._resolve_stream_with_pull_contexts()
             return self
         return _wrap().__await__()
@@ -3064,10 +3089,12 @@ class ResponseStream(AsyncIterable[UpdateT], Generic[UpdateT, FinalT]):
         """
         if self._wrap_inner:
             if self._inner_stream is None:
-                # Use _get_stream() to resolve the awaitable - this properly handles
+                # Use _resolve_stream_with_pull_contexts() so that any spans/contexts
+                # created while resolving the awaitable (e.g. inner telemetry spans)
+                # inherit the same active context as iterator pulls. This also handles
                 # the case where _stream_source and _inner_stream_source are the same
                 # coroutine (e.g., from from_awaitable), avoiding double-await errors.
-                await self._get_stream()
+                await self._resolve_stream_with_pull_contexts()
             if self._inner_stream is None:
                 raise RuntimeError("Inner stream not available")
             if not self._finalized and not self._consumed:
@@ -3177,6 +3204,25 @@ class ResponseStream(AsyncIterable[UpdateT], Generic[UpdateT, FinalT]):
         self._cleanup_hooks.append(hook)
         return self
+    def with_pull_context_manager(
+        self,
+        cm_factory: Callable[[], contextlib.AbstractContextManager[Any]],
+    ) -> ResponseStream[UpdateT, FinalT]:
+        """Register a context manager factory invoked around each underlying iterator pull.
+        The factory is called once per ``__anext__`` and the returned context manager wraps
+        the await of the underlying iterator. This is useful for state that needs to be
+        active while the inner async work runs - for example, attaching an OpenTelemetry
+        span to the current context so child spans created by inner code (HTTP clients,
+        tool execution) are correctly parented.
+        Because the context manager is entered and exited within the same ``__anext__``
+        invocation, attach/detach style operations remain symmetric in the same async
+        context regardless of where the stream is iterated.
+        """
+        self._pull_context_manager_factories.append(cm_factory)
+        return self
     async def _run_cleanup_hooks(self) -> None:
         if self._cleanup_run:
             return

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/_workflows/_agent.py RENAMED Viewed

@@ -437,6 +437,13 @@ class WorkflowAgent(BaseAgent):
                     yield event
         elif checkpoint_id is not None:
+            # Restore the prior workflow state from the checkpoint. Shared
+            # state (e.g. accumulated conversation history maintained by the
+            # workflow's executors) survives across turns because Workflow.run
+            # no longer wipes state per call. Callers who want to deliver a
+            # new user message after restore should make a second
+            # `workflow.run(message=...)` call - they are NOT mutually
+            # exclusive on the same instance, but each must be its own call.
             if streaming:
                 async for event in self.workflow.run(
                     stream=True,
@@ -528,6 +535,7 @@ class WorkflowAgent(BaseAgent):
                 raw_representations.append(output_event)
             else:
                 data = output_event.data
                 if isinstance(data, AgentResponseUpdate):
                     # We cannot support AgentResponseUpdate in non-streaming mode. This is because the message
                     # sequence cannot be guaranteed when there are streaming updates in between non-streaming
@@ -628,16 +636,23 @@ class WorkflowAgent(BaseAgent):
             A list of AgentResponseUpdate objects. Empty list if the event is not relevant.
         """
         if event.type == "output":
-            # Convert workflow output to agent response updates.
-            # Handle different data types appropriately.
             data = event.data
             executor_id = event.executor_id
             if isinstance(data, AgentResponseUpdate):
-                # Pass through AgentResponseUpdate directly (streaming from AgentExecutor)
-                if not data.author_name:
-                    data.author_name = executor_id
-                return [data]
+                # Construct a fresh AgentResponseUpdate so we don't mutate a payload
+                # that AgentExecutor still holds a reference to in its `updates` list.
+                return [
+                    AgentResponseUpdate(
+                        contents=list(data.contents),
+                        role=data.role,
+                        author_name=data.author_name or executor_id,
+                        response_id=data.response_id,
+                        message_id=data.message_id,
+                        created_at=data.created_at,
+                        raw_representation=data.raw_representation,
+                    )
+                ]
             if isinstance(data, AgentResponse):
                 # Convert each message in AgentResponse to an AgentResponseUpdate
                 updates: list[AgentResponseUpdate] = []

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/_workflows/_agent_executor.py RENAMED Viewed

@@ -156,8 +156,9 @@ class AgentExecutor(Executor):
                    the agent run.
                 - "custom": use the provided context_filter function to determine which messages to include
                    as context for the agent run.
-            context_filter: An optional function for filtering conversation context when context_mode is set
-                to "custom".
+            context_filter: A function that takes the full conversation (list of Messages) as input and returns
+                a filtered list of Messages to be used as context for the agent run. This is required
+                if context_mode is set to "custom".
         """
         # Prefer provided id; else use agent.name if present; else generate deterministic prefix
         exec_id = id or resolve_agent_id(agent)

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/_workflows/_runner.py RENAMED Viewed

@@ -278,7 +278,12 @@ class Runner:
                     "Please rebuild the original workflow before resuming."
                 )
-            # Restore state
+            # Restore state. Clear first so import_state (which merges) does
+            # not leak stale keys from a prior run on this Workflow instance.
+            # This matters more now that Workflow.run() no longer wipes state
+            # per call - the only reset point for shared state on a reused
+            # instance is at restore time.
+            self._state.clear()
             self._state.import_state(checkpoint.state)
             # Restore executor states using the restored state
             await self._restore_executor_states()

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/_workflows/_workflow.py RENAMED Viewed

@@ -299,7 +299,7 @@ class Workflow(DictConvertible):
     async def _run_workflow_with_tracing(
         self,
         initial_executor_fn: Callable[[], Awaitable[None]] | None = None,
-        reset_context: bool = True,
+        is_continuation: bool = False,
         streaming: bool = False,
         function_invocation_kwargs: Mapping[str, Mapping[str, Any]] | Mapping[str, Any] | None = None,
         client_kwargs: Mapping[str, Mapping[str, Any]] | Mapping[str, Any] | None = None,
@@ -310,13 +310,19 @@ class Workflow(DictConvertible):
         of external callers to maintain context across different workflow runs.
         Args:
-            initial_executor_fn: Optional function to execute initial executor
-            reset_context: Whether to reset the context for a new run
-            streaming: Whether to enable streaming mode for agents
+            initial_executor_fn: Optional function to execute initial executor.
+            is_continuation: True when this run is a continuation of prior
+                work (a checkpoint restore or a responses-only replay) rather
+                than a fresh new turn delivered via the start executor with
+                ``message=...``. Continuations preserve per-run accounting
+                (iteration counter and run kwargs) from the prior turn;
+                fresh-message runs reset them. Shared workflow state is
+                preserved in both cases.
+            streaming: Whether to enable streaming mode for agents.
             function_invocation_kwargs: Optional kwargs to store in State for function
-                invocations in subagents
+                invocations in subagents.
             client_kwargs: Optional kwargs to store in State for chat client
-                invocations in subagents
+                invocations in subagents.
         Yields:
             WorkflowEvent: The events generated during the workflow execution.
@@ -345,16 +351,26 @@ class Workflow(DictConvertible):
                     in_progress = WorkflowEvent.status(WorkflowRunState.IN_PROGRESS)
                 yield in_progress  # noqa: RUF070
-                # Reset context for a new run if supported
-                if reset_context:
+                # Per-run reset for fresh-message runs only. We deliberately
+                # do NOT clear shared workflow state (`_state.clear()`) or the
+                # runner context's in-flight messages (`reset_for_new_run()`)
+                # here - state and pending work persist across `run()` calls
+                # so that a `WorkflowAgent` can deliver multi-turn input on
+                # the same instance and have prior turns' context survive.
+                # Iteration counting and per-run kwargs ARE per-run though,
+                # so they're reset here.
+                if not is_continuation:
                     self._runner.reset_iteration_count()
-                    self._runner.context.reset_for_new_run()
-                    self._state.clear()
                 # Store run kwargs in State so executors can access them.
-                # Only overwrite when new kwargs are explicitly provided or state was
-                # just cleared (fresh run). On continuation (reset_context=False) with
-                # no new kwargs, preserve the kwargs from the original run.
+                # Per-run kwargs semantics:
+                # - On a fresh message run, prior kwargs go away (set to {}
+                #   by default, or to the new kwargs if provided). This
+                #   prevents stale kwargs from a prior turn leaking into the
+                #   current turn.
+                # - On a continuation (checkpoint restore or responses), the
+                #   prior run's kwargs are preserved unless the caller
+                #   explicitly provides new kwargs.
                 if function_invocation_kwargs is not None or client_kwargs is not None:
                     combined_kwargs: dict[str, Any] = {}
                     if function_invocation_kwargs is not None:
@@ -366,11 +382,12 @@ class Workflow(DictConvertible):
                             client_kwargs, "client_kwargs"
                         )
                     self._state.set(WORKFLOW_RUN_KWARGS_KEY, combined_kwargs)
-                elif reset_context:
+                elif not is_continuation:
                     self._state.set(WORKFLOW_RUN_KWARGS_KEY, {})
                 self._state.commit()  # Commit immediately so kwargs are available
-                # Set streaming mode after reset
+                # Set streaming mode (always set explicitly per run since
+                # reset_for_new_run() no longer runs to clear it).
                 self._runner_context.set_streaming(streaming)
                 # Execute initial setup if provided
@@ -585,13 +602,31 @@ class Workflow(DictConvertible):
         if checkpoint_storage is not None:
             self._runner.context.set_runtime_checkpoint_storage(checkpoint_storage)
-        initial_executor_fn, reset_context = self._resolve_execution_mode(
-            message, responses, checkpoint_id, checkpoint_storage
-        )
+        # Async validation: a fresh-message run is only allowed when the
+        # runner context has fully drained from any prior run. If it still
+        # has in-flight executor messages, the prior run didn't complete -
+        # the caller must either resume from a checkpoint or wait for the
+        # prior run to drain. (Pending request_info events are intentionally
+        # NOT blocked here: a follow-up run with message=... is the normal
+        # way to deliver a response to those pending requests, e.g. via
+        # WorkflowAgent._process_pending_requests.)
+        # NOTE: _validate_run_params already enforces that ``message`` is
+        # mutually exclusive with both ``checkpoint_id`` and ``responses``,
+        # so we don't need to re-check those here.
+        if message is not None and await self._runner.context.has_messages():
+            raise RuntimeError(
+                "Cannot start a new run with 'message' while in-flight executor "
+                "messages remain from a prior run. Resume from a checkpoint "
+                "(checkpoint_id=...) or wait for the prior run to complete. "
+                "Workflows that need to recover from a mid-run failure must use "
+                "checkpointing; there is no in-process recovery path."
+            )
+        initial_executor_fn = self._resolve_execution_mode(message, responses, checkpoint_id, checkpoint_storage)
         async for event in self._run_workflow_with_tracing(
             initial_executor_fn=initial_executor_fn,
-            reset_context=reset_context,
+            is_continuation=(message is None),
             streaming=streaming,
             function_invocation_kwargs=function_invocation_kwargs,
             client_kwargs=client_kwargs,
@@ -674,12 +709,8 @@ class Workflow(DictConvertible):
         responses: Mapping[str, Any] | None,
         checkpoint_id: str | None,
         checkpoint_storage: CheckpointStorage | None,
-    ) -> tuple[Callable[[], Awaitable[None]], bool]:
-        """Determine the initial executor function and reset_context flag based on parameters.
-        Returns:
-            A tuple of (initial_executor_fn, reset_context).
-        """
+    ) -> Callable[[], Awaitable[None]]:
+        """Determine the initial executor function based on parameters."""
         if responses is not None:
             if checkpoint_id is not None:
                 # Combined: restore checkpoint then send responses
@@ -689,13 +720,9 @@ class Workflow(DictConvertible):
             else:
                 # Send responses only (requires pending requests in workflow state)
                 initial_executor_fn = functools.partial(self._send_responses_internal, responses)
-            return initial_executor_fn, False
+            return initial_executor_fn
         # Regular run or checkpoint restoration
-        initial_executor_fn = functools.partial(
-            self._execute_with_message_or_checkpoint, message, checkpoint_id, checkpoint_storage
-        )
-        reset_context = message is not None and checkpoint_id is None
-        return initial_executor_fn, reset_context
+        return functools.partial(self._execute_with_message_or_checkpoint, message, checkpoint_id, checkpoint_storage)
     async def _restore_and_send_responses(
         self,

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/_workflows/_workflow_executor.py RENAMED Viewed

@@ -361,7 +361,7 @@ class WorkflowExecutor(Executor):
         return any(is_instance_of(message.data, input_type) for input_type in self.workflow.input_types)
     @handler
-    async def process_workflow(self, input_data: object, ctx: WorkflowContext[Any]) -> None:
+    async def process_workflow(self, input_data: object, ctx: WorkflowContext[Any, Any]) -> None:
         """Execute the sub-workflow with raw input data.
         This handler starts a new sub-workflow execution. When the sub-workflow
@@ -428,7 +428,7 @@ class WorkflowExecutor(Executor):
     async def handle_message_wrapped_request_response(
         self,
         response: SubWorkflowResponseMessage,
-        ctx: WorkflowContext[Any],
+        ctx: WorkflowContext[Any, Any],
     ) -> None:
         """Handle response from parent for a forwarded request.

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/foundry/__init__.py RENAMED Viewed

@@ -4,6 +4,7 @@
 This module lazily re-exports objects from:
 - ``agent-framework-anthropic``
+- ``agent-framework-azure-contentunderstanding``
 - ``agent-framework-foundry``
 - ``agent-framework-foundry-local``
 """
@@ -12,7 +13,15 @@ import importlib
 from typing import Any
 _IMPORTS: dict[str, tuple[str, str]] = {
+    "AnalysisSection": ("agent_framework_azure_contentunderstanding", "agent-framework-azure-contentunderstanding"),
     "AnthropicFoundryClient": ("agent_framework_anthropic", "agent-framework-anthropic"),
+    "ContentUnderstandingContextProvider": (
+        "agent_framework_azure_contentunderstanding",
+        "agent-framework-azure-contentunderstanding",
+    ),
+    "DocumentStatus": ("agent_framework_azure_contentunderstanding", "agent-framework-azure-contentunderstanding"),
+    "FileSearchBackend": ("agent_framework_azure_contentunderstanding", "agent-framework-azure-contentunderstanding"),
+    "FileSearchConfig": ("agent_framework_azure_contentunderstanding", "agent-framework-azure-contentunderstanding"),
     "FoundryAgent": ("agent_framework_foundry", "agent-framework-foundry"),
     "FoundryAgentOptions": ("agent_framework_foundry", "agent-framework-foundry"),
     "FoundryChatClient": ("agent_framework_foundry", "agent-framework-foundry"),

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/foundry/__init__.pyi RENAMED Viewed

@@ -4,6 +4,13 @@
 # Install the relevant packages for full type support.
 from agent_framework_anthropic import AnthropicFoundryClient, RawAnthropicFoundryClient
+from agent_framework_azure_contentunderstanding import (  # pyright: ignore[reportMissingImports]
+    AnalysisSection,  # pyright: ignore[reportUnknownVariableType]
+    ContentUnderstandingContextProvider,  # pyright: ignore[reportUnknownVariableType]
+    DocumentStatus,  # pyright: ignore[reportUnknownVariableType]
+    FileSearchBackend,  # pyright: ignore[reportUnknownVariableType]
+    FileSearchConfig,  # pyright: ignore[reportUnknownVariableType]
+)
 from agent_framework_foundry import (
     FoundryAgent,
     FoundryChatClient,
@@ -31,7 +38,12 @@ from agent_framework_foundry_local import (
 )
 __all__ = [
+    "AnalysisSection",
     "AnthropicFoundryClient",
+    "ContentUnderstandingContextProvider",
+    "DocumentStatus",
+    "FileSearchBackend",
+    "FileSearchConfig",
     "FoundryAgent",
     "FoundryChatClient",
     "FoundryChatOptions",

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/agent_framework/observability.py RENAMED Viewed

@@ -26,6 +26,7 @@ from time import perf_counter, time_ns
 from typing import TYPE_CHECKING, Any, ClassVar, Final, Generic, Literal, TypedDict, cast, overload
 from dotenv import load_dotenv
+from opentelemetry import context as otel_context
 from opentelemetry import metrics, trace
 from . import __version__ as version_info
@@ -1277,27 +1278,8 @@ class ChatTelemetryLayer(Generic[OptionsCoT]):
         )
         if stream:
-            result_stream = cast(
-                ResponseStream[ChatResponseUpdate, ChatResponse[Any]],
-                super_get_response(
-                    messages=messages,
-                    stream=True,
-                    options=opts,
-                    compaction_strategy=compaction_strategy,
-                    tokenizer=tokenizer,
-                    function_invocation_kwargs=function_invocation_kwargs,
-                    client_kwargs=merged_client_kwargs,
-                ),
-            )
+            span = _start_streaming_span(attributes, OtelAttr.REQUEST_MODEL)
-            # Create span directly without trace.use_span() context attachment.
-            # Streaming spans are closed asynchronously in cleanup hooks, which run
-            # in a different async context than creation — using use_span() would
-            # cause "Failed to detach context" errors from OpenTelemetry.
-            operation = attributes.get(OtelAttr.OPERATION, "operation")
-            span_name = attributes.get(OtelAttr.REQUEST_MODEL, "unknown")
-            span = get_tracer().start_span(f"{operation} {span_name}")
-            span.set_attributes(attributes)
             if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages:
                 _capture_messages(
                     span=span,
@@ -1319,6 +1301,24 @@ class ChatTelemetryLayer(Generic[OptionsCoT]):
             def _record_duration() -> None:
                 duration_state["duration"] = perf_counter() - start_time
+            try:
+                result_stream = cast(
+                    ResponseStream[ChatResponseUpdate, ChatResponse[Any]],
+                    super_get_response(
+                        messages=messages,
+                        stream=True,
+                        options=opts,
+                        compaction_strategy=compaction_strategy,
+                        tokenizer=tokenizer,
+                        function_invocation_kwargs=function_invocation_kwargs,
+                        client_kwargs=merged_client_kwargs,
+                    ),
+                )
+            except Exception as exception:
+                capture_exception(span=span, exception=exception, timestamp=time_ns())
+                _close_span()
+                raise
             async def _finalize_stream() -> None:
                 from ._types import ChatResponse
@@ -1357,11 +1357,18 @@ class ChatTelemetryLayer(Generic[OptionsCoT]):
                 finally:
                     _close_span()
-            # Register a weak reference callback to close the span if stream is garbage collected
-            # without being consumed. This ensures spans don't leak if users don't consume streams.
-            wrapped_stream: ResponseStream[ChatResponseUpdate, ChatResponse[Any]] = result_stream.with_cleanup_hook(
-                _record_duration
-            ).with_cleanup_hook(_finalize_stream)
+            # The pull context manager attaches the span around each underlying iterator pull so
+            # that child spans created during the pull (e.g. HTTP requests, inner tool execution)
+            # are parented under this chat span. Attach and detach happen in the same async
+            # context as the pull, avoiding cross-context cleanup issues. The weakref finalizer
+            # ensures the span is closed even if the stream is garbage collected without being
+            # consumed.
+            wrapped_stream: ResponseStream[ChatResponseUpdate, ChatResponse[Any]] = (
+                result_stream
+                .with_cleanup_hook(_record_duration)
+                .with_cleanup_hook(_finalize_stream)
+                .with_pull_context_manager(lambda: _activate_span(span))
+            )
             weakref.finalize(wrapped_stream, _close_span)
             return wrapped_stream
@@ -1543,23 +1550,8 @@ class AgentTelemetryLayer:
         inner_accumulated_usage_token = INNER_ACCUMULATED_USAGE.set({})
         if stream:
-            try:
-                run_result: object = execute()
-                if isinstance(run_result, ResponseStream):
-                    result_stream: ResponseStream[AgentResponseUpdate, AgentResponse[Any]] = run_result  # pyright: ignore[reportUnknownVariableType]
-                elif isinstance(run_result, Awaitable):
-                    result_stream = ResponseStream.from_awaitable(run_result)  # type: ignore[arg-type]  # pyright: ignore[reportArgumentType]
-                else:
-                    raise RuntimeError("Streaming telemetry requires a ResponseStream result.")
-            except Exception:
-                INNER_RESPONSE_TELEMETRY_CAPTURED_FIELDS.reset(inner_response_telemetry_captured_fields_token)
-                INNER_ACCUMULATED_USAGE.reset(inner_accumulated_usage_token)
-                raise
+            span = _start_streaming_span(attributes, OtelAttr.AGENT_NAME)
-            operation = attributes.get(OtelAttr.OPERATION, "operation")
-            span_name = attributes.get(OtelAttr.AGENT_NAME, "unknown")
-            span = get_tracer().start_span(f"{operation} {span_name}")
-            span.set_attributes(attributes)
             if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED and messages:
                 _capture_messages(
                     span=span,
@@ -1581,6 +1573,21 @@ class AgentTelemetryLayer:
             def _record_duration() -> None:
                 duration_state["duration"] = perf_counter() - start_time
+            try:
+                run_result: object = execute()
+                if isinstance(run_result, ResponseStream):
+                    result_stream: ResponseStream[AgentResponseUpdate, AgentResponse[Any]] = run_result  # pyright: ignore[reportUnknownVariableType]
+                elif isinstance(run_result, Awaitable):
+                    result_stream = ResponseStream.from_awaitable(run_result)  # type: ignore[arg-type]  # pyright: ignore[reportArgumentType]
+                else:
+                    raise RuntimeError("Streaming telemetry requires a ResponseStream result.")
+            except Exception as exception:
+                capture_exception(span=span, exception=exception, timestamp=time_ns())
+                INNER_RESPONSE_TELEMETRY_CAPTURED_FIELDS.reset(inner_response_telemetry_captured_fields_token)
+                INNER_ACCUMULATED_USAGE.reset(inner_accumulated_usage_token)
+                _close_span()
+                raise
             async def _finalize_stream() -> None:
                 from ._types import AgentResponse
@@ -1620,9 +1627,18 @@ class AgentTelemetryLayer:
                     INNER_ACCUMULATED_USAGE.reset(inner_accumulated_usage_token)
                     _close_span()
-            wrapped_stream: ResponseStream[AgentResponseUpdate, AgentResponse[Any]] = result_stream.with_cleanup_hook(
-                _record_duration
-            ).with_cleanup_hook(_finalize_stream)
+            # The pull context manager attaches the span around each underlying iterator pull so
+            # that child spans created during the pull (e.g. inner chat completion spans from the
+            # underlying ChatTelemetryLayer) are parented under this agent invoke span. Attach and
+            # detach happen in the same async context as the pull, avoiding cross-context cleanup
+            # issues. The weakref finalizer ensures the span is closed even if the stream is
+            # garbage collected without being consumed.
+            wrapped_stream: ResponseStream[AgentResponseUpdate, AgentResponse[Any]] = (
+                result_stream
+                .with_cleanup_hook(_record_duration)
+                .with_cleanup_hook(_finalize_stream)
+                .with_pull_context_manager(lambda: _activate_span(span))
+            )
             weakref.finalize(wrapped_stream, _close_span)
             return wrapped_stream
@@ -1809,6 +1825,27 @@ def get_function_span(
     )
+@contextlib.contextmanager
+def _activate_span(span: trace.Span) -> Generator[None]:
+    """Attach ``span`` as the current span in the OpenTelemetry context.
+    Designed to be used as a per-pull context manager registered on a
+    ``ResponseStream`` via ``with_pull_context_manager``: it attaches the span
+    before each underlying iterator pull and detaches immediately after, so
+    child spans created during the pull (HTTP clients, inner chat completions,
+    tool execution) are correctly parented under ``span``.
+    Because attach and detach happen within the same ``__anext__`` invocation
+    (and therefore the same async task / contextvars context), there is no risk
+    of "Failed to detach context" warnings from cross-context cleanup.
+    """
+    token = otel_context.attach(trace.set_span_in_context(span))
+    try:
+        yield
+    finally:
+        otel_context.detach(token)
 @contextlib.contextmanager
 def _get_span(
     attributes: dict[str, Any],
@@ -1831,6 +1868,29 @@ def _get_span(
         yield current_span
+def _start_streaming_span(attributes: dict[str, Any], span_name_attribute: str) -> trace.Span:
+    """Start a non-current span for a streaming operation.
+    Unlike :func:`_get_span`, the returned span is not attached to the current
+    OpenTelemetry context. The caller is responsible for:
+    - Ending the span via cleanup hooks on the wrapped
+      :class:`~agent_framework._types.ResponseStream`.
+    - Activating the span around each iterator pull via
+      :func:`_activate_span` registered with ``with_pull_context_manager`` so
+      that child spans created during stream production inherit it as parent.
+    Streaming spans are closed asynchronously in cleanup hooks that run in a
+    different async context than creation, so attaching the span at creation
+    time would cause "Failed to detach context" errors from OpenTelemetry.
+    """
+    operation = attributes.get(OtelAttr.OPERATION, "operation")
+    span_name = attributes.get(span_name_attribute, "unknown")
+    span = get_tracer().start_span(f"{operation} {span_name}")
+    span.set_attributes(attributes)
+    return span
 def _get_instructions_from_options(options: Any) -> str | list[str] | None:
     """Extract instructions from options dict."""
     if options is None:

{agent_framework_core-1.2.1 → agent_framework_core-1.2.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ description = "Microsoft Agent Framework for building AI Agents with Python. Thi
 authors = [{ name = "Microsoft", email = "af-support@microsoft.com"}]
 readme = "README.md"
 requires-python = ">=3.10"
-version = "1.2.1"
+version = "1.2.2"
 license-files = ["LICENSE"]
 urls.homepage = "https://aka.ms/agent-framework"
 urls.source = "https://github.com/microsoft/agent-framework/tree/main/python"