PyPI - agent-framework-core - Versions diffs - 1.7.0__tar.gz → 1.8.0__tar.gz - Mend

agent-framework-core 1.7.0__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-framework-core
-Version: 1.7.0
+Version: 1.8.0
 Summary: Microsoft Agent Framework for building AI Agents with Python. This is the core package that has all the core abstractions and implementations.
 Author-email: Microsoft <af-support@microsoft.com>
 Requires-Python: >=3.10

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/__init__.py RENAMED Viewed

@@ -71,6 +71,7 @@ from ._evaluation import (
     Evaluator,
     ExpectedToolCall,
     LocalEvaluator,
+    RubricScore,
     evaluate_agent,
     evaluate_workflow,
     evaluator,
@@ -90,6 +91,16 @@ from ._harness._background_agents import (
     BackgroundTaskInfo,
     BackgroundTaskStatus,
 )
+from ._harness._file_access import (
+    DEFAULT_FILE_ACCESS_INSTRUCTIONS,
+    DEFAULT_FILE_ACCESS_SOURCE_ID,
+    AgentFileStore,
+    FileAccessProvider,
+    FileSearchMatch,
+    FileSearchResult,
+    FileSystemAgentFileStore,
+    InMemoryAgentFileStore,
+)
 from ._harness._memory import (
     DEFAULT_MEMORY_SOURCE_ID,
     MemoryContextProvider,
@@ -157,6 +168,9 @@ from ._skills import (
     InlineSkillResource,
     InlineSkillScript,
     InMemorySkillsSource,
+    MCPSkill,
+    MCPSkillResource,
+    MCPSkillsSource,
     Skill,
     SkillFrontmatter,
     SkillResource,
@@ -309,6 +323,8 @@ __all__ = [
     "APP_INFO",
     "COMPACTION_STATE_KEY",
     "DEFAULT_BACKGROUND_AGENTS_SOURCE_ID",
+    "DEFAULT_FILE_ACCESS_INSTRUCTIONS",
+    "DEFAULT_FILE_ACCESS_SOURCE_ID",
     "DEFAULT_HARNESS_INSTRUCTIONS",
     "DEFAULT_MAX_ITERATIONS",
     "DEFAULT_MEMORY_SOURCE_ID",
@@ -334,6 +350,7 @@ __all__ = [
     "AgentExecutor",
     "AgentExecutorRequest",
     "AgentExecutorResponse",
+    "AgentFileStore",
     "AgentFrameworkException",
     "AgentMiddleware",
     "AgentMiddlewareLayer",
@@ -393,11 +410,15 @@ __all__ = [
     "ExperimentalFeature",
     "FanInEdgeGroup",
     "FanOutEdgeGroup",
+    "FileAccessProvider",
     "FileCheckpointStorage",
     "FileHistoryProvider",
+    "FileSearchMatch",
+    "FileSearchResult",
     "FileSkill",
     "FileSkillScript",
     "FileSkillsSource",
+    "FileSystemAgentFileStore",
     "FilteringSkillsSource",
     "FinalT",
     "FinishReason",
@@ -414,6 +435,7 @@ __all__ = [
     "GeneratedEmbeddings",
     "GraphConnectivityError",
     "HistoryProvider",
+    "InMemoryAgentFileStore",
     "InMemoryCheckpointStorage",
     "InMemoryHistoryProvider",
     "InMemorySkillsSource",
@@ -425,6 +447,9 @@ __all__ = [
     "MCPStdioTool",
     "MCPStreamableHTTPTool",
     "MCPWebsocketTool",
+    "MCPSkill",
+    "MCPSkillResource",
+    "MCPSkillsSource",
     "MemoryContextProvider",
     "MemoryFileStore",
     "MemoryIndexEntry",
@@ -442,6 +467,7 @@ __all__ = [
     "ResponseStream",
     "Role",
     "RoleLiteral",
+    "RubricScore",
     "RunContext",
     "Runner",
     "RunnerContext",

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_clients.py RENAMED Viewed

@@ -380,8 +380,15 @@ class BaseChatClient(SerializationMixin, ABC, Generic[OptionsCoT]):
             return prepared_messages
         from ._compaction import apply_compaction
+        # Compact the caller's list in place when possible. A compaction operation has
+        # two halves: exclusion flags (mutated on shared Message objects) and inserted
+        # summary messages. Operating on the original list keeps both halves on the list
+        # the function-invocation tool loop reuses across iterations; otherwise inserted
+        # summaries would be lost on a throwaway copy while exclusions persisted, silently
+        # dropping older groups (issue #4991).
+        working_messages = messages if isinstance(messages, list) else prepared_messages
         return await apply_compaction(
-            prepared_messages,
+            working_messages,
             strategy=compaction_strategy,
             tokenizer=tokenizer,
         )

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_compaction.py RENAMED Viewed

@@ -4,7 +4,7 @@ from __future__ import annotations
 import json
 import logging
-from collections.abc import Mapping, Sequence
+from collections.abc import Iterable, Mapping, Sequence
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -92,10 +92,23 @@ def _is_reasoning_only_assistant(message: Message) -> bool:
     return all(content.type == "text_reasoning" for content in message.contents)
-def _ensure_message_ids(messages: list[Message]) -> None:
+def _ensure_message_ids(
+    messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
+) -> None:
+    existing_ids: set[str] = set(reserved_ids) if reserved_ids is not None else set()
+    existing_ids.update(message.message_id for message in messages if message.message_id)
     for index, message in enumerate(messages):
-        if not message.message_id:
-            message.message_id = f"msg_{index}"
+        if message.message_id:
+            continue
+        candidate = f"msg_{id_offset + index}"
+        if candidate in existing_ids:
+            counter = id_offset + len(messages)
+            candidate = f"msg_{counter}"
+            while candidate in existing_ids:
+                counter += 1
+                candidate = f"msg_{counter}"
+        message.message_id = candidate
+        existing_ids.add(candidate)
 def _group_id_for(message: Message, group_index: int) -> str:
@@ -104,14 +117,27 @@ def _group_id_for(message: Message, group_index: int) -> str:
     return f"group_index_{group_index}"
-def group_messages(messages: list[Message]) -> list[dict[str, Any]]:
+def group_messages(
+    messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
+) -> list[dict[str, Any]]:
     """Compute group spans and metadata for annotation.
+    Args:
+        messages: The messages (or a slice of them) to group.
+    Keyword Args:
+        id_offset: Absolute starting index used when auto-assigning ``message_id``
+            values, so incremental annotation of a list slice produces ids that
+            stay unique across the full list.
+        reserved_ids: Message ids that already exist outside ``messages`` (for
+            example in a preserved prefix). Auto-assigned ids are guaranteed not
+            to collide with these, preventing duplicate ids across the full list.
     Returns:
         Ordered list of lightweight span dicts with keys:
         ``group_id``, ``kind``, ``start_index``, ``end_index``, ``has_reasoning``.
     """
-    _ensure_message_ids(messages)
+    _ensure_message_ids(messages, id_offset=id_offset, reserved_ids=reserved_ids)
     spans: list[dict[str, Any]] = []
     i = 0
     group_index = 0
@@ -439,7 +465,8 @@ def annotate_message_groups(
         if previous_group_index is not None:
             group_index_offset = previous_group_index + 1
-    spans = group_messages(messages[start_index:])
+    reserved_ids = {message.message_id for message in messages[:start_index] if message.message_id}
+    spans = group_messages(messages[start_index:], id_offset=start_index, reserved_ids=reserved_ids)
     for span_index, span in enumerate(spans):
         group_id = str(span["group_id"])
         kind = _coerce_group_kind(span["kind"])

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_evaluation.py RENAMED Viewed

@@ -311,12 +311,15 @@ class EvalScoreResult:
         score: Numeric score from the evaluator.
         passed: Whether the item passed this evaluator's threshold.
         sample: Optional raw evaluator output (rationale, metadata).
+        dimensions: Per-dimension scores when this evaluator is a rubric
+            evaluator.  ``None`` for non-rubric (e.g. built-in) evaluators.
     """
     name: str
     score: float
     passed: bool | None = None
     sample: dict[str, Any] | None = None
+    dimensions: list[RubricScore] | None = None
 @experimental(feature_id=ExperimentalFeature.EVALS)
@@ -496,6 +499,179 @@ class EvalResults:
                     detail += f" Errored items: {', '.join(summaries)}."
             raise EvalNotPassedError(detail)
+    def assert_score_at_least(
+        self,
+        min_score: float,
+        *,
+        evaluator: str | None = None,
+        msg: str | None = None,
+    ) -> None:
+        """Assert every item's score (optionally filtered by evaluator) is ``>= min_score``.
+        Designed for CI gates on generated rubric evaluators (e.g.
+        ``results.assert_score_at_least(0.80)``).  Includes any
+        sub-results from workflow evaluations.
+        Args:
+            min_score: Minimum acceptable score (inclusive).
+            evaluator: When set, only check scores from the evaluator
+                whose ``EvalScoreResult.name`` matches.
+            msg: Optional custom failure message.
+        Raises:
+            EvalNotPassedError: When any matching score is below the threshold.
+        """
+        offenders: list[str] = []
+        def _check(results: EvalResults) -> None:
+            for item in results.items:
+                for score in item.scores:
+                    if evaluator is not None and score.name != evaluator:
+                        continue
+                    if score.score < min_score:
+                        offenders.append(f"{item.item_id}/{score.name}={score.score:.3f}")
+            for sub in results.sub_results.values():
+                _check(sub)
+        _check(self)
+        if offenders:
+            detail = msg or (
+                f"{len(offenders)} score(s) below threshold {min_score}"
+                f"{' for ' + evaluator if evaluator else ''}: {', '.join(offenders[:5])}"
+                + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
+            )
+            raise EvalNotPassedError(detail)
+    def assert_dimension_score_at_least(
+        self,
+        dimension_id: str,
+        min_score: float,
+        *,
+        evaluator: str | None = None,
+        require_applicable: bool = False,
+        msg: str | None = None,
+    ) -> None:
+        """Assert every item's score for a rubric *dimension* is ``>= min_score``.
+        Walks ``EvalScoreResult.dimensions`` looking for the named
+        dimension across all items (and sub-results).  Non-applicable
+        dimensions are skipped by default; pass
+        ``require_applicable=True`` to fail when no applicable score is
+        produced.
+        Args:
+            dimension_id: Dimension id (matches the rubric definition).
+            min_score: Minimum acceptable dimension score (inclusive).
+            evaluator: When set, only consider scores from the evaluator
+                whose ``EvalScoreResult.name`` matches.
+            require_applicable: When ``True``, missing or non-applicable
+                dimension scores raise.  Defaults to ``False`` (skip).
+            msg: Optional custom failure message.
+        Raises:
+            EvalNotPassedError: When the dimension fails the threshold.
+        """
+        offenders: list[str] = []
+        missing_items: list[str] = []
+        def _check(results: EvalResults) -> None:
+            for item in results.items:
+                found_applicable = False
+                for score in item.scores:
+                    if evaluator is not None and score.name != evaluator:
+                        continue
+                    if not score.dimensions:
+                        continue
+                    for rs in score.dimensions:
+                        if rs.id != dimension_id:
+                            continue
+                        if not rs.applicable:
+                            continue
+                        found_applicable = True
+                        if rs.score is None or rs.score < min_score:
+                            offenders.append(
+                                f"{item.item_id}/{score.name}/{dimension_id}="
+                                f"{rs.score if rs.score is not None else 'None'}"
+                            )
+                if require_applicable and not found_applicable:
+                    missing_items.append(item.item_id)
+            for sub in results.sub_results.values():
+                _check(sub)
+        _check(self)
+        problems: list[str] = []
+        if offenders:
+            problems.append(
+                f"{len(offenders)} dimension score(s) for '{dimension_id}' below {min_score}: "
+                f"{', '.join(offenders[:5])}" + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
+            )
+        if missing_items:
+            problems.append(
+                f"Dimension '{dimension_id}' not applicable on {len(missing_items)} item(s): "
+                f"{', '.join(missing_items[:5])}"
+            )
+        if problems:
+            raise EvalNotPassedError(msg or "; ".join(problems))
+    def assert_no_failed_items(self, msg: str | None = None) -> None:
+        """Assert no item ended in ``fail`` or ``error`` status.
+        Includes any sub-results from workflow evaluations.
+        Args:
+            msg: Optional custom failure message.
+        Raises:
+            EvalNotPassedError: When any item failed or errored.
+        """
+        bad: list[str] = []
+        def _check(results: EvalResults) -> None:
+            for item in results.items:
+                if item.is_failed or item.is_error:
+                    bad.append(f"{item.item_id}:{item.status}")
+            for sub in results.sub_results.values():
+                _check(sub)
+        _check(self)
+        if bad:
+            detail = msg or (
+                f"{len(bad)} item(s) failed or errored: {', '.join(bad[:5])}"
+                + (f" (+{len(bad) - 5} more)" if len(bad) > 5 else "")
+            )
+            raise EvalNotPassedError(detail)
+# endregion
+# region Generated rubric evaluators
+@experimental(feature_id=ExperimentalFeature.EVALS)
+@dataclass(frozen=True)
+class RubricScore:
+    """A single dimension's score from a rubric-based evaluator run.
+    Rubric evaluators emit one ``RubricScore`` per dimension per item.
+    Attached to :class:`EvalScoreResult` as a typed view of the raw
+    ``properties.rubric_scores`` payload returned by providers such as
+    Foundry's generated rubric evaluators.
+    Attributes:
+        id: Dimension id (matches the rubric definition).
+        score: Numeric score, or ``None`` when the dimension was marked
+            non-applicable for this item.
+        applicable: Whether the dimension applied to this item.
+        weight: Dimension weight (mirrors the rubric definition).
+        reason: Short rationale produced by the evaluator.
+    """
+    id: str
+    score: int | None
+    applicable: bool
+    weight: int
+    reason: str
 # endregion

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_feature_stage.py RENAMED Viewed

@@ -50,6 +50,7 @@ class ExperimentalFeature(str, Enum):
     on enum membership or attribute presence over time.
     """
+    DECLARATIVE_AGENTS = "DECLARATIVE_AGENTS"
     EVALS = "EVALS"
     FILE_HISTORY = "FILE_HISTORY"
     FIDES = "FIDES"
@@ -57,6 +58,8 @@ class ExperimentalFeature(str, Enum):
     FOUNDRY_PREVIEW_TOOLS = "FOUNDRY_PREVIEW_TOOLS"
     FUNCTIONAL_WORKFLOWS = "FUNCTIONAL_WORKFLOWS"
     HARNESS = "HARNESS"
+    MCP_SKILLS = "MCP_SKILLS"
+    PROGRESSIVE_TOOLS = "PROGRESSIVE_TOOLS"
     SKILLS = "SKILLS"
     TO_PROMPT_AGENT = "TO_PROMPT_AGENT"

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_agent.py RENAMED Viewed

@@ -14,12 +14,13 @@ import logging
 from collections.abc import Callable, Sequence
 from typing import TYPE_CHECKING, Any
-from .._agents import Agent
+from .._agents import Agent, SupportsAgentRun
 from .._clients import SupportsWebSearchTool
 from .._compaction import CompactionProvider, ContextWindowCompactionStrategy, ToolResultCompactionStrategy
 from .._feature_stage import ExperimentalFeature, experimental
 from .._sessions import ContextProvider, HistoryProvider, InMemoryHistoryProvider
 from .._skills import SkillsProvider
+from ._background_agents import BackgroundAgentsProvider
 from ._memory import MemoryContextProvider, MemoryStore
 from ._mode import AgentModeProvider
 from ._todo import TodoProvider
@@ -103,6 +104,8 @@ def _assemble_context_providers(
     memory_store: MemoryStore | None,
     skills_provider: SkillsProvider | None,
     skills_paths: Sequence[str] | None,
+    background_agents: Sequence[SupportsAgentRun] | None,
+    background_agents_instructions: str | None,
     extra_context_providers: Sequence[ContextProvider] | None,
 ) -> list[ContextProvider]:
     """Assemble the ordered list of context providers."""
@@ -130,6 +133,10 @@ def _assemble_context_providers(
     if skills_paths:
         providers.append(SkillsProvider.from_paths(*skills_paths))
+    # Background agents are opt-in: only added when agents are provided.
+    if background_agents:
+        providers.append(BackgroundAgentsProvider(background_agents, instructions=background_agents_instructions))
     # Append any user-supplied additional providers.
     if extra_context_providers:
         providers.extend(extra_context_providers)
@@ -165,6 +172,8 @@ def create_harness_agent(
     memory_store: MemoryStore | None = None,
     skills_provider: SkillsProvider | None = None,
     skills_paths: Sequence[str] | None = None,
+    background_agents: Sequence[SupportsAgentRun] | None = None,
+    background_agents_instructions: str | None = None,
     disable_web_search: bool = False,
     otel_provider_name: str | None = None,
     context_providers: Sequence[ContextProvider] | None = None,
@@ -182,6 +191,7 @@ def create_harness_agent(
     - **AgentModeProvider** — plan/execute mode tracking
     - **MemoryContextProvider** — file-based durable memory (when ``memory_store`` provided)
     - **SkillsProvider** — skill discovery and progressive loading
+    - **BackgroundAgentsProvider** — delegate work to background sub-agents
     - **OpenTelemetry** — observability via ``AgentTelemetryLayer``
     Each feature can be disabled or customized via keyword arguments.
@@ -253,6 +263,13 @@ def create_harness_agent(
         skills_paths: Paths for file-based skill discovery (looks for SKILL.md files).
             Can be combined with ``skills_provider``. When neither ``skills_provider``
             nor ``skills_paths`` is provided, no SkillsProvider is added.
+        background_agents: Collection of agents available for background task delegation.
+            When provided, a ``BackgroundAgentsProvider`` is automatically included,
+            enabling the agent to start, monitor, and retrieve results from background tasks.
+            Each agent must have a non-empty, unique name (case-insensitive).
+        background_agents_instructions: Optional instruction override for the
+            ``BackgroundAgentsProvider``. May include ``{background_agents}`` placeholder
+            which will be replaced with the agent listing.
         disable_web_search: When True, skip automatic web search tool inclusion.
             When False (default), the web search tool is automatically added if the
             client implements SupportsWebSearchTool. A warning is logged if the client
@@ -302,6 +319,8 @@ def create_harness_agent(
         memory_store=memory_store,
         skills_provider=skills_provider,
         skills_paths=skills_paths,
+        background_agents=background_agents,
+        background_agents_instructions=background_agents_instructions,
         extra_context_providers=context_providers,
     )

{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_background_agents.py RENAMED Viewed

@@ -349,6 +349,8 @@ class BackgroundAgentsProvider(ContextProvider):
             _save_provider_state(session, provider_state, source_id=source_id)
             return f"Background task {task_id} started on agent '{agent_name}'."
+        background_agents_start_task._invoke_sync_on_event_loop = True  # pyright: ignore[reportPrivateUsage]
         @tool(name="background_agents_wait_for_first_completion", approval_mode="never_require")
         async def background_agents_wait_for_first_completion(task_ids: list[int]) -> str:
             """Block until the first of the specified background tasks completes. Returns the completed task's ID."""
@@ -471,6 +473,8 @@ class BackgroundAgentsProvider(ContextProvider):
             _save_provider_state(session, provider_state, source_id=source_id)
             return f"Task {task_id} continued with new input."
+        background_agents_continue_task._invoke_sync_on_event_loop = True  # pyright: ignore[reportPrivateUsage]
         @tool(name="background_agents_clear_completed_task", approval_mode="never_require")
         def background_agents_clear_completed_task(task_id: int) -> str:
             """Remove a completed or failed task and release its session to free memory."""

agent-framework-core 1.7.0__tar.gz → 1.8.0__tar.gz

agent-framework-core 1.7.0__tar.gz → 1.8.0__tar.gz