PyPI - agent-framework-core - Versions diffs - 1.6.0__tar.gz → 1.8.0__tar.gz - Mend

agent-framework-core 1.6.0__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

{agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-framework-core
-Version: 1.6.0
+Version: 1.8.0
 Summary: Microsoft Agent Framework for building AI Agents with Python. This is the core package that has all the core abstractions and implementations.
 Author-email: Microsoft <af-support@microsoft.com>
 Requires-Python: >=3.10

{agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/__init__.py RENAMED Viewed

@@ -45,6 +45,7 @@ from ._compaction import (
     CharacterEstimatorTokenizer,
     CompactionProvider,
     CompactionStrategy,
+    ContextWindowCompactionStrategy,
     SelectiveToolCallCompactionStrategy,
     SlidingWindowStrategy,
     SummarizationStrategy,
@@ -70,6 +71,7 @@ from ._evaluation import (
     Evaluator,
     ExpectedToolCall,
     LocalEvaluator,
+    RubricScore,
     evaluate_agent,
     evaluate_workflow,
     evaluator,
@@ -79,6 +81,26 @@ from ._evaluation import (
     tool_calls_present,
 )
 from ._feature_stage import ExperimentalFeature, ReleaseCandidateFeature
+from ._harness._agent import (
+    DEFAULT_HARNESS_INSTRUCTIONS,
+    create_harness_agent,
+)
+from ._harness._background_agents import (
+    DEFAULT_BACKGROUND_AGENTS_SOURCE_ID,
+    BackgroundAgentsProvider,
+    BackgroundTaskInfo,
+    BackgroundTaskStatus,
+)
+from ._harness._file_access import (
+    DEFAULT_FILE_ACCESS_INSTRUCTIONS,
+    DEFAULT_FILE_ACCESS_SOURCE_ID,
+    AgentFileStore,
+    FileAccessProvider,
+    FileSearchMatch,
+    FileSearchResult,
+    FileSystemAgentFileStore,
+    InMemoryAgentFileStore,
+)
 from ._harness._memory import (
     DEFAULT_MEMORY_SOURCE_ID,
     MemoryContextProvider,
@@ -146,6 +168,9 @@ from ._skills import (
     InlineSkillResource,
     InlineSkillScript,
     InMemorySkillsSource,
+    MCPSkill,
+    MCPSkillResource,
+    MCPSkillsSource,
     Skill,
     SkillFrontmatter,
     SkillResource,
@@ -297,6 +322,10 @@ __all__ = [
     "AGENT_FRAMEWORK_USER_AGENT",
     "APP_INFO",
     "COMPACTION_STATE_KEY",
+    "DEFAULT_BACKGROUND_AGENTS_SOURCE_ID",
+    "DEFAULT_FILE_ACCESS_INSTRUCTIONS",
+    "DEFAULT_FILE_ACCESS_SOURCE_ID",
+    "DEFAULT_HARNESS_INSTRUCTIONS",
     "DEFAULT_MAX_ITERATIONS",
     "DEFAULT_MEMORY_SOURCE_ID",
     "DEFAULT_MODE_SOURCE_ID",
@@ -321,6 +350,7 @@ __all__ = [
     "AgentExecutor",
     "AgentExecutorRequest",
     "AgentExecutorResponse",
+    "AgentFileStore",
     "AgentFrameworkException",
     "AgentMiddleware",
     "AgentMiddlewareLayer",
@@ -332,6 +362,9 @@ __all__ = [
     "AgentSession",
     "AggregatingSkillsSource",
     "Annotation",
+    "BackgroundAgentsProvider",
+    "BackgroundTaskInfo",
+    "BackgroundTaskStatus",
     "BaseAgent",
     "BaseChatClient",
     "BaseEmbeddingClient",
@@ -352,6 +385,7 @@ __all__ = [
     "CompactionStrategy",
     "Content",
     "ContextProvider",
+    "ContextWindowCompactionStrategy",
     "ContinuationToken",
     "ConversationSplit",
     "ConversationSplitter",
@@ -376,11 +410,15 @@ __all__ = [
     "ExperimentalFeature",
     "FanInEdgeGroup",
     "FanOutEdgeGroup",
+    "FileAccessProvider",
     "FileCheckpointStorage",
     "FileHistoryProvider",
+    "FileSearchMatch",
+    "FileSearchResult",
     "FileSkill",
     "FileSkillScript",
     "FileSkillsSource",
+    "FileSystemAgentFileStore",
     "FilteringSkillsSource",
     "FinalT",
     "FinishReason",
@@ -397,6 +435,7 @@ __all__ = [
     "GeneratedEmbeddings",
     "GraphConnectivityError",
     "HistoryProvider",
+    "InMemoryAgentFileStore",
     "InMemoryCheckpointStorage",
     "InMemoryHistoryProvider",
     "InMemorySkillsSource",
@@ -408,6 +447,9 @@ __all__ = [
     "MCPStdioTool",
     "MCPStreamableHTTPTool",
     "MCPWebsocketTool",
+    "MCPSkill",
+    "MCPSkillResource",
+    "MCPSkillsSource",
     "MemoryContextProvider",
     "MemoryFileStore",
     "MemoryIndexEntry",
@@ -425,6 +467,7 @@ __all__ = [
     "ResponseStream",
     "Role",
     "RoleLiteral",
+    "RubricScore",
     "RunContext",
     "Runner",
     "RunnerContext",
@@ -499,6 +542,7 @@ __all__ = [
     "apply_compaction",
     "chat_middleware",
     "create_edge_runner",
+    "create_harness_agent",
     "detect_media_type_from_base64",
     "evaluate_agent",
     "evaluate_workflow",

{agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_clients.py RENAMED Viewed

@@ -380,8 +380,15 @@ class BaseChatClient(SerializationMixin, ABC, Generic[OptionsCoT]):
             return prepared_messages
         from ._compaction import apply_compaction
+        # Compact the caller's list in place when possible. A compaction operation has
+        # two halves: exclusion flags (mutated on shared Message objects) and inserted
+        # summary messages. Operating on the original list keeps both halves on the list
+        # the function-invocation tool loop reuses across iterations; otherwise inserted
+        # summaries would be lost on a throwaway copy while exclusions persisted, silently
+        # dropping older groups (issue #4991).
+        working_messages = messages if isinstance(messages, list) else prepared_messages
         return await apply_compaction(
-            prepared_messages,
+            working_messages,
             strategy=compaction_strategy,
             tokenizer=tokenizer,
         )

{agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_compaction.py RENAMED Viewed

@@ -4,7 +4,7 @@ from __future__ import annotations
 import json
 import logging
-from collections.abc import Mapping, Sequence
+from collections.abc import Iterable, Mapping, Sequence
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -92,10 +92,23 @@ def _is_reasoning_only_assistant(message: Message) -> bool:
     return all(content.type == "text_reasoning" for content in message.contents)
-def _ensure_message_ids(messages: list[Message]) -> None:
+def _ensure_message_ids(
+    messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
+) -> None:
+    existing_ids: set[str] = set(reserved_ids) if reserved_ids is not None else set()
+    existing_ids.update(message.message_id for message in messages if message.message_id)
     for index, message in enumerate(messages):
-        if not message.message_id:
-            message.message_id = f"msg_{index}"
+        if message.message_id:
+            continue
+        candidate = f"msg_{id_offset + index}"
+        if candidate in existing_ids:
+            counter = id_offset + len(messages)
+            candidate = f"msg_{counter}"
+            while candidate in existing_ids:
+                counter += 1
+                candidate = f"msg_{counter}"
+        message.message_id = candidate
+        existing_ids.add(candidate)
 def _group_id_for(message: Message, group_index: int) -> str:
@@ -104,14 +117,27 @@ def _group_id_for(message: Message, group_index: int) -> str:
     return f"group_index_{group_index}"
-def group_messages(messages: list[Message]) -> list[dict[str, Any]]:
+def group_messages(
+    messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
+) -> list[dict[str, Any]]:
     """Compute group spans and metadata for annotation.
+    Args:
+        messages: The messages (or a slice of them) to group.
+    Keyword Args:
+        id_offset: Absolute starting index used when auto-assigning ``message_id``
+            values, so incremental annotation of a list slice produces ids that
+            stay unique across the full list.
+        reserved_ids: Message ids that already exist outside ``messages`` (for
+            example in a preserved prefix). Auto-assigned ids are guaranteed not
+            to collide with these, preventing duplicate ids across the full list.
     Returns:
         Ordered list of lightweight span dicts with keys:
         ``group_id``, ``kind``, ``start_index``, ``end_index``, ``has_reasoning``.
     """
-    _ensure_message_ids(messages)
+    _ensure_message_ids(messages, id_offset=id_offset, reserved_ids=reserved_ids)
     spans: list[dict[str, Any]] = []
     i = 0
     group_index = 0
@@ -439,7 +465,8 @@ def annotate_message_groups(
         if previous_group_index is not None:
             group_index_offset = previous_group_index + 1
-    spans = group_messages(messages[start_index:])
+    reserved_ids = {message.message_id for message in messages[:start_index] if message.message_id}
+    spans = group_messages(messages[start_index:], id_offset=start_index, reserved_ids=reserved_ids)
     for span_index, span in enumerate(spans):
         group_id = str(span["group_id"])
         kind = _coerce_group_kind(span["kind"])
@@ -1277,6 +1304,121 @@ class CompactionProvider(ContextProvider):
         # whether excluded messages are loaded on the next turn.
+class ContextWindowCompactionStrategy:
+    """Token-budget compaction derived from a model's context window size.
+    Computes an input budget from the model's context window and output token
+    limits, then applies a two-phase compaction pipeline:
+    1. **Tool result eviction** — collapses older tool-call groups into summaries
+       when included tokens exceed ``tool_eviction_threshold`` of the input budget.
+    2. **Truncation** — removes oldest non-system groups when included tokens
+       exceed ``truncation_threshold`` of the input budget.
+    The class uses two independent :class:`TokenBudgetComposedStrategy`
+    instances — one per phase — so each fires only when its own threshold
+    is exceeded.
+    Examples:
+        .. code-block:: python
+            from agent_framework import ContextWindowCompactionStrategy, CompactionProvider
+            strategy = ContextWindowCompactionStrategy(
+                max_context_window_tokens=128_000,
+                max_output_tokens=16_384,
+            )
+            provider = CompactionProvider(before_strategy=strategy)
+    """
+    DEFAULT_TOOL_EVICTION_THRESHOLD: float = 0.5
+    """Default fraction of input budget at which tool result eviction triggers."""
+    DEFAULT_TRUNCATION_THRESHOLD: float = 0.8
+    """Default fraction of input budget at which truncation triggers."""
+    def __init__(
+        self,
+        *,
+        max_context_window_tokens: int,
+        max_output_tokens: int,
+        tokenizer: TokenizerProtocol | None = None,
+        tool_eviction_threshold: float = DEFAULT_TOOL_EVICTION_THRESHOLD,
+        truncation_threshold: float = DEFAULT_TRUNCATION_THRESHOLD,
+        keep_last_tool_call_groups: int = 4,
+    ) -> None:
+        """Create a context-window compaction strategy.
+        Keyword Args:
+            max_context_window_tokens: The model's maximum context window size
+                in tokens (e.g. 128,000).
+            max_output_tokens: The model's maximum output tokens per response
+                (e.g. 16,384).
+            tokenizer: Token counter for measuring message sizes. Defaults to
+                :class:`CharacterEstimatorTokenizer` (4 chars/token heuristic).
+            tool_eviction_threshold: Fraction of input budget (0.0, 1.0] at
+                which tool result eviction triggers. Defaults to 0.5.
+            truncation_threshold: Fraction of input budget (0.0, 1.0] at which
+                truncation triggers. Must be ≥ ``tool_eviction_threshold``.
+                Defaults to 0.8.
+            keep_last_tool_call_groups: Number of most recent tool-call groups
+                to retain verbatim during tool eviction. Older groups are
+                collapsed into summaries. Defaults to 4.
+        Raises:
+            ValueError: If thresholds are out of range or inconsistent.
+        """
+        if max_context_window_tokens <= 0:
+            raise ValueError("max_context_window_tokens must be positive.")
+        if max_output_tokens < 0 or max_output_tokens >= max_context_window_tokens:
+            raise ValueError("max_output_tokens must be >= 0 and < max_context_window_tokens.")
+        if not (0.0 < tool_eviction_threshold <= 1.0):
+            raise ValueError("tool_eviction_threshold must be in (0.0, 1.0].")
+        if not (0.0 < truncation_threshold <= 1.0):
+            raise ValueError("truncation_threshold must be in (0.0, 1.0].")
+        if truncation_threshold < tool_eviction_threshold:
+            raise ValueError("truncation_threshold must be >= tool_eviction_threshold.")
+        resolved_tokenizer = tokenizer or CharacterEstimatorTokenizer()
+        input_budget = max_context_window_tokens - max_output_tokens
+        tool_eviction_tokens = int(input_budget * tool_eviction_threshold)
+        truncation_tokens = int(input_budget * truncation_threshold)
+        self.max_context_window_tokens = max_context_window_tokens
+        self.max_output_tokens = max_output_tokens
+        self.input_budget_tokens = input_budget
+        self.tool_eviction_threshold = tool_eviction_threshold
+        self.truncation_threshold = truncation_threshold
+        self._tool_eviction = TokenBudgetComposedStrategy(
+            token_budget=tool_eviction_tokens,
+            tokenizer=resolved_tokenizer,
+            strategies=[
+                ToolResultCompactionStrategy(keep_last_tool_call_groups=keep_last_tool_call_groups),
+            ],
+        )
+        self._truncation = TokenBudgetComposedStrategy(
+            token_budget=truncation_tokens,
+            tokenizer=resolved_tokenizer,
+            strategies=[
+                TruncationStrategy(
+                    max_n=truncation_tokens,
+                    compact_to=tool_eviction_tokens,
+                    tokenizer=resolved_tokenizer,
+                ),
+            ],
+        )
+    async def __call__(self, messages: list[Message]) -> bool:
+        """Apply the two-phase compaction pipeline.
+        Returns:
+            True if compaction changed message inclusion; otherwise False.
+        """
+        changed = await self._tool_eviction(messages)
+        return (await self._truncation(messages)) or changed
 __all__ = [
     "COMPACTION_STATE_KEY",
     "EXCLUDED_KEY",
@@ -1293,6 +1435,7 @@ __all__ = [
     "CharacterEstimatorTokenizer",
     "CompactionProvider",
     "CompactionStrategy",
+    "ContextWindowCompactionStrategy",
     "GroupKind",
     "SelectiveToolCallCompactionStrategy",
     "SlidingWindowStrategy",

{agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_evaluation.py RENAMED Viewed

@@ -311,12 +311,15 @@ class EvalScoreResult:
         score: Numeric score from the evaluator.
         passed: Whether the item passed this evaluator's threshold.
         sample: Optional raw evaluator output (rationale, metadata).
+        dimensions: Per-dimension scores when this evaluator is a rubric
+            evaluator.  ``None`` for non-rubric (e.g. built-in) evaluators.
     """
     name: str
     score: float
     passed: bool | None = None
     sample: dict[str, Any] | None = None
+    dimensions: list[RubricScore] | None = None
 @experimental(feature_id=ExperimentalFeature.EVALS)
@@ -496,6 +499,179 @@ class EvalResults:
                     detail += f" Errored items: {', '.join(summaries)}."
             raise EvalNotPassedError(detail)
+    def assert_score_at_least(
+        self,
+        min_score: float,
+        *,
+        evaluator: str | None = None,
+        msg: str | None = None,
+    ) -> None:
+        """Assert every item's score (optionally filtered by evaluator) is ``>= min_score``.
+        Designed for CI gates on generated rubric evaluators (e.g.
+        ``results.assert_score_at_least(0.80)``).  Includes any
+        sub-results from workflow evaluations.
+        Args:
+            min_score: Minimum acceptable score (inclusive).
+            evaluator: When set, only check scores from the evaluator
+                whose ``EvalScoreResult.name`` matches.
+            msg: Optional custom failure message.
+        Raises:
+            EvalNotPassedError: When any matching score is below the threshold.
+        """
+        offenders: list[str] = []
+        def _check(results: EvalResults) -> None:
+            for item in results.items:
+                for score in item.scores:
+                    if evaluator is not None and score.name != evaluator:
+                        continue
+                    if score.score < min_score:
+                        offenders.append(f"{item.item_id}/{score.name}={score.score:.3f}")
+            for sub in results.sub_results.values():
+                _check(sub)
+        _check(self)
+        if offenders:
+            detail = msg or (
+                f"{len(offenders)} score(s) below threshold {min_score}"
+                f"{' for ' + evaluator if evaluator else ''}: {', '.join(offenders[:5])}"
+                + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
+            )
+            raise EvalNotPassedError(detail)
+    def assert_dimension_score_at_least(
+        self,
+        dimension_id: str,
+        min_score: float,
+        *,
+        evaluator: str | None = None,
+        require_applicable: bool = False,
+        msg: str | None = None,
+    ) -> None:
+        """Assert every item's score for a rubric *dimension* is ``>= min_score``.
+        Walks ``EvalScoreResult.dimensions`` looking for the named
+        dimension across all items (and sub-results).  Non-applicable
+        dimensions are skipped by default; pass
+        ``require_applicable=True`` to fail when no applicable score is
+        produced.
+        Args:
+            dimension_id: Dimension id (matches the rubric definition).
+            min_score: Minimum acceptable dimension score (inclusive).
+            evaluator: When set, only consider scores from the evaluator
+                whose ``EvalScoreResult.name`` matches.
+            require_applicable: When ``True``, missing or non-applicable
+                dimension scores raise.  Defaults to ``False`` (skip).
+            msg: Optional custom failure message.
+        Raises:
+            EvalNotPassedError: When the dimension fails the threshold.
+        """
+        offenders: list[str] = []
+        missing_items: list[str] = []
+        def _check(results: EvalResults) -> None:
+            for item in results.items:
+                found_applicable = False
+                for score in item.scores:
+                    if evaluator is not None and score.name != evaluator:
+                        continue
+                    if not score.dimensions:
+                        continue
+                    for rs in score.dimensions:
+                        if rs.id != dimension_id:
+                            continue
+                        if not rs.applicable:
+                            continue
+                        found_applicable = True
+                        if rs.score is None or rs.score < min_score:
+                            offenders.append(
+                                f"{item.item_id}/{score.name}/{dimension_id}="
+                                f"{rs.score if rs.score is not None else 'None'}"
+                            )
+                if require_applicable and not found_applicable:
+                    missing_items.append(item.item_id)
+            for sub in results.sub_results.values():
+                _check(sub)
+        _check(self)
+        problems: list[str] = []
+        if offenders:
+            problems.append(
+                f"{len(offenders)} dimension score(s) for '{dimension_id}' below {min_score}: "
+                f"{', '.join(offenders[:5])}" + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
+            )
+        if missing_items:
+            problems.append(
+                f"Dimension '{dimension_id}' not applicable on {len(missing_items)} item(s): "
+                f"{', '.join(missing_items[:5])}"
+            )
+        if problems:
+            raise EvalNotPassedError(msg or "; ".join(problems))
+    def assert_no_failed_items(self, msg: str | None = None) -> None:
+        """Assert no item ended in ``fail`` or ``error`` status.
+        Includes any sub-results from workflow evaluations.
+        Args:
+            msg: Optional custom failure message.
+        Raises:
+            EvalNotPassedError: When any item failed or errored.
+        """
+        bad: list[str] = []
+        def _check(results: EvalResults) -> None:
+            for item in results.items:
+                if item.is_failed or item.is_error:
+                    bad.append(f"{item.item_id}:{item.status}")
+            for sub in results.sub_results.values():
+                _check(sub)
+        _check(self)
+        if bad:
+            detail = msg or (
+                f"{len(bad)} item(s) failed or errored: {', '.join(bad[:5])}"
+                + (f" (+{len(bad) - 5} more)" if len(bad) > 5 else "")
+            )
+            raise EvalNotPassedError(detail)
+# endregion
+# region Generated rubric evaluators
+@experimental(feature_id=ExperimentalFeature.EVALS)
+@dataclass(frozen=True)
+class RubricScore:
+    """A single dimension's score from a rubric-based evaluator run.
+    Rubric evaluators emit one ``RubricScore`` per dimension per item.
+    Attached to :class:`EvalScoreResult` as a typed view of the raw
+    ``properties.rubric_scores`` payload returned by providers such as
+    Foundry's generated rubric evaluators.
+    Attributes:
+        id: Dimension id (matches the rubric definition).
+        score: Numeric score, or ``None`` when the dimension was marked
+            non-applicable for this item.
+        applicable: Whether the dimension applied to this item.
+        weight: Dimension weight (mirrors the rubric definition).
+        reason: Short rationale produced by the evaluator.
+    """
+    id: str
+    score: int | None
+    applicable: bool
+    weight: int
+    reason: str
 # endregion

agent-framework-core 1.6.0__tar.gz → 1.8.0__tar.gz

agent-framework-core 1.6.0__tar.gz → 1.8.0__tar.gz