agent-framework-core 1.7.0__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/PKG-INFO +1 -1
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/__init__.py +26 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_clients.py +8 -1
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_compaction.py +34 -7
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_evaluation.py +176 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_feature_stage.py +3 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_agent.py +20 -1
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_background_agents.py +4 -0
- agent_framework_core-1.8.0/agent_framework/_harness/_file_access.py +1018 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_mcp.py +72 -20
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_middleware.py +114 -2
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_serialization.py +45 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_skills.py +666 -198
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_tools.py +38 -4
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_types.py +86 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent.py +2 -1
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_functional.py +2 -1
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_request_info_mixin.py +369 -369
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.py +1 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.pyi +2 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/observability.py +42 -4
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/pyproject.toml +1 -1
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/LICENSE +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/README.md +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_agents.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_docstrings.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_memory.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_mode.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_todo.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_sessions.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_settings.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_telemetry.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_executor.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_utils.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint_encoding.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_const.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_conversation_history.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge_runner.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_events.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_executor.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_function_executor.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_message_utils.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_model_utils.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner_context.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_state.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_typing_utils.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_validation.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_viz.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_builder.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_context.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_executor.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/a2a/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/a2a/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/exceptions.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/hyperlight/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/lab/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/py.typed +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.py +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.pyi +0 -0
- {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/security.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agent-framework-core
|
|
3
|
-
Version: 1.7.0
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: Microsoft Agent Framework for building AI Agents with Python. This is the core package that has all the core abstractions and implementations.
|
|
5
5
|
Author-email: Microsoft <af-support@microsoft.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -71,6 +71,7 @@ from ._evaluation import (
|
|
|
71
71
|
Evaluator,
|
|
72
72
|
ExpectedToolCall,
|
|
73
73
|
LocalEvaluator,
|
|
74
|
+
RubricScore,
|
|
74
75
|
evaluate_agent,
|
|
75
76
|
evaluate_workflow,
|
|
76
77
|
evaluator,
|
|
@@ -90,6 +91,16 @@ from ._harness._background_agents import (
|
|
|
90
91
|
BackgroundTaskInfo,
|
|
91
92
|
BackgroundTaskStatus,
|
|
92
93
|
)
|
|
94
|
+
from ._harness._file_access import (
|
|
95
|
+
DEFAULT_FILE_ACCESS_INSTRUCTIONS,
|
|
96
|
+
DEFAULT_FILE_ACCESS_SOURCE_ID,
|
|
97
|
+
AgentFileStore,
|
|
98
|
+
FileAccessProvider,
|
|
99
|
+
FileSearchMatch,
|
|
100
|
+
FileSearchResult,
|
|
101
|
+
FileSystemAgentFileStore,
|
|
102
|
+
InMemoryAgentFileStore,
|
|
103
|
+
)
|
|
93
104
|
from ._harness._memory import (
|
|
94
105
|
DEFAULT_MEMORY_SOURCE_ID,
|
|
95
106
|
MemoryContextProvider,
|
|
@@ -157,6 +168,9 @@ from ._skills import (
|
|
|
157
168
|
InlineSkillResource,
|
|
158
169
|
InlineSkillScript,
|
|
159
170
|
InMemorySkillsSource,
|
|
171
|
+
MCPSkill,
|
|
172
|
+
MCPSkillResource,
|
|
173
|
+
MCPSkillsSource,
|
|
160
174
|
Skill,
|
|
161
175
|
SkillFrontmatter,
|
|
162
176
|
SkillResource,
|
|
@@ -309,6 +323,8 @@ __all__ = [
|
|
|
309
323
|
"APP_INFO",
|
|
310
324
|
"COMPACTION_STATE_KEY",
|
|
311
325
|
"DEFAULT_BACKGROUND_AGENTS_SOURCE_ID",
|
|
326
|
+
"DEFAULT_FILE_ACCESS_INSTRUCTIONS",
|
|
327
|
+
"DEFAULT_FILE_ACCESS_SOURCE_ID",
|
|
312
328
|
"DEFAULT_HARNESS_INSTRUCTIONS",
|
|
313
329
|
"DEFAULT_MAX_ITERATIONS",
|
|
314
330
|
"DEFAULT_MEMORY_SOURCE_ID",
|
|
@@ -334,6 +350,7 @@ __all__ = [
|
|
|
334
350
|
"AgentExecutor",
|
|
335
351
|
"AgentExecutorRequest",
|
|
336
352
|
"AgentExecutorResponse",
|
|
353
|
+
"AgentFileStore",
|
|
337
354
|
"AgentFrameworkException",
|
|
338
355
|
"AgentMiddleware",
|
|
339
356
|
"AgentMiddlewareLayer",
|
|
@@ -393,11 +410,15 @@ __all__ = [
|
|
|
393
410
|
"ExperimentalFeature",
|
|
394
411
|
"FanInEdgeGroup",
|
|
395
412
|
"FanOutEdgeGroup",
|
|
413
|
+
"FileAccessProvider",
|
|
396
414
|
"FileCheckpointStorage",
|
|
397
415
|
"FileHistoryProvider",
|
|
416
|
+
"FileSearchMatch",
|
|
417
|
+
"FileSearchResult",
|
|
398
418
|
"FileSkill",
|
|
399
419
|
"FileSkillScript",
|
|
400
420
|
"FileSkillsSource",
|
|
421
|
+
"FileSystemAgentFileStore",
|
|
401
422
|
"FilteringSkillsSource",
|
|
402
423
|
"FinalT",
|
|
403
424
|
"FinishReason",
|
|
@@ -414,6 +435,7 @@ __all__ = [
|
|
|
414
435
|
"GeneratedEmbeddings",
|
|
415
436
|
"GraphConnectivityError",
|
|
416
437
|
"HistoryProvider",
|
|
438
|
+
"InMemoryAgentFileStore",
|
|
417
439
|
"InMemoryCheckpointStorage",
|
|
418
440
|
"InMemoryHistoryProvider",
|
|
419
441
|
"InMemorySkillsSource",
|
|
@@ -425,6 +447,9 @@ __all__ = [
|
|
|
425
447
|
"MCPStdioTool",
|
|
426
448
|
"MCPStreamableHTTPTool",
|
|
427
449
|
"MCPWebsocketTool",
|
|
450
|
+
"MCPSkill",
|
|
451
|
+
"MCPSkillResource",
|
|
452
|
+
"MCPSkillsSource",
|
|
428
453
|
"MemoryContextProvider",
|
|
429
454
|
"MemoryFileStore",
|
|
430
455
|
"MemoryIndexEntry",
|
|
@@ -442,6 +467,7 @@ __all__ = [
|
|
|
442
467
|
"ResponseStream",
|
|
443
468
|
"Role",
|
|
444
469
|
"RoleLiteral",
|
|
470
|
+
"RubricScore",
|
|
445
471
|
"RunContext",
|
|
446
472
|
"Runner",
|
|
447
473
|
"RunnerContext",
|
|
@@ -380,8 +380,15 @@ class BaseChatClient(SerializationMixin, ABC, Generic[OptionsCoT]):
|
|
|
380
380
|
return prepared_messages
|
|
381
381
|
from ._compaction import apply_compaction
|
|
382
382
|
|
|
383
|
+
# Compact the caller's list in place when possible. A compaction operation has
|
|
384
|
+
# two halves: exclusion flags (mutated on shared Message objects) and inserted
|
|
385
|
+
# summary messages. Operating on the original list keeps both halves on the list
|
|
386
|
+
# the function-invocation tool loop reuses across iterations; otherwise inserted
|
|
387
|
+
# summaries would be lost on a throwaway copy while exclusions persisted, silently
|
|
388
|
+
# dropping older groups (issue #4991).
|
|
389
|
+
working_messages = messages if isinstance(messages, list) else prepared_messages
|
|
383
390
|
return await apply_compaction(
|
|
384
|
-
|
|
391
|
+
working_messages,
|
|
385
392
|
strategy=compaction_strategy,
|
|
386
393
|
tokenizer=tokenizer,
|
|
387
394
|
)
|
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
|
-
from collections.abc import Mapping, Sequence
|
|
7
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
8
8
|
from typing import (
|
|
9
9
|
TYPE_CHECKING,
|
|
10
10
|
Any,
|
|
@@ -92,10 +92,23 @@ def _is_reasoning_only_assistant(message: Message) -> bool:
|
|
|
92
92
|
return all(content.type == "text_reasoning" for content in message.contents)
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def _ensure_message_ids(messages: list[Message]) -> None:
|
|
95
|
+
def _ensure_message_ids(
|
|
96
|
+
messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
|
|
97
|
+
) -> None:
|
|
98
|
+
existing_ids: set[str] = set(reserved_ids) if reserved_ids is not None else set()
|
|
99
|
+
existing_ids.update(message.message_id for message in messages if message.message_id)
|
|
96
100
|
for index, message in enumerate(messages):
|
|
97
|
-
if
|
|
98
|
-
|
|
101
|
+
if message.message_id:
|
|
102
|
+
continue
|
|
103
|
+
candidate = f"msg_{id_offset + index}"
|
|
104
|
+
if candidate in existing_ids:
|
|
105
|
+
counter = id_offset + len(messages)
|
|
106
|
+
candidate = f"msg_{counter}"
|
|
107
|
+
while candidate in existing_ids:
|
|
108
|
+
counter += 1
|
|
109
|
+
candidate = f"msg_{counter}"
|
|
110
|
+
message.message_id = candidate
|
|
111
|
+
existing_ids.add(candidate)
|
|
99
112
|
|
|
100
113
|
|
|
101
114
|
def _group_id_for(message: Message, group_index: int) -> str:
|
|
@@ -104,14 +117,27 @@ def _group_id_for(message: Message, group_index: int) -> str:
|
|
|
104
117
|
return f"group_index_{group_index}"
|
|
105
118
|
|
|
106
119
|
|
|
107
|
-
def group_messages(messages: list[Message]) -> list[dict[str, Any]]:
|
|
120
|
+
def group_messages(
|
|
121
|
+
messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
|
|
122
|
+
) -> list[dict[str, Any]]:
|
|
108
123
|
"""Compute group spans and metadata for annotation.
|
|
109
124
|
|
|
125
|
+
Args:
|
|
126
|
+
messages: The messages (or a slice of them) to group.
|
|
127
|
+
|
|
128
|
+
Keyword Args:
|
|
129
|
+
id_offset: Absolute starting index used when auto-assigning ``message_id``
|
|
130
|
+
values, so incremental annotation of a list slice produces ids that
|
|
131
|
+
stay unique across the full list.
|
|
132
|
+
reserved_ids: Message ids that already exist outside ``messages`` (for
|
|
133
|
+
example in a preserved prefix). Auto-assigned ids are guaranteed not
|
|
134
|
+
to collide with these, preventing duplicate ids across the full list.
|
|
135
|
+
|
|
110
136
|
Returns:
|
|
111
137
|
Ordered list of lightweight span dicts with keys:
|
|
112
138
|
``group_id``, ``kind``, ``start_index``, ``end_index``, ``has_reasoning``.
|
|
113
139
|
"""
|
|
114
|
-
_ensure_message_ids(messages)
|
|
140
|
+
_ensure_message_ids(messages, id_offset=id_offset, reserved_ids=reserved_ids)
|
|
115
141
|
spans: list[dict[str, Any]] = []
|
|
116
142
|
i = 0
|
|
117
143
|
group_index = 0
|
|
@@ -439,7 +465,8 @@ def annotate_message_groups(
|
|
|
439
465
|
if previous_group_index is not None:
|
|
440
466
|
group_index_offset = previous_group_index + 1
|
|
441
467
|
|
|
442
|
-
|
|
468
|
+
reserved_ids = {message.message_id for message in messages[:start_index] if message.message_id}
|
|
469
|
+
spans = group_messages(messages[start_index:], id_offset=start_index, reserved_ids=reserved_ids)
|
|
443
470
|
for span_index, span in enumerate(spans):
|
|
444
471
|
group_id = str(span["group_id"])
|
|
445
472
|
kind = _coerce_group_kind(span["kind"])
|
|
@@ -311,12 +311,15 @@ class EvalScoreResult:
|
|
|
311
311
|
score: Numeric score from the evaluator.
|
|
312
312
|
passed: Whether the item passed this evaluator's threshold.
|
|
313
313
|
sample: Optional raw evaluator output (rationale, metadata).
|
|
314
|
+
dimensions: Per-dimension scores when this evaluator is a rubric
|
|
315
|
+
evaluator. ``None`` for non-rubric (e.g. built-in) evaluators.
|
|
314
316
|
"""
|
|
315
317
|
|
|
316
318
|
name: str
|
|
317
319
|
score: float
|
|
318
320
|
passed: bool | None = None
|
|
319
321
|
sample: dict[str, Any] | None = None
|
|
322
|
+
dimensions: list[RubricScore] | None = None
|
|
320
323
|
|
|
321
324
|
|
|
322
325
|
@experimental(feature_id=ExperimentalFeature.EVALS)
|
|
@@ -496,6 +499,179 @@ class EvalResults:
|
|
|
496
499
|
detail += f" Errored items: {', '.join(summaries)}."
|
|
497
500
|
raise EvalNotPassedError(detail)
|
|
498
501
|
|
|
502
|
+
def assert_score_at_least(
|
|
503
|
+
self,
|
|
504
|
+
min_score: float,
|
|
505
|
+
*,
|
|
506
|
+
evaluator: str | None = None,
|
|
507
|
+
msg: str | None = None,
|
|
508
|
+
) -> None:
|
|
509
|
+
"""Assert every item's score (optionally filtered by evaluator) is ``>= min_score``.
|
|
510
|
+
|
|
511
|
+
Designed for CI gates on generated rubric evaluators (e.g.
|
|
512
|
+
``results.assert_score_at_least(0.80)``). Includes any
|
|
513
|
+
sub-results from workflow evaluations.
|
|
514
|
+
|
|
515
|
+
Args:
|
|
516
|
+
min_score: Minimum acceptable score (inclusive).
|
|
517
|
+
evaluator: When set, only check scores from the evaluator
|
|
518
|
+
whose ``EvalScoreResult.name`` matches.
|
|
519
|
+
msg: Optional custom failure message.
|
|
520
|
+
|
|
521
|
+
Raises:
|
|
522
|
+
EvalNotPassedError: When any matching score is below the threshold.
|
|
523
|
+
"""
|
|
524
|
+
offenders: list[str] = []
|
|
525
|
+
|
|
526
|
+
def _check(results: EvalResults) -> None:
|
|
527
|
+
for item in results.items:
|
|
528
|
+
for score in item.scores:
|
|
529
|
+
if evaluator is not None and score.name != evaluator:
|
|
530
|
+
continue
|
|
531
|
+
if score.score < min_score:
|
|
532
|
+
offenders.append(f"{item.item_id}/{score.name}={score.score:.3f}")
|
|
533
|
+
for sub in results.sub_results.values():
|
|
534
|
+
_check(sub)
|
|
535
|
+
|
|
536
|
+
_check(self)
|
|
537
|
+
if offenders:
|
|
538
|
+
detail = msg or (
|
|
539
|
+
f"{len(offenders)} score(s) below threshold {min_score}"
|
|
540
|
+
f"{' for ' + evaluator if evaluator else ''}: {', '.join(offenders[:5])}"
|
|
541
|
+
+ (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
|
|
542
|
+
)
|
|
543
|
+
raise EvalNotPassedError(detail)
|
|
544
|
+
|
|
545
|
+
def assert_dimension_score_at_least(
|
|
546
|
+
self,
|
|
547
|
+
dimension_id: str,
|
|
548
|
+
min_score: float,
|
|
549
|
+
*,
|
|
550
|
+
evaluator: str | None = None,
|
|
551
|
+
require_applicable: bool = False,
|
|
552
|
+
msg: str | None = None,
|
|
553
|
+
) -> None:
|
|
554
|
+
"""Assert every item's score for a rubric *dimension* is ``>= min_score``.
|
|
555
|
+
|
|
556
|
+
Walks ``EvalScoreResult.dimensions`` looking for the named
|
|
557
|
+
dimension across all items (and sub-results). Non-applicable
|
|
558
|
+
dimensions are skipped by default; pass
|
|
559
|
+
``require_applicable=True`` to fail when no applicable score is
|
|
560
|
+
produced.
|
|
561
|
+
|
|
562
|
+
Args:
|
|
563
|
+
dimension_id: Dimension id (matches the rubric definition).
|
|
564
|
+
min_score: Minimum acceptable dimension score (inclusive).
|
|
565
|
+
evaluator: When set, only consider scores from the evaluator
|
|
566
|
+
whose ``EvalScoreResult.name`` matches.
|
|
567
|
+
require_applicable: When ``True``, missing or non-applicable
|
|
568
|
+
dimension scores raise. Defaults to ``False`` (skip).
|
|
569
|
+
msg: Optional custom failure message.
|
|
570
|
+
|
|
571
|
+
Raises:
|
|
572
|
+
EvalNotPassedError: When the dimension fails the threshold.
|
|
573
|
+
"""
|
|
574
|
+
offenders: list[str] = []
|
|
575
|
+
missing_items: list[str] = []
|
|
576
|
+
|
|
577
|
+
def _check(results: EvalResults) -> None:
|
|
578
|
+
for item in results.items:
|
|
579
|
+
found_applicable = False
|
|
580
|
+
for score in item.scores:
|
|
581
|
+
if evaluator is not None and score.name != evaluator:
|
|
582
|
+
continue
|
|
583
|
+
if not score.dimensions:
|
|
584
|
+
continue
|
|
585
|
+
for rs in score.dimensions:
|
|
586
|
+
if rs.id != dimension_id:
|
|
587
|
+
continue
|
|
588
|
+
if not rs.applicable:
|
|
589
|
+
continue
|
|
590
|
+
found_applicable = True
|
|
591
|
+
if rs.score is None or rs.score < min_score:
|
|
592
|
+
offenders.append(
|
|
593
|
+
f"{item.item_id}/{score.name}/{dimension_id}="
|
|
594
|
+
f"{rs.score if rs.score is not None else 'None'}"
|
|
595
|
+
)
|
|
596
|
+
if require_applicable and not found_applicable:
|
|
597
|
+
missing_items.append(item.item_id)
|
|
598
|
+
for sub in results.sub_results.values():
|
|
599
|
+
_check(sub)
|
|
600
|
+
|
|
601
|
+
_check(self)
|
|
602
|
+
problems: list[str] = []
|
|
603
|
+
if offenders:
|
|
604
|
+
problems.append(
|
|
605
|
+
f"{len(offenders)} dimension score(s) for '{dimension_id}' below {min_score}: "
|
|
606
|
+
f"{', '.join(offenders[:5])}" + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
|
|
607
|
+
)
|
|
608
|
+
if missing_items:
|
|
609
|
+
problems.append(
|
|
610
|
+
f"Dimension '{dimension_id}' not applicable on {len(missing_items)} item(s): "
|
|
611
|
+
f"{', '.join(missing_items[:5])}"
|
|
612
|
+
)
|
|
613
|
+
if problems:
|
|
614
|
+
raise EvalNotPassedError(msg or "; ".join(problems))
|
|
615
|
+
|
|
616
|
+
def assert_no_failed_items(self, msg: str | None = None) -> None:
|
|
617
|
+
"""Assert no item ended in ``fail`` or ``error`` status.
|
|
618
|
+
|
|
619
|
+
Includes any sub-results from workflow evaluations.
|
|
620
|
+
|
|
621
|
+
Args:
|
|
622
|
+
msg: Optional custom failure message.
|
|
623
|
+
|
|
624
|
+
Raises:
|
|
625
|
+
EvalNotPassedError: When any item failed or errored.
|
|
626
|
+
"""
|
|
627
|
+
bad: list[str] = []
|
|
628
|
+
|
|
629
|
+
def _check(results: EvalResults) -> None:
|
|
630
|
+
for item in results.items:
|
|
631
|
+
if item.is_failed or item.is_error:
|
|
632
|
+
bad.append(f"{item.item_id}:{item.status}")
|
|
633
|
+
for sub in results.sub_results.values():
|
|
634
|
+
_check(sub)
|
|
635
|
+
|
|
636
|
+
_check(self)
|
|
637
|
+
if bad:
|
|
638
|
+
detail = msg or (
|
|
639
|
+
f"{len(bad)} item(s) failed or errored: {', '.join(bad[:5])}"
|
|
640
|
+
+ (f" (+{len(bad) - 5} more)" if len(bad) > 5 else "")
|
|
641
|
+
)
|
|
642
|
+
raise EvalNotPassedError(detail)
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
# endregion
|
|
646
|
+
|
|
647
|
+
# region Generated rubric evaluators
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
@experimental(feature_id=ExperimentalFeature.EVALS)
|
|
651
|
+
@dataclass(frozen=True)
|
|
652
|
+
class RubricScore:
|
|
653
|
+
"""A single dimension's score from a rubric-based evaluator run.
|
|
654
|
+
|
|
655
|
+
Rubric evaluators emit one ``RubricScore`` per dimension per item.
|
|
656
|
+
Attached to :class:`EvalScoreResult` as a typed view of the raw
|
|
657
|
+
``properties.rubric_scores`` payload returned by providers such as
|
|
658
|
+
Foundry's generated rubric evaluators.
|
|
659
|
+
|
|
660
|
+
Attributes:
|
|
661
|
+
id: Dimension id (matches the rubric definition).
|
|
662
|
+
score: Numeric score, or ``None`` when the dimension was marked
|
|
663
|
+
non-applicable for this item.
|
|
664
|
+
applicable: Whether the dimension applied to this item.
|
|
665
|
+
weight: Dimension weight (mirrors the rubric definition).
|
|
666
|
+
reason: Short rationale produced by the evaluator.
|
|
667
|
+
"""
|
|
668
|
+
|
|
669
|
+
id: str
|
|
670
|
+
score: int | None
|
|
671
|
+
applicable: bool
|
|
672
|
+
weight: int
|
|
673
|
+
reason: str
|
|
674
|
+
|
|
499
675
|
|
|
500
676
|
# endregion
|
|
501
677
|
|
|
@@ -50,6 +50,7 @@ class ExperimentalFeature(str, Enum):
|
|
|
50
50
|
on enum membership or attribute presence over time.
|
|
51
51
|
"""
|
|
52
52
|
|
|
53
|
+
DECLARATIVE_AGENTS = "DECLARATIVE_AGENTS"
|
|
53
54
|
EVALS = "EVALS"
|
|
54
55
|
FILE_HISTORY = "FILE_HISTORY"
|
|
55
56
|
FIDES = "FIDES"
|
|
@@ -57,6 +58,8 @@ class ExperimentalFeature(str, Enum):
|
|
|
57
58
|
FOUNDRY_PREVIEW_TOOLS = "FOUNDRY_PREVIEW_TOOLS"
|
|
58
59
|
FUNCTIONAL_WORKFLOWS = "FUNCTIONAL_WORKFLOWS"
|
|
59
60
|
HARNESS = "HARNESS"
|
|
61
|
+
MCP_SKILLS = "MCP_SKILLS"
|
|
62
|
+
PROGRESSIVE_TOOLS = "PROGRESSIVE_TOOLS"
|
|
60
63
|
SKILLS = "SKILLS"
|
|
61
64
|
TO_PROMPT_AGENT = "TO_PROMPT_AGENT"
|
|
62
65
|
|
{agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_agent.py
RENAMED
|
@@ -14,12 +14,13 @@ import logging
|
|
|
14
14
|
from collections.abc import Callable, Sequence
|
|
15
15
|
from typing import TYPE_CHECKING, Any
|
|
16
16
|
|
|
17
|
-
from .._agents import Agent
|
|
17
|
+
from .._agents import Agent, SupportsAgentRun
|
|
18
18
|
from .._clients import SupportsWebSearchTool
|
|
19
19
|
from .._compaction import CompactionProvider, ContextWindowCompactionStrategy, ToolResultCompactionStrategy
|
|
20
20
|
from .._feature_stage import ExperimentalFeature, experimental
|
|
21
21
|
from .._sessions import ContextProvider, HistoryProvider, InMemoryHistoryProvider
|
|
22
22
|
from .._skills import SkillsProvider
|
|
23
|
+
from ._background_agents import BackgroundAgentsProvider
|
|
23
24
|
from ._memory import MemoryContextProvider, MemoryStore
|
|
24
25
|
from ._mode import AgentModeProvider
|
|
25
26
|
from ._todo import TodoProvider
|
|
@@ -103,6 +104,8 @@ def _assemble_context_providers(
|
|
|
103
104
|
memory_store: MemoryStore | None,
|
|
104
105
|
skills_provider: SkillsProvider | None,
|
|
105
106
|
skills_paths: Sequence[str] | None,
|
|
107
|
+
background_agents: Sequence[SupportsAgentRun] | None,
|
|
108
|
+
background_agents_instructions: str | None,
|
|
106
109
|
extra_context_providers: Sequence[ContextProvider] | None,
|
|
107
110
|
) -> list[ContextProvider]:
|
|
108
111
|
"""Assemble the ordered list of context providers."""
|
|
@@ -130,6 +133,10 @@ def _assemble_context_providers(
|
|
|
130
133
|
if skills_paths:
|
|
131
134
|
providers.append(SkillsProvider.from_paths(*skills_paths))
|
|
132
135
|
|
|
136
|
+
# Background agents are opt-in: only added when agents are provided.
|
|
137
|
+
if background_agents:
|
|
138
|
+
providers.append(BackgroundAgentsProvider(background_agents, instructions=background_agents_instructions))
|
|
139
|
+
|
|
133
140
|
# Append any user-supplied additional providers.
|
|
134
141
|
if extra_context_providers:
|
|
135
142
|
providers.extend(extra_context_providers)
|
|
@@ -165,6 +172,8 @@ def create_harness_agent(
|
|
|
165
172
|
memory_store: MemoryStore | None = None,
|
|
166
173
|
skills_provider: SkillsProvider | None = None,
|
|
167
174
|
skills_paths: Sequence[str] | None = None,
|
|
175
|
+
background_agents: Sequence[SupportsAgentRun] | None = None,
|
|
176
|
+
background_agents_instructions: str | None = None,
|
|
168
177
|
disable_web_search: bool = False,
|
|
169
178
|
otel_provider_name: str | None = None,
|
|
170
179
|
context_providers: Sequence[ContextProvider] | None = None,
|
|
@@ -182,6 +191,7 @@ def create_harness_agent(
|
|
|
182
191
|
- **AgentModeProvider** — plan/execute mode tracking
|
|
183
192
|
- **MemoryContextProvider** — file-based durable memory (when ``memory_store`` provided)
|
|
184
193
|
- **SkillsProvider** — skill discovery and progressive loading
|
|
194
|
+
- **BackgroundAgentsProvider** — delegate work to background sub-agents
|
|
185
195
|
- **OpenTelemetry** — observability via ``AgentTelemetryLayer``
|
|
186
196
|
|
|
187
197
|
Each feature can be disabled or customized via keyword arguments.
|
|
@@ -253,6 +263,13 @@ def create_harness_agent(
|
|
|
253
263
|
skills_paths: Paths for file-based skill discovery (looks for SKILL.md files).
|
|
254
264
|
Can be combined with ``skills_provider``. When neither ``skills_provider``
|
|
255
265
|
nor ``skills_paths`` is provided, no SkillsProvider is added.
|
|
266
|
+
background_agents: Collection of agents available for background task delegation.
|
|
267
|
+
When provided, a ``BackgroundAgentsProvider`` is automatically included,
|
|
268
|
+
enabling the agent to start, monitor, and retrieve results from background tasks.
|
|
269
|
+
Each agent must have a non-empty, unique name (case-insensitive).
|
|
270
|
+
background_agents_instructions: Optional instruction override for the
|
|
271
|
+
``BackgroundAgentsProvider``. May include ``{background_agents}`` placeholder
|
|
272
|
+
which will be replaced with the agent listing.
|
|
256
273
|
disable_web_search: When True, skip automatic web search tool inclusion.
|
|
257
274
|
When False (default), the web search tool is automatically added if the
|
|
258
275
|
client implements SupportsWebSearchTool. A warning is logged if the client
|
|
@@ -302,6 +319,8 @@ def create_harness_agent(
|
|
|
302
319
|
memory_store=memory_store,
|
|
303
320
|
skills_provider=skills_provider,
|
|
304
321
|
skills_paths=skills_paths,
|
|
322
|
+
background_agents=background_agents,
|
|
323
|
+
background_agents_instructions=background_agents_instructions,
|
|
305
324
|
extra_context_providers=context_providers,
|
|
306
325
|
)
|
|
307
326
|
|
|
@@ -349,6 +349,8 @@ class BackgroundAgentsProvider(ContextProvider):
|
|
|
349
349
|
_save_provider_state(session, provider_state, source_id=source_id)
|
|
350
350
|
return f"Background task {task_id} started on agent '{agent_name}'."
|
|
351
351
|
|
|
352
|
+
background_agents_start_task._invoke_sync_on_event_loop = True # pyright: ignore[reportPrivateUsage]
|
|
353
|
+
|
|
352
354
|
@tool(name="background_agents_wait_for_first_completion", approval_mode="never_require")
|
|
353
355
|
async def background_agents_wait_for_first_completion(task_ids: list[int]) -> str:
|
|
354
356
|
"""Block until the first of the specified background tasks completes. Returns the completed task's ID."""
|
|
@@ -471,6 +473,8 @@ class BackgroundAgentsProvider(ContextProvider):
|
|
|
471
473
|
_save_provider_state(session, provider_state, source_id=source_id)
|
|
472
474
|
return f"Task {task_id} continued with new input."
|
|
473
475
|
|
|
476
|
+
background_agents_continue_task._invoke_sync_on_event_loop = True # pyright: ignore[reportPrivateUsage]
|
|
477
|
+
|
|
474
478
|
@tool(name="background_agents_clear_completed_task", approval_mode="never_require")
|
|
475
479
|
def background_agents_clear_completed_task(task_id: int) -> str:
|
|
476
480
|
"""Remove a completed or failed task and release its session to free memory."""
|