agent-framework-core 1.7.0__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/PKG-INFO +1 -1
  2. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/__init__.py +26 -0
  3. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_clients.py +8 -1
  4. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_compaction.py +34 -7
  5. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_evaluation.py +176 -0
  6. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_feature_stage.py +3 -0
  7. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_agent.py +20 -1
  8. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_background_agents.py +4 -0
  9. agent_framework_core-1.8.0/agent_framework/_harness/_file_access.py +1018 -0
  10. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_mcp.py +72 -20
  11. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_middleware.py +114 -2
  12. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_serialization.py +45 -0
  13. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_skills.py +666 -198
  14. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_tools.py +38 -4
  15. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_types.py +86 -0
  16. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent.py +2 -1
  17. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_functional.py +2 -1
  18. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_request_info_mixin.py +369 -369
  19. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.py +1 -0
  20. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.pyi +2 -0
  21. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/observability.py +42 -4
  22. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/pyproject.toml +1 -1
  23. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/LICENSE +0 -0
  24. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/README.md +0 -0
  25. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_agents.py +0 -0
  26. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_docstrings.py +0 -0
  27. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/__init__.py +0 -0
  28. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_memory.py +0 -0
  29. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_mode.py +0 -0
  30. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_todo.py +0 -0
  31. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_sessions.py +0 -0
  32. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_settings.py +0 -0
  33. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_telemetry.py +0 -0
  34. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/__init__.py +0 -0
  35. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_executor.py +0 -0
  36. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_utils.py +0 -0
  37. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint.py +0 -0
  38. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint_encoding.py +0 -0
  39. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_const.py +0 -0
  40. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_conversation_history.py +0 -0
  41. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge.py +0 -0
  42. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge_runner.py +0 -0
  43. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_events.py +0 -0
  44. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_executor.py +0 -0
  45. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_function_executor.py +0 -0
  46. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_message_utils.py +0 -0
  47. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_model_utils.py +0 -0
  48. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner.py +0 -0
  49. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner_context.py +0 -0
  50. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_state.py +0 -0
  51. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_typing_utils.py +0 -0
  52. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_validation.py +0 -0
  53. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_viz.py +0 -0
  54. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow.py +0 -0
  55. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_builder.py +0 -0
  56. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_context.py +0 -0
  57. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_executor.py +0 -0
  58. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/a2a/__init__.py +0 -0
  59. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/a2a/__init__.pyi +0 -0
  60. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.py +0 -0
  61. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.pyi +0 -0
  62. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.py +0 -0
  63. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.pyi +0 -0
  64. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.py +0 -0
  65. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.pyi +0 -0
  66. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.py +0 -0
  67. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.pyi +0 -0
  68. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.py +0 -0
  69. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.pyi +0 -0
  70. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.py +0 -0
  71. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.pyi +0 -0
  72. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.py +0 -0
  73. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.pyi +0 -0
  74. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/exceptions.py +0 -0
  75. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.py +0 -0
  76. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.pyi +0 -0
  77. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.py +0 -0
  78. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.pyi +0 -0
  79. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/hyperlight/__init__.py +0 -0
  80. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/lab/__init__.py +0 -0
  81. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.py +0 -0
  82. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.pyi +0 -0
  83. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.py +0 -0
  84. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.pyi +0 -0
  85. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.py +0 -0
  86. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.pyi +0 -0
  87. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.py +0 -0
  88. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.pyi +0 -0
  89. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.py +0 -0
  90. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.pyi +0 -0
  91. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/py.typed +0 -0
  92. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.py +0 -0
  93. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.pyi +0 -0
  94. {agent_framework_core-1.7.0 → agent_framework_core-1.8.0}/agent_framework/security.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agent-framework-core
3
- Version: 1.7.0
3
+ Version: 1.8.0
4
4
  Summary: Microsoft Agent Framework for building AI Agents with Python. This is the core package that has all the core abstractions and implementations.
5
5
  Author-email: Microsoft <af-support@microsoft.com>
6
6
  Requires-Python: >=3.10
@@ -71,6 +71,7 @@ from ._evaluation import (
71
71
  Evaluator,
72
72
  ExpectedToolCall,
73
73
  LocalEvaluator,
74
+ RubricScore,
74
75
  evaluate_agent,
75
76
  evaluate_workflow,
76
77
  evaluator,
@@ -90,6 +91,16 @@ from ._harness._background_agents import (
90
91
  BackgroundTaskInfo,
91
92
  BackgroundTaskStatus,
92
93
  )
94
+ from ._harness._file_access import (
95
+ DEFAULT_FILE_ACCESS_INSTRUCTIONS,
96
+ DEFAULT_FILE_ACCESS_SOURCE_ID,
97
+ AgentFileStore,
98
+ FileAccessProvider,
99
+ FileSearchMatch,
100
+ FileSearchResult,
101
+ FileSystemAgentFileStore,
102
+ InMemoryAgentFileStore,
103
+ )
93
104
  from ._harness._memory import (
94
105
  DEFAULT_MEMORY_SOURCE_ID,
95
106
  MemoryContextProvider,
@@ -157,6 +168,9 @@ from ._skills import (
157
168
  InlineSkillResource,
158
169
  InlineSkillScript,
159
170
  InMemorySkillsSource,
171
+ MCPSkill,
172
+ MCPSkillResource,
173
+ MCPSkillsSource,
160
174
  Skill,
161
175
  SkillFrontmatter,
162
176
  SkillResource,
@@ -309,6 +323,8 @@ __all__ = [
309
323
  "APP_INFO",
310
324
  "COMPACTION_STATE_KEY",
311
325
  "DEFAULT_BACKGROUND_AGENTS_SOURCE_ID",
326
+ "DEFAULT_FILE_ACCESS_INSTRUCTIONS",
327
+ "DEFAULT_FILE_ACCESS_SOURCE_ID",
312
328
  "DEFAULT_HARNESS_INSTRUCTIONS",
313
329
  "DEFAULT_MAX_ITERATIONS",
314
330
  "DEFAULT_MEMORY_SOURCE_ID",
@@ -334,6 +350,7 @@ __all__ = [
334
350
  "AgentExecutor",
335
351
  "AgentExecutorRequest",
336
352
  "AgentExecutorResponse",
353
+ "AgentFileStore",
337
354
  "AgentFrameworkException",
338
355
  "AgentMiddleware",
339
356
  "AgentMiddlewareLayer",
@@ -393,11 +410,15 @@ __all__ = [
393
410
  "ExperimentalFeature",
394
411
  "FanInEdgeGroup",
395
412
  "FanOutEdgeGroup",
413
+ "FileAccessProvider",
396
414
  "FileCheckpointStorage",
397
415
  "FileHistoryProvider",
416
+ "FileSearchMatch",
417
+ "FileSearchResult",
398
418
  "FileSkill",
399
419
  "FileSkillScript",
400
420
  "FileSkillsSource",
421
+ "FileSystemAgentFileStore",
401
422
  "FilteringSkillsSource",
402
423
  "FinalT",
403
424
  "FinishReason",
@@ -414,6 +435,7 @@ __all__ = [
414
435
  "GeneratedEmbeddings",
415
436
  "GraphConnectivityError",
416
437
  "HistoryProvider",
438
+ "InMemoryAgentFileStore",
417
439
  "InMemoryCheckpointStorage",
418
440
  "InMemoryHistoryProvider",
419
441
  "InMemorySkillsSource",
@@ -425,6 +447,9 @@ __all__ = [
425
447
  "MCPStdioTool",
426
448
  "MCPStreamableHTTPTool",
427
449
  "MCPWebsocketTool",
450
+ "MCPSkill",
451
+ "MCPSkillResource",
452
+ "MCPSkillsSource",
428
453
  "MemoryContextProvider",
429
454
  "MemoryFileStore",
430
455
  "MemoryIndexEntry",
@@ -442,6 +467,7 @@ __all__ = [
442
467
  "ResponseStream",
443
468
  "Role",
444
469
  "RoleLiteral",
470
+ "RubricScore",
445
471
  "RunContext",
446
472
  "Runner",
447
473
  "RunnerContext",
@@ -380,8 +380,15 @@ class BaseChatClient(SerializationMixin, ABC, Generic[OptionsCoT]):
380
380
  return prepared_messages
381
381
  from ._compaction import apply_compaction
382
382
 
383
+ # Compact the caller's list in place when possible. A compaction operation has
384
+ # two halves: exclusion flags (mutated on shared Message objects) and inserted
385
+ # summary messages. Operating on the original list keeps both halves on the list
386
+ # the function-invocation tool loop reuses across iterations; otherwise inserted
387
+ # summaries would be lost on a throwaway copy while exclusions persisted, silently
388
+ # dropping older groups (issue #4991).
389
+ working_messages = messages if isinstance(messages, list) else prepared_messages
383
390
  return await apply_compaction(
384
- prepared_messages,
391
+ working_messages,
385
392
  strategy=compaction_strategy,
386
393
  tokenizer=tokenizer,
387
394
  )
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import json
6
6
  import logging
7
- from collections.abc import Mapping, Sequence
7
+ from collections.abc import Iterable, Mapping, Sequence
8
8
  from typing import (
9
9
  TYPE_CHECKING,
10
10
  Any,
@@ -92,10 +92,23 @@ def _is_reasoning_only_assistant(message: Message) -> bool:
92
92
  return all(content.type == "text_reasoning" for content in message.contents)
93
93
 
94
94
 
95
- def _ensure_message_ids(messages: list[Message]) -> None:
95
+ def _ensure_message_ids(
96
+ messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
97
+ ) -> None:
98
+ existing_ids: set[str] = set(reserved_ids) if reserved_ids is not None else set()
99
+ existing_ids.update(message.message_id for message in messages if message.message_id)
96
100
  for index, message in enumerate(messages):
97
- if not message.message_id:
98
- message.message_id = f"msg_{index}"
101
+ if message.message_id:
102
+ continue
103
+ candidate = f"msg_{id_offset + index}"
104
+ if candidate in existing_ids:
105
+ counter = id_offset + len(messages)
106
+ candidate = f"msg_{counter}"
107
+ while candidate in existing_ids:
108
+ counter += 1
109
+ candidate = f"msg_{counter}"
110
+ message.message_id = candidate
111
+ existing_ids.add(candidate)
99
112
 
100
113
 
101
114
  def _group_id_for(message: Message, group_index: int) -> str:
@@ -104,14 +117,27 @@ def _group_id_for(message: Message, group_index: int) -> str:
104
117
  return f"group_index_{group_index}"
105
118
 
106
119
 
107
- def group_messages(messages: list[Message]) -> list[dict[str, Any]]:
120
+ def group_messages(
121
+ messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
122
+ ) -> list[dict[str, Any]]:
108
123
  """Compute group spans and metadata for annotation.
109
124
 
125
+ Args:
126
+ messages: The messages (or a slice of them) to group.
127
+
128
+ Keyword Args:
129
+ id_offset: Absolute starting index used when auto-assigning ``message_id``
130
+ values, so incremental annotation of a list slice produces ids that
131
+ stay unique across the full list.
132
+ reserved_ids: Message ids that already exist outside ``messages`` (for
133
+ example in a preserved prefix). Auto-assigned ids are guaranteed not
134
+ to collide with these, preventing duplicate ids across the full list.
135
+
110
136
  Returns:
111
137
  Ordered list of lightweight span dicts with keys:
112
138
  ``group_id``, ``kind``, ``start_index``, ``end_index``, ``has_reasoning``.
113
139
  """
114
- _ensure_message_ids(messages)
140
+ _ensure_message_ids(messages, id_offset=id_offset, reserved_ids=reserved_ids)
115
141
  spans: list[dict[str, Any]] = []
116
142
  i = 0
117
143
  group_index = 0
@@ -439,7 +465,8 @@ def annotate_message_groups(
439
465
  if previous_group_index is not None:
440
466
  group_index_offset = previous_group_index + 1
441
467
 
442
- spans = group_messages(messages[start_index:])
468
+ reserved_ids = {message.message_id for message in messages[:start_index] if message.message_id}
469
+ spans = group_messages(messages[start_index:], id_offset=start_index, reserved_ids=reserved_ids)
443
470
  for span_index, span in enumerate(spans):
444
471
  group_id = str(span["group_id"])
445
472
  kind = _coerce_group_kind(span["kind"])
@@ -311,12 +311,15 @@ class EvalScoreResult:
311
311
  score: Numeric score from the evaluator.
312
312
  passed: Whether the item passed this evaluator's threshold.
313
313
  sample: Optional raw evaluator output (rationale, metadata).
314
+ dimensions: Per-dimension scores when this evaluator is a rubric
315
+ evaluator. ``None`` for non-rubric (e.g. built-in) evaluators.
314
316
  """
315
317
 
316
318
  name: str
317
319
  score: float
318
320
  passed: bool | None = None
319
321
  sample: dict[str, Any] | None = None
322
+ dimensions: list[RubricScore] | None = None
320
323
 
321
324
 
322
325
  @experimental(feature_id=ExperimentalFeature.EVALS)
@@ -496,6 +499,179 @@ class EvalResults:
496
499
  detail += f" Errored items: {', '.join(summaries)}."
497
500
  raise EvalNotPassedError(detail)
498
501
 
502
+ def assert_score_at_least(
503
+ self,
504
+ min_score: float,
505
+ *,
506
+ evaluator: str | None = None,
507
+ msg: str | None = None,
508
+ ) -> None:
509
+ """Assert every item's score (optionally filtered by evaluator) is ``>= min_score``.
510
+
511
+ Designed for CI gates on generated rubric evaluators (e.g.
512
+ ``results.assert_score_at_least(0.80)``). Includes any
513
+ sub-results from workflow evaluations.
514
+
515
+ Args:
516
+ min_score: Minimum acceptable score (inclusive).
517
+ evaluator: When set, only check scores from the evaluator
518
+ whose ``EvalScoreResult.name`` matches.
519
+ msg: Optional custom failure message.
520
+
521
+ Raises:
522
+ EvalNotPassedError: When any matching score is below the threshold.
523
+ """
524
+ offenders: list[str] = []
525
+
526
+ def _check(results: EvalResults) -> None:
527
+ for item in results.items:
528
+ for score in item.scores:
529
+ if evaluator is not None and score.name != evaluator:
530
+ continue
531
+ if score.score < min_score:
532
+ offenders.append(f"{item.item_id}/{score.name}={score.score:.3f}")
533
+ for sub in results.sub_results.values():
534
+ _check(sub)
535
+
536
+ _check(self)
537
+ if offenders:
538
+ detail = msg or (
539
+ f"{len(offenders)} score(s) below threshold {min_score}"
540
+ f"{' for ' + evaluator if evaluator else ''}: {', '.join(offenders[:5])}"
541
+ + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
542
+ )
543
+ raise EvalNotPassedError(detail)
544
+
545
+ def assert_dimension_score_at_least(
546
+ self,
547
+ dimension_id: str,
548
+ min_score: float,
549
+ *,
550
+ evaluator: str | None = None,
551
+ require_applicable: bool = False,
552
+ msg: str | None = None,
553
+ ) -> None:
554
+ """Assert every item's score for a rubric *dimension* is ``>= min_score``.
555
+
556
+ Walks ``EvalScoreResult.dimensions`` looking for the named
557
+ dimension across all items (and sub-results). Non-applicable
558
+ dimensions are skipped by default; pass
559
+ ``require_applicable=True`` to fail when no applicable score is
560
+ produced.
561
+
562
+ Args:
563
+ dimension_id: Dimension id (matches the rubric definition).
564
+ min_score: Minimum acceptable dimension score (inclusive).
565
+ evaluator: When set, only consider scores from the evaluator
566
+ whose ``EvalScoreResult.name`` matches.
567
+ require_applicable: When ``True``, missing or non-applicable
568
+ dimension scores raise. Defaults to ``False`` (skip).
569
+ msg: Optional custom failure message.
570
+
571
+ Raises:
572
+ EvalNotPassedError: When the dimension fails the threshold.
573
+ """
574
+ offenders: list[str] = []
575
+ missing_items: list[str] = []
576
+
577
+ def _check(results: EvalResults) -> None:
578
+ for item in results.items:
579
+ found_applicable = False
580
+ for score in item.scores:
581
+ if evaluator is not None and score.name != evaluator:
582
+ continue
583
+ if not score.dimensions:
584
+ continue
585
+ for rs in score.dimensions:
586
+ if rs.id != dimension_id:
587
+ continue
588
+ if not rs.applicable:
589
+ continue
590
+ found_applicable = True
591
+ if rs.score is None or rs.score < min_score:
592
+ offenders.append(
593
+ f"{item.item_id}/{score.name}/{dimension_id}="
594
+ f"{rs.score if rs.score is not None else 'None'}"
595
+ )
596
+ if require_applicable and not found_applicable:
597
+ missing_items.append(item.item_id)
598
+ for sub in results.sub_results.values():
599
+ _check(sub)
600
+
601
+ _check(self)
602
+ problems: list[str] = []
603
+ if offenders:
604
+ problems.append(
605
+ f"{len(offenders)} dimension score(s) for '{dimension_id}' below {min_score}: "
606
+ f"{', '.join(offenders[:5])}" + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
607
+ )
608
+ if missing_items:
609
+ problems.append(
610
+ f"Dimension '{dimension_id}' not applicable on {len(missing_items)} item(s): "
611
+ f"{', '.join(missing_items[:5])}"
612
+ )
613
+ if problems:
614
+ raise EvalNotPassedError(msg or "; ".join(problems))
615
+
616
+ def assert_no_failed_items(self, msg: str | None = None) -> None:
617
+ """Assert no item ended in ``fail`` or ``error`` status.
618
+
619
+ Includes any sub-results from workflow evaluations.
620
+
621
+ Args:
622
+ msg: Optional custom failure message.
623
+
624
+ Raises:
625
+ EvalNotPassedError: When any item failed or errored.
626
+ """
627
+ bad: list[str] = []
628
+
629
+ def _check(results: EvalResults) -> None:
630
+ for item in results.items:
631
+ if item.is_failed or item.is_error:
632
+ bad.append(f"{item.item_id}:{item.status}")
633
+ for sub in results.sub_results.values():
634
+ _check(sub)
635
+
636
+ _check(self)
637
+ if bad:
638
+ detail = msg or (
639
+ f"{len(bad)} item(s) failed or errored: {', '.join(bad[:5])}"
640
+ + (f" (+{len(bad) - 5} more)" if len(bad) > 5 else "")
641
+ )
642
+ raise EvalNotPassedError(detail)
643
+
644
+
645
+ # endregion
646
+
647
+ # region Generated rubric evaluators
648
+
649
+
650
+ @experimental(feature_id=ExperimentalFeature.EVALS)
651
+ @dataclass(frozen=True)
652
+ class RubricScore:
653
+ """A single dimension's score from a rubric-based evaluator run.
654
+
655
+ Rubric evaluators emit one ``RubricScore`` per dimension per item.
656
+ Attached to :class:`EvalScoreResult` as a typed view of the raw
657
+ ``properties.rubric_scores`` payload returned by providers such as
658
+ Foundry's generated rubric evaluators.
659
+
660
+ Attributes:
661
+ id: Dimension id (matches the rubric definition).
662
+ score: Numeric score, or ``None`` when the dimension was marked
663
+ non-applicable for this item.
664
+ applicable: Whether the dimension applied to this item.
665
+ weight: Dimension weight (mirrors the rubric definition).
666
+ reason: Short rationale produced by the evaluator.
667
+ """
668
+
669
+ id: str
670
+ score: int | None
671
+ applicable: bool
672
+ weight: int
673
+ reason: str
674
+
499
675
 
500
676
  # endregion
501
677
 
@@ -50,6 +50,7 @@ class ExperimentalFeature(str, Enum):
50
50
  on enum membership or attribute presence over time.
51
51
  """
52
52
 
53
+ DECLARATIVE_AGENTS = "DECLARATIVE_AGENTS"
53
54
  EVALS = "EVALS"
54
55
  FILE_HISTORY = "FILE_HISTORY"
55
56
  FIDES = "FIDES"
@@ -57,6 +58,8 @@ class ExperimentalFeature(str, Enum):
57
58
  FOUNDRY_PREVIEW_TOOLS = "FOUNDRY_PREVIEW_TOOLS"
58
59
  FUNCTIONAL_WORKFLOWS = "FUNCTIONAL_WORKFLOWS"
59
60
  HARNESS = "HARNESS"
61
+ MCP_SKILLS = "MCP_SKILLS"
62
+ PROGRESSIVE_TOOLS = "PROGRESSIVE_TOOLS"
60
63
  SKILLS = "SKILLS"
61
64
  TO_PROMPT_AGENT = "TO_PROMPT_AGENT"
62
65
 
@@ -14,12 +14,13 @@ import logging
14
14
  from collections.abc import Callable, Sequence
15
15
  from typing import TYPE_CHECKING, Any
16
16
 
17
- from .._agents import Agent
17
+ from .._agents import Agent, SupportsAgentRun
18
18
  from .._clients import SupportsWebSearchTool
19
19
  from .._compaction import CompactionProvider, ContextWindowCompactionStrategy, ToolResultCompactionStrategy
20
20
  from .._feature_stage import ExperimentalFeature, experimental
21
21
  from .._sessions import ContextProvider, HistoryProvider, InMemoryHistoryProvider
22
22
  from .._skills import SkillsProvider
23
+ from ._background_agents import BackgroundAgentsProvider
23
24
  from ._memory import MemoryContextProvider, MemoryStore
24
25
  from ._mode import AgentModeProvider
25
26
  from ._todo import TodoProvider
@@ -103,6 +104,8 @@ def _assemble_context_providers(
103
104
  memory_store: MemoryStore | None,
104
105
  skills_provider: SkillsProvider | None,
105
106
  skills_paths: Sequence[str] | None,
107
+ background_agents: Sequence[SupportsAgentRun] | None,
108
+ background_agents_instructions: str | None,
106
109
  extra_context_providers: Sequence[ContextProvider] | None,
107
110
  ) -> list[ContextProvider]:
108
111
  """Assemble the ordered list of context providers."""
@@ -130,6 +133,10 @@ def _assemble_context_providers(
130
133
  if skills_paths:
131
134
  providers.append(SkillsProvider.from_paths(*skills_paths))
132
135
 
136
+ # Background agents are opt-in: only added when agents are provided.
137
+ if background_agents:
138
+ providers.append(BackgroundAgentsProvider(background_agents, instructions=background_agents_instructions))
139
+
133
140
  # Append any user-supplied additional providers.
134
141
  if extra_context_providers:
135
142
  providers.extend(extra_context_providers)
@@ -165,6 +172,8 @@ def create_harness_agent(
165
172
  memory_store: MemoryStore | None = None,
166
173
  skills_provider: SkillsProvider | None = None,
167
174
  skills_paths: Sequence[str] | None = None,
175
+ background_agents: Sequence[SupportsAgentRun] | None = None,
176
+ background_agents_instructions: str | None = None,
168
177
  disable_web_search: bool = False,
169
178
  otel_provider_name: str | None = None,
170
179
  context_providers: Sequence[ContextProvider] | None = None,
@@ -182,6 +191,7 @@ def create_harness_agent(
182
191
  - **AgentModeProvider** — plan/execute mode tracking
183
192
  - **MemoryContextProvider** — file-based durable memory (when ``memory_store`` provided)
184
193
  - **SkillsProvider** — skill discovery and progressive loading
194
+ - **BackgroundAgentsProvider** — delegate work to background sub-agents
185
195
  - **OpenTelemetry** — observability via ``AgentTelemetryLayer``
186
196
 
187
197
  Each feature can be disabled or customized via keyword arguments.
@@ -253,6 +263,13 @@ def create_harness_agent(
253
263
  skills_paths: Paths for file-based skill discovery (looks for SKILL.md files).
254
264
  Can be combined with ``skills_provider``. When neither ``skills_provider``
255
265
  nor ``skills_paths`` is provided, no SkillsProvider is added.
266
+ background_agents: Collection of agents available for background task delegation.
267
+ When provided, a ``BackgroundAgentsProvider`` is automatically included,
268
+ enabling the agent to start, monitor, and retrieve results from background tasks.
269
+ Each agent must have a non-empty, unique name (case-insensitive).
270
+ background_agents_instructions: Optional instruction override for the
271
+ ``BackgroundAgentsProvider``. May include ``{background_agents}`` placeholder
272
+ which will be replaced with the agent listing.
256
273
  disable_web_search: When True, skip automatic web search tool inclusion.
257
274
  When False (default), the web search tool is automatically added if the
258
275
  client implements SupportsWebSearchTool. A warning is logged if the client
@@ -302,6 +319,8 @@ def create_harness_agent(
302
319
  memory_store=memory_store,
303
320
  skills_provider=skills_provider,
304
321
  skills_paths=skills_paths,
322
+ background_agents=background_agents,
323
+ background_agents_instructions=background_agents_instructions,
305
324
  extra_context_providers=context_providers,
306
325
  )
307
326
 
@@ -349,6 +349,8 @@ class BackgroundAgentsProvider(ContextProvider):
349
349
  _save_provider_state(session, provider_state, source_id=source_id)
350
350
  return f"Background task {task_id} started on agent '{agent_name}'."
351
351
 
352
+ background_agents_start_task._invoke_sync_on_event_loop = True # pyright: ignore[reportPrivateUsage]
353
+
352
354
  @tool(name="background_agents_wait_for_first_completion", approval_mode="never_require")
353
355
  async def background_agents_wait_for_first_completion(task_ids: list[int]) -> str:
354
356
  """Block until the first of the specified background tasks completes. Returns the completed task's ID."""
@@ -471,6 +473,8 @@ class BackgroundAgentsProvider(ContextProvider):
471
473
  _save_provider_state(session, provider_state, source_id=source_id)
472
474
  return f"Task {task_id} continued with new input."
473
475
 
476
+ background_agents_continue_task._invoke_sync_on_event_loop = True # pyright: ignore[reportPrivateUsage]
477
+
474
478
  @tool(name="background_agents_clear_completed_task", approval_mode="never_require")
475
479
  def background_agents_clear_completed_task(task_id: int) -> str:
476
480
  """Remove a completed or failed task and release its session to free memory."""