agent-framework-core 1.6.0__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/PKG-INFO +1 -1
  2. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/__init__.py +44 -0
  3. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_clients.py +8 -1
  4. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_compaction.py +150 -7
  5. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_evaluation.py +176 -0
  6. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_feature_stage.py +127 -10
  7. agent_framework_core-1.8.0/agent_framework/_harness/_agent.py +368 -0
  8. agent_framework_core-1.8.0/agent_framework/_harness/_background_agents.py +525 -0
  9. agent_framework_core-1.8.0/agent_framework/_harness/_file_access.py +1018 -0
  10. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_mode.py +50 -21
  11. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_todo.py +94 -28
  12. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_mcp.py +72 -20
  13. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_middleware.py +114 -2
  14. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_serialization.py +45 -0
  15. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_skills.py +666 -198
  16. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_tools.py +38 -4
  17. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_types.py +86 -0
  18. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent.py +2 -1
  19. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_functional.py +2 -1
  20. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/a2a/__init__.py +2 -1
  21. agent_framework_core-1.8.0/agent_framework/a2a/__init__.pyi +5 -0
  22. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.py +2 -0
  23. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.pyi +4 -0
  24. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/observability.py +42 -4
  25. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/pyproject.toml +1 -1
  26. agent_framework_core-1.6.0/agent_framework/a2a/__init__.pyi +0 -5
  27. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/LICENSE +0 -0
  28. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/README.md +0 -0
  29. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_agents.py +0 -0
  30. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_docstrings.py +0 -0
  31. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/__init__.py +0 -0
  32. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_memory.py +0 -0
  33. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_sessions.py +0 -0
  34. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_settings.py +0 -0
  35. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_telemetry.py +0 -0
  36. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/__init__.py +0 -0
  37. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_executor.py +0 -0
  38. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_utils.py +0 -0
  39. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint.py +0 -0
  40. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint_encoding.py +0 -0
  41. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_const.py +0 -0
  42. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_conversation_history.py +0 -0
  43. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge.py +0 -0
  44. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge_runner.py +0 -0
  45. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_events.py +0 -0
  46. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_executor.py +0 -0
  47. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_function_executor.py +0 -0
  48. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_message_utils.py +0 -0
  49. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_model_utils.py +0 -0
  50. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_request_info_mixin.py +0 -0
  51. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner.py +0 -0
  52. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner_context.py +0 -0
  53. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_state.py +0 -0
  54. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_typing_utils.py +0 -0
  55. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_validation.py +0 -0
  56. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_viz.py +0 -0
  57. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow.py +0 -0
  58. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_builder.py +0 -0
  59. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_context.py +0 -0
  60. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_executor.py +0 -0
  61. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.py +0 -0
  62. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.pyi +0 -0
  63. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.py +0 -0
  64. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.pyi +0 -0
  65. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.py +0 -0
  66. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.pyi +0 -0
  67. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.py +0 -0
  68. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.pyi +0 -0
  69. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.py +0 -0
  70. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.pyi +0 -0
  71. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.py +0 -0
  72. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.pyi +0 -0
  73. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.py +0 -0
  74. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.pyi +0 -0
  75. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/exceptions.py +0 -0
  76. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.py +0 -0
  77. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.pyi +0 -0
  78. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.py +0 -0
  79. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.pyi +0 -0
  80. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/hyperlight/__init__.py +0 -0
  81. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/lab/__init__.py +0 -0
  82. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.py +0 -0
  83. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.pyi +0 -0
  84. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.py +0 -0
  85. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.pyi +0 -0
  86. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.py +0 -0
  87. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.pyi +0 -0
  88. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.py +0 -0
  89. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.pyi +0 -0
  90. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.py +0 -0
  91. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.pyi +0 -0
  92. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/py.typed +0 -0
  93. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.py +0 -0
  94. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.pyi +0 -0
  95. {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/security.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agent-framework-core
3
- Version: 1.6.0
3
+ Version: 1.8.0
4
4
  Summary: Microsoft Agent Framework for building AI Agents with Python. This is the core package that has all the core abstractions and implementations.
5
5
  Author-email: Microsoft <af-support@microsoft.com>
6
6
  Requires-Python: >=3.10
@@ -45,6 +45,7 @@ from ._compaction import (
45
45
  CharacterEstimatorTokenizer,
46
46
  CompactionProvider,
47
47
  CompactionStrategy,
48
+ ContextWindowCompactionStrategy,
48
49
  SelectiveToolCallCompactionStrategy,
49
50
  SlidingWindowStrategy,
50
51
  SummarizationStrategy,
@@ -70,6 +71,7 @@ from ._evaluation import (
70
71
  Evaluator,
71
72
  ExpectedToolCall,
72
73
  LocalEvaluator,
74
+ RubricScore,
73
75
  evaluate_agent,
74
76
  evaluate_workflow,
75
77
  evaluator,
@@ -79,6 +81,26 @@ from ._evaluation import (
79
81
  tool_calls_present,
80
82
  )
81
83
  from ._feature_stage import ExperimentalFeature, ReleaseCandidateFeature
84
+ from ._harness._agent import (
85
+ DEFAULT_HARNESS_INSTRUCTIONS,
86
+ create_harness_agent,
87
+ )
88
+ from ._harness._background_agents import (
89
+ DEFAULT_BACKGROUND_AGENTS_SOURCE_ID,
90
+ BackgroundAgentsProvider,
91
+ BackgroundTaskInfo,
92
+ BackgroundTaskStatus,
93
+ )
94
+ from ._harness._file_access import (
95
+ DEFAULT_FILE_ACCESS_INSTRUCTIONS,
96
+ DEFAULT_FILE_ACCESS_SOURCE_ID,
97
+ AgentFileStore,
98
+ FileAccessProvider,
99
+ FileSearchMatch,
100
+ FileSearchResult,
101
+ FileSystemAgentFileStore,
102
+ InMemoryAgentFileStore,
103
+ )
82
104
  from ._harness._memory import (
83
105
  DEFAULT_MEMORY_SOURCE_ID,
84
106
  MemoryContextProvider,
@@ -146,6 +168,9 @@ from ._skills import (
146
168
  InlineSkillResource,
147
169
  InlineSkillScript,
148
170
  InMemorySkillsSource,
171
+ MCPSkill,
172
+ MCPSkillResource,
173
+ MCPSkillsSource,
149
174
  Skill,
150
175
  SkillFrontmatter,
151
176
  SkillResource,
@@ -297,6 +322,10 @@ __all__ = [
297
322
  "AGENT_FRAMEWORK_USER_AGENT",
298
323
  "APP_INFO",
299
324
  "COMPACTION_STATE_KEY",
325
+ "DEFAULT_BACKGROUND_AGENTS_SOURCE_ID",
326
+ "DEFAULT_FILE_ACCESS_INSTRUCTIONS",
327
+ "DEFAULT_FILE_ACCESS_SOURCE_ID",
328
+ "DEFAULT_HARNESS_INSTRUCTIONS",
300
329
  "DEFAULT_MAX_ITERATIONS",
301
330
  "DEFAULT_MEMORY_SOURCE_ID",
302
331
  "DEFAULT_MODE_SOURCE_ID",
@@ -321,6 +350,7 @@ __all__ = [
321
350
  "AgentExecutor",
322
351
  "AgentExecutorRequest",
323
352
  "AgentExecutorResponse",
353
+ "AgentFileStore",
324
354
  "AgentFrameworkException",
325
355
  "AgentMiddleware",
326
356
  "AgentMiddlewareLayer",
@@ -332,6 +362,9 @@ __all__ = [
332
362
  "AgentSession",
333
363
  "AggregatingSkillsSource",
334
364
  "Annotation",
365
+ "BackgroundAgentsProvider",
366
+ "BackgroundTaskInfo",
367
+ "BackgroundTaskStatus",
335
368
  "BaseAgent",
336
369
  "BaseChatClient",
337
370
  "BaseEmbeddingClient",
@@ -352,6 +385,7 @@ __all__ = [
352
385
  "CompactionStrategy",
353
386
  "Content",
354
387
  "ContextProvider",
388
+ "ContextWindowCompactionStrategy",
355
389
  "ContinuationToken",
356
390
  "ConversationSplit",
357
391
  "ConversationSplitter",
@@ -376,11 +410,15 @@ __all__ = [
376
410
  "ExperimentalFeature",
377
411
  "FanInEdgeGroup",
378
412
  "FanOutEdgeGroup",
413
+ "FileAccessProvider",
379
414
  "FileCheckpointStorage",
380
415
  "FileHistoryProvider",
416
+ "FileSearchMatch",
417
+ "FileSearchResult",
381
418
  "FileSkill",
382
419
  "FileSkillScript",
383
420
  "FileSkillsSource",
421
+ "FileSystemAgentFileStore",
384
422
  "FilteringSkillsSource",
385
423
  "FinalT",
386
424
  "FinishReason",
@@ -397,6 +435,7 @@ __all__ = [
397
435
  "GeneratedEmbeddings",
398
436
  "GraphConnectivityError",
399
437
  "HistoryProvider",
438
+ "InMemoryAgentFileStore",
400
439
  "InMemoryCheckpointStorage",
401
440
  "InMemoryHistoryProvider",
402
441
  "InMemorySkillsSource",
@@ -408,6 +447,9 @@ __all__ = [
408
447
  "MCPStdioTool",
409
448
  "MCPStreamableHTTPTool",
410
449
  "MCPWebsocketTool",
450
+ "MCPSkill",
451
+ "MCPSkillResource",
452
+ "MCPSkillsSource",
411
453
  "MemoryContextProvider",
412
454
  "MemoryFileStore",
413
455
  "MemoryIndexEntry",
@@ -425,6 +467,7 @@ __all__ = [
425
467
  "ResponseStream",
426
468
  "Role",
427
469
  "RoleLiteral",
470
+ "RubricScore",
428
471
  "RunContext",
429
472
  "Runner",
430
473
  "RunnerContext",
@@ -499,6 +542,7 @@ __all__ = [
499
542
  "apply_compaction",
500
543
  "chat_middleware",
501
544
  "create_edge_runner",
545
+ "create_harness_agent",
502
546
  "detect_media_type_from_base64",
503
547
  "evaluate_agent",
504
548
  "evaluate_workflow",
@@ -380,8 +380,15 @@ class BaseChatClient(SerializationMixin, ABC, Generic[OptionsCoT]):
380
380
  return prepared_messages
381
381
  from ._compaction import apply_compaction
382
382
 
383
+ # Compact the caller's list in place when possible. A compaction operation has
384
+ # two halves: exclusion flags (mutated on shared Message objects) and inserted
385
+ # summary messages. Operating on the original list keeps both halves on the list
386
+ # the function-invocation tool loop reuses across iterations; otherwise inserted
387
+ # summaries would be lost on a throwaway copy while exclusions persisted, silently
388
+ # dropping older groups (issue #4991).
389
+ working_messages = messages if isinstance(messages, list) else prepared_messages
383
390
  return await apply_compaction(
384
- prepared_messages,
391
+ working_messages,
385
392
  strategy=compaction_strategy,
386
393
  tokenizer=tokenizer,
387
394
  )
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import json
6
6
  import logging
7
- from collections.abc import Mapping, Sequence
7
+ from collections.abc import Iterable, Mapping, Sequence
8
8
  from typing import (
9
9
  TYPE_CHECKING,
10
10
  Any,
@@ -92,10 +92,23 @@ def _is_reasoning_only_assistant(message: Message) -> bool:
92
92
  return all(content.type == "text_reasoning" for content in message.contents)
93
93
 
94
94
 
95
- def _ensure_message_ids(messages: list[Message]) -> None:
95
+ def _ensure_message_ids(
96
+ messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
97
+ ) -> None:
98
+ existing_ids: set[str] = set(reserved_ids) if reserved_ids is not None else set()
99
+ existing_ids.update(message.message_id for message in messages if message.message_id)
96
100
  for index, message in enumerate(messages):
97
- if not message.message_id:
98
- message.message_id = f"msg_{index}"
101
+ if message.message_id:
102
+ continue
103
+ candidate = f"msg_{id_offset + index}"
104
+ if candidate in existing_ids:
105
+ counter = id_offset + len(messages)
106
+ candidate = f"msg_{counter}"
107
+ while candidate in existing_ids:
108
+ counter += 1
109
+ candidate = f"msg_{counter}"
110
+ message.message_id = candidate
111
+ existing_ids.add(candidate)
99
112
 
100
113
 
101
114
  def _group_id_for(message: Message, group_index: int) -> str:
@@ -104,14 +117,27 @@ def _group_id_for(message: Message, group_index: int) -> str:
104
117
  return f"group_index_{group_index}"
105
118
 
106
119
 
107
- def group_messages(messages: list[Message]) -> list[dict[str, Any]]:
120
+ def group_messages(
121
+ messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
122
+ ) -> list[dict[str, Any]]:
108
123
  """Compute group spans and metadata for annotation.
109
124
 
125
+ Args:
126
+ messages: The messages (or a slice of them) to group.
127
+
128
+ Keyword Args:
129
+ id_offset: Absolute starting index used when auto-assigning ``message_id``
130
+ values, so incremental annotation of a list slice produces ids that
131
+ stay unique across the full list.
132
+ reserved_ids: Message ids that already exist outside ``messages`` (for
133
+ example in a preserved prefix). Auto-assigned ids are guaranteed not
134
+ to collide with these, preventing duplicate ids across the full list.
135
+
110
136
  Returns:
111
137
  Ordered list of lightweight span dicts with keys:
112
138
  ``group_id``, ``kind``, ``start_index``, ``end_index``, ``has_reasoning``.
113
139
  """
114
- _ensure_message_ids(messages)
140
+ _ensure_message_ids(messages, id_offset=id_offset, reserved_ids=reserved_ids)
115
141
  spans: list[dict[str, Any]] = []
116
142
  i = 0
117
143
  group_index = 0
@@ -439,7 +465,8 @@ def annotate_message_groups(
439
465
  if previous_group_index is not None:
440
466
  group_index_offset = previous_group_index + 1
441
467
 
442
- spans = group_messages(messages[start_index:])
468
+ reserved_ids = {message.message_id for message in messages[:start_index] if message.message_id}
469
+ spans = group_messages(messages[start_index:], id_offset=start_index, reserved_ids=reserved_ids)
443
470
  for span_index, span in enumerate(spans):
444
471
  group_id = str(span["group_id"])
445
472
  kind = _coerce_group_kind(span["kind"])
@@ -1277,6 +1304,121 @@ class CompactionProvider(ContextProvider):
1277
1304
  # whether excluded messages are loaded on the next turn.
1278
1305
 
1279
1306
 
1307
+ class ContextWindowCompactionStrategy:
1308
+ """Token-budget compaction derived from a model's context window size.
1309
+
1310
+ Computes an input budget from the model's context window and output token
1311
+ limits, then applies a two-phase compaction pipeline:
1312
+
1313
+ 1. **Tool result eviction** — collapses older tool-call groups into summaries
1314
+ when included tokens exceed ``tool_eviction_threshold`` of the input budget.
1315
+ 2. **Truncation** — removes oldest non-system groups when included tokens
1316
+ exceed ``truncation_threshold`` of the input budget.
1317
+
1318
+ The class uses two independent :class:`TokenBudgetComposedStrategy`
1319
+ instances — one per phase — so each fires only when its own threshold
1320
+ is exceeded.
1321
+
1322
+ Examples:
1323
+ .. code-block:: python
1324
+
1325
+ from agent_framework import ContextWindowCompactionStrategy, CompactionProvider
1326
+
1327
+ strategy = ContextWindowCompactionStrategy(
1328
+ max_context_window_tokens=128_000,
1329
+ max_output_tokens=16_384,
1330
+ )
1331
+ provider = CompactionProvider(before_strategy=strategy)
1332
+ """
1333
+
1334
+ DEFAULT_TOOL_EVICTION_THRESHOLD: float = 0.5
1335
+ """Default fraction of input budget at which tool result eviction triggers."""
1336
+
1337
+ DEFAULT_TRUNCATION_THRESHOLD: float = 0.8
1338
+ """Default fraction of input budget at which truncation triggers."""
1339
+
1340
+ def __init__(
1341
+ self,
1342
+ *,
1343
+ max_context_window_tokens: int,
1344
+ max_output_tokens: int,
1345
+ tokenizer: TokenizerProtocol | None = None,
1346
+ tool_eviction_threshold: float = DEFAULT_TOOL_EVICTION_THRESHOLD,
1347
+ truncation_threshold: float = DEFAULT_TRUNCATION_THRESHOLD,
1348
+ keep_last_tool_call_groups: int = 4,
1349
+ ) -> None:
1350
+ """Create a context-window compaction strategy.
1351
+
1352
+ Keyword Args:
1353
+ max_context_window_tokens: The model's maximum context window size
1354
+ in tokens (e.g. 128,000).
1355
+ max_output_tokens: The model's maximum output tokens per response
1356
+ (e.g. 16,384).
1357
+ tokenizer: Token counter for measuring message sizes. Defaults to
1358
+ :class:`CharacterEstimatorTokenizer` (4 chars/token heuristic).
1359
+ tool_eviction_threshold: Fraction of input budget (0.0, 1.0] at
1360
+ which tool result eviction triggers. Defaults to 0.5.
1361
+ truncation_threshold: Fraction of input budget (0.0, 1.0] at which
1362
+ truncation triggers. Must be ≥ ``tool_eviction_threshold``.
1363
+ Defaults to 0.8.
1364
+ keep_last_tool_call_groups: Number of most recent tool-call groups
1365
+ to retain verbatim during tool eviction. Older groups are
1366
+ collapsed into summaries. Defaults to 4.
1367
+
1368
+ Raises:
1369
+ ValueError: If thresholds are out of range or inconsistent.
1370
+ """
1371
+ if max_context_window_tokens <= 0:
1372
+ raise ValueError("max_context_window_tokens must be positive.")
1373
+ if max_output_tokens < 0 or max_output_tokens >= max_context_window_tokens:
1374
+ raise ValueError("max_output_tokens must be >= 0 and < max_context_window_tokens.")
1375
+ if not (0.0 < tool_eviction_threshold <= 1.0):
1376
+ raise ValueError("tool_eviction_threshold must be in (0.0, 1.0].")
1377
+ if not (0.0 < truncation_threshold <= 1.0):
1378
+ raise ValueError("truncation_threshold must be in (0.0, 1.0].")
1379
+ if truncation_threshold < tool_eviction_threshold:
1380
+ raise ValueError("truncation_threshold must be >= tool_eviction_threshold.")
1381
+
1382
+ resolved_tokenizer = tokenizer or CharacterEstimatorTokenizer()
1383
+ input_budget = max_context_window_tokens - max_output_tokens
1384
+ tool_eviction_tokens = int(input_budget * tool_eviction_threshold)
1385
+ truncation_tokens = int(input_budget * truncation_threshold)
1386
+
1387
+ self.max_context_window_tokens = max_context_window_tokens
1388
+ self.max_output_tokens = max_output_tokens
1389
+ self.input_budget_tokens = input_budget
1390
+ self.tool_eviction_threshold = tool_eviction_threshold
1391
+ self.truncation_threshold = truncation_threshold
1392
+
1393
+ self._tool_eviction = TokenBudgetComposedStrategy(
1394
+ token_budget=tool_eviction_tokens,
1395
+ tokenizer=resolved_tokenizer,
1396
+ strategies=[
1397
+ ToolResultCompactionStrategy(keep_last_tool_call_groups=keep_last_tool_call_groups),
1398
+ ],
1399
+ )
1400
+ self._truncation = TokenBudgetComposedStrategy(
1401
+ token_budget=truncation_tokens,
1402
+ tokenizer=resolved_tokenizer,
1403
+ strategies=[
1404
+ TruncationStrategy(
1405
+ max_n=truncation_tokens,
1406
+ compact_to=tool_eviction_tokens,
1407
+ tokenizer=resolved_tokenizer,
1408
+ ),
1409
+ ],
1410
+ )
1411
+
1412
+ async def __call__(self, messages: list[Message]) -> bool:
1413
+ """Apply the two-phase compaction pipeline.
1414
+
1415
+ Returns:
1416
+ True if compaction changed message inclusion; otherwise False.
1417
+ """
1418
+ changed = await self._tool_eviction(messages)
1419
+ return (await self._truncation(messages)) or changed
1420
+
1421
+
1280
1422
  __all__ = [
1281
1423
  "COMPACTION_STATE_KEY",
1282
1424
  "EXCLUDED_KEY",
@@ -1293,6 +1435,7 @@ __all__ = [
1293
1435
  "CharacterEstimatorTokenizer",
1294
1436
  "CompactionProvider",
1295
1437
  "CompactionStrategy",
1438
+ "ContextWindowCompactionStrategy",
1296
1439
  "GroupKind",
1297
1440
  "SelectiveToolCallCompactionStrategy",
1298
1441
  "SlidingWindowStrategy",
@@ -311,12 +311,15 @@ class EvalScoreResult:
311
311
  score: Numeric score from the evaluator.
312
312
  passed: Whether the item passed this evaluator's threshold.
313
313
  sample: Optional raw evaluator output (rationale, metadata).
314
+ dimensions: Per-dimension scores when this evaluator is a rubric
315
+ evaluator. ``None`` for non-rubric (e.g. built-in) evaluators.
314
316
  """
315
317
 
316
318
  name: str
317
319
  score: float
318
320
  passed: bool | None = None
319
321
  sample: dict[str, Any] | None = None
322
+ dimensions: list[RubricScore] | None = None
320
323
 
321
324
 
322
325
  @experimental(feature_id=ExperimentalFeature.EVALS)
@@ -496,6 +499,179 @@ class EvalResults:
496
499
  detail += f" Errored items: {', '.join(summaries)}."
497
500
  raise EvalNotPassedError(detail)
498
501
 
502
+ def assert_score_at_least(
503
+ self,
504
+ min_score: float,
505
+ *,
506
+ evaluator: str | None = None,
507
+ msg: str | None = None,
508
+ ) -> None:
509
+ """Assert every item's score (optionally filtered by evaluator) is ``>= min_score``.
510
+
511
+ Designed for CI gates on generated rubric evaluators (e.g.
512
+ ``results.assert_score_at_least(0.80)``). Includes any
513
+ sub-results from workflow evaluations.
514
+
515
+ Args:
516
+ min_score: Minimum acceptable score (inclusive).
517
+ evaluator: When set, only check scores from the evaluator
518
+ whose ``EvalScoreResult.name`` matches.
519
+ msg: Optional custom failure message.
520
+
521
+ Raises:
522
+ EvalNotPassedError: When any matching score is below the threshold.
523
+ """
524
+ offenders: list[str] = []
525
+
526
+ def _check(results: EvalResults) -> None:
527
+ for item in results.items:
528
+ for score in item.scores:
529
+ if evaluator is not None and score.name != evaluator:
530
+ continue
531
+ if score.score < min_score:
532
+ offenders.append(f"{item.item_id}/{score.name}={score.score:.3f}")
533
+ for sub in results.sub_results.values():
534
+ _check(sub)
535
+
536
+ _check(self)
537
+ if offenders:
538
+ detail = msg or (
539
+ f"{len(offenders)} score(s) below threshold {min_score}"
540
+ f"{' for ' + evaluator if evaluator else ''}: {', '.join(offenders[:5])}"
541
+ + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
542
+ )
543
+ raise EvalNotPassedError(detail)
544
+
545
+ def assert_dimension_score_at_least(
546
+ self,
547
+ dimension_id: str,
548
+ min_score: float,
549
+ *,
550
+ evaluator: str | None = None,
551
+ require_applicable: bool = False,
552
+ msg: str | None = None,
553
+ ) -> None:
554
+ """Assert every item's score for a rubric *dimension* is ``>= min_score``.
555
+
556
+ Walks ``EvalScoreResult.dimensions`` looking for the named
557
+ dimension across all items (and sub-results). Non-applicable
558
+ dimensions are skipped by default; pass
559
+ ``require_applicable=True`` to fail when no applicable score is
560
+ produced.
561
+
562
+ Args:
563
+ dimension_id: Dimension id (matches the rubric definition).
564
+ min_score: Minimum acceptable dimension score (inclusive).
565
+ evaluator: When set, only consider scores from the evaluator
566
+ whose ``EvalScoreResult.name`` matches.
567
+ require_applicable: When ``True``, missing or non-applicable
568
+ dimension scores raise. Defaults to ``False`` (skip).
569
+ msg: Optional custom failure message.
570
+
571
+ Raises:
572
+ EvalNotPassedError: When the dimension fails the threshold.
573
+ """
574
+ offenders: list[str] = []
575
+ missing_items: list[str] = []
576
+
577
+ def _check(results: EvalResults) -> None:
578
+ for item in results.items:
579
+ found_applicable = False
580
+ for score in item.scores:
581
+ if evaluator is not None and score.name != evaluator:
582
+ continue
583
+ if not score.dimensions:
584
+ continue
585
+ for rs in score.dimensions:
586
+ if rs.id != dimension_id:
587
+ continue
588
+ if not rs.applicable:
589
+ continue
590
+ found_applicable = True
591
+ if rs.score is None or rs.score < min_score:
592
+ offenders.append(
593
+ f"{item.item_id}/{score.name}/{dimension_id}="
594
+ f"{rs.score if rs.score is not None else 'None'}"
595
+ )
596
+ if require_applicable and not found_applicable:
597
+ missing_items.append(item.item_id)
598
+ for sub in results.sub_results.values():
599
+ _check(sub)
600
+
601
+ _check(self)
602
+ problems: list[str] = []
603
+ if offenders:
604
+ problems.append(
605
+ f"{len(offenders)} dimension score(s) for '{dimension_id}' below {min_score}: "
606
+ f"{', '.join(offenders[:5])}" + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
607
+ )
608
+ if missing_items:
609
+ problems.append(
610
+ f"Dimension '{dimension_id}' not applicable on {len(missing_items)} item(s): "
611
+ f"{', '.join(missing_items[:5])}"
612
+ )
613
+ if problems:
614
+ raise EvalNotPassedError(msg or "; ".join(problems))
615
+
616
+ def assert_no_failed_items(self, msg: str | None = None) -> None:
617
+ """Assert no item ended in ``fail`` or ``error`` status.
618
+
619
+ Includes any sub-results from workflow evaluations.
620
+
621
+ Args:
622
+ msg: Optional custom failure message.
623
+
624
+ Raises:
625
+ EvalNotPassedError: When any item failed or errored.
626
+ """
627
+ bad: list[str] = []
628
+
629
+ def _check(results: EvalResults) -> None:
630
+ for item in results.items:
631
+ if item.is_failed or item.is_error:
632
+ bad.append(f"{item.item_id}:{item.status}")
633
+ for sub in results.sub_results.values():
634
+ _check(sub)
635
+
636
+ _check(self)
637
+ if bad:
638
+ detail = msg or (
639
+ f"{len(bad)} item(s) failed or errored: {', '.join(bad[:5])}"
640
+ + (f" (+{len(bad) - 5} more)" if len(bad) > 5 else "")
641
+ )
642
+ raise EvalNotPassedError(detail)
643
+
644
+
645
+ # endregion
646
+
647
+ # region Generated rubric evaluators
648
+
649
+
650
+ @experimental(feature_id=ExperimentalFeature.EVALS)
651
+ @dataclass(frozen=True)
652
+ class RubricScore:
653
+ """A single dimension's score from a rubric-based evaluator run.
654
+
655
+ Rubric evaluators emit one ``RubricScore`` per dimension per item.
656
+ Attached to :class:`EvalScoreResult` as a typed view of the raw
657
+ ``properties.rubric_scores`` payload returned by providers such as
658
+ Foundry's generated rubric evaluators.
659
+
660
+ Attributes:
661
+ id: Dimension id (matches the rubric definition).
662
+ score: Numeric score, or ``None`` when the dimension was marked
663
+ non-applicable for this item.
664
+ applicable: Whether the dimension applied to this item.
665
+ weight: Dimension weight (mirrors the rubric definition).
666
+ reason: Short rationale produced by the evaluator.
667
+ """
668
+
669
+ id: str
670
+ score: int | None
671
+ applicable: bool
672
+ weight: int
673
+ reason: str
674
+
499
675
 
500
676
  # endregion
501
677