agent-framework-core 1.6.0__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/PKG-INFO +1 -1
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/__init__.py +44 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_clients.py +8 -1
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_compaction.py +150 -7
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_evaluation.py +176 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_feature_stage.py +127 -10
- agent_framework_core-1.8.0/agent_framework/_harness/_agent.py +368 -0
- agent_framework_core-1.8.0/agent_framework/_harness/_background_agents.py +525 -0
- agent_framework_core-1.8.0/agent_framework/_harness/_file_access.py +1018 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_mode.py +50 -21
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_todo.py +94 -28
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_mcp.py +72 -20
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_middleware.py +114 -2
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_serialization.py +45 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_skills.py +666 -198
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_tools.py +38 -4
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_types.py +86 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent.py +2 -1
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_functional.py +2 -1
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/a2a/__init__.py +2 -1
- agent_framework_core-1.8.0/agent_framework/a2a/__init__.pyi +5 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.py +2 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/foundry/__init__.pyi +4 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/observability.py +42 -4
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/pyproject.toml +1 -1
- agent_framework_core-1.6.0/agent_framework/a2a/__init__.pyi +0 -5
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/LICENSE +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/README.md +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_agents.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_docstrings.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_harness/_memory.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_sessions.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_settings.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_telemetry.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_executor.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_agent_utils.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_checkpoint_encoding.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_const.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_conversation_history.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_edge_runner.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_events.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_executor.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_function_executor.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_message_utils.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_model_utils.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_request_info_mixin.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_runner_context.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_state.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_typing_utils.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_validation.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_viz.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_builder.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_context.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/_workflows/_workflow_executor.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ag_ui/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/amazon/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/anthropic/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/azure/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/chatkit/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/declarative/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/devui/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/exceptions.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/github/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/google/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/hyperlight/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/lab/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/mem0/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/microsoft/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/ollama/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/openai/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/orchestrations/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/py.typed +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.py +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/redis/__init__.pyi +0 -0
- {agent_framework_core-1.6.0 → agent_framework_core-1.8.0}/agent_framework/security.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agent-framework-core
|
|
3
|
-
Version: 1.6.0
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: Microsoft Agent Framework for building AI Agents with Python. This is the core package that has all the core abstractions and implementations.
|
|
5
5
|
Author-email: Microsoft <af-support@microsoft.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -45,6 +45,7 @@ from ._compaction import (
|
|
|
45
45
|
CharacterEstimatorTokenizer,
|
|
46
46
|
CompactionProvider,
|
|
47
47
|
CompactionStrategy,
|
|
48
|
+
ContextWindowCompactionStrategy,
|
|
48
49
|
SelectiveToolCallCompactionStrategy,
|
|
49
50
|
SlidingWindowStrategy,
|
|
50
51
|
SummarizationStrategy,
|
|
@@ -70,6 +71,7 @@ from ._evaluation import (
|
|
|
70
71
|
Evaluator,
|
|
71
72
|
ExpectedToolCall,
|
|
72
73
|
LocalEvaluator,
|
|
74
|
+
RubricScore,
|
|
73
75
|
evaluate_agent,
|
|
74
76
|
evaluate_workflow,
|
|
75
77
|
evaluator,
|
|
@@ -79,6 +81,26 @@ from ._evaluation import (
|
|
|
79
81
|
tool_calls_present,
|
|
80
82
|
)
|
|
81
83
|
from ._feature_stage import ExperimentalFeature, ReleaseCandidateFeature
|
|
84
|
+
from ._harness._agent import (
|
|
85
|
+
DEFAULT_HARNESS_INSTRUCTIONS,
|
|
86
|
+
create_harness_agent,
|
|
87
|
+
)
|
|
88
|
+
from ._harness._background_agents import (
|
|
89
|
+
DEFAULT_BACKGROUND_AGENTS_SOURCE_ID,
|
|
90
|
+
BackgroundAgentsProvider,
|
|
91
|
+
BackgroundTaskInfo,
|
|
92
|
+
BackgroundTaskStatus,
|
|
93
|
+
)
|
|
94
|
+
from ._harness._file_access import (
|
|
95
|
+
DEFAULT_FILE_ACCESS_INSTRUCTIONS,
|
|
96
|
+
DEFAULT_FILE_ACCESS_SOURCE_ID,
|
|
97
|
+
AgentFileStore,
|
|
98
|
+
FileAccessProvider,
|
|
99
|
+
FileSearchMatch,
|
|
100
|
+
FileSearchResult,
|
|
101
|
+
FileSystemAgentFileStore,
|
|
102
|
+
InMemoryAgentFileStore,
|
|
103
|
+
)
|
|
82
104
|
from ._harness._memory import (
|
|
83
105
|
DEFAULT_MEMORY_SOURCE_ID,
|
|
84
106
|
MemoryContextProvider,
|
|
@@ -146,6 +168,9 @@ from ._skills import (
|
|
|
146
168
|
InlineSkillResource,
|
|
147
169
|
InlineSkillScript,
|
|
148
170
|
InMemorySkillsSource,
|
|
171
|
+
MCPSkill,
|
|
172
|
+
MCPSkillResource,
|
|
173
|
+
MCPSkillsSource,
|
|
149
174
|
Skill,
|
|
150
175
|
SkillFrontmatter,
|
|
151
176
|
SkillResource,
|
|
@@ -297,6 +322,10 @@ __all__ = [
|
|
|
297
322
|
"AGENT_FRAMEWORK_USER_AGENT",
|
|
298
323
|
"APP_INFO",
|
|
299
324
|
"COMPACTION_STATE_KEY",
|
|
325
|
+
"DEFAULT_BACKGROUND_AGENTS_SOURCE_ID",
|
|
326
|
+
"DEFAULT_FILE_ACCESS_INSTRUCTIONS",
|
|
327
|
+
"DEFAULT_FILE_ACCESS_SOURCE_ID",
|
|
328
|
+
"DEFAULT_HARNESS_INSTRUCTIONS",
|
|
300
329
|
"DEFAULT_MAX_ITERATIONS",
|
|
301
330
|
"DEFAULT_MEMORY_SOURCE_ID",
|
|
302
331
|
"DEFAULT_MODE_SOURCE_ID",
|
|
@@ -321,6 +350,7 @@ __all__ = [
|
|
|
321
350
|
"AgentExecutor",
|
|
322
351
|
"AgentExecutorRequest",
|
|
323
352
|
"AgentExecutorResponse",
|
|
353
|
+
"AgentFileStore",
|
|
324
354
|
"AgentFrameworkException",
|
|
325
355
|
"AgentMiddleware",
|
|
326
356
|
"AgentMiddlewareLayer",
|
|
@@ -332,6 +362,9 @@ __all__ = [
|
|
|
332
362
|
"AgentSession",
|
|
333
363
|
"AggregatingSkillsSource",
|
|
334
364
|
"Annotation",
|
|
365
|
+
"BackgroundAgentsProvider",
|
|
366
|
+
"BackgroundTaskInfo",
|
|
367
|
+
"BackgroundTaskStatus",
|
|
335
368
|
"BaseAgent",
|
|
336
369
|
"BaseChatClient",
|
|
337
370
|
"BaseEmbeddingClient",
|
|
@@ -352,6 +385,7 @@ __all__ = [
|
|
|
352
385
|
"CompactionStrategy",
|
|
353
386
|
"Content",
|
|
354
387
|
"ContextProvider",
|
|
388
|
+
"ContextWindowCompactionStrategy",
|
|
355
389
|
"ContinuationToken",
|
|
356
390
|
"ConversationSplit",
|
|
357
391
|
"ConversationSplitter",
|
|
@@ -376,11 +410,15 @@ __all__ = [
|
|
|
376
410
|
"ExperimentalFeature",
|
|
377
411
|
"FanInEdgeGroup",
|
|
378
412
|
"FanOutEdgeGroup",
|
|
413
|
+
"FileAccessProvider",
|
|
379
414
|
"FileCheckpointStorage",
|
|
380
415
|
"FileHistoryProvider",
|
|
416
|
+
"FileSearchMatch",
|
|
417
|
+
"FileSearchResult",
|
|
381
418
|
"FileSkill",
|
|
382
419
|
"FileSkillScript",
|
|
383
420
|
"FileSkillsSource",
|
|
421
|
+
"FileSystemAgentFileStore",
|
|
384
422
|
"FilteringSkillsSource",
|
|
385
423
|
"FinalT",
|
|
386
424
|
"FinishReason",
|
|
@@ -397,6 +435,7 @@ __all__ = [
|
|
|
397
435
|
"GeneratedEmbeddings",
|
|
398
436
|
"GraphConnectivityError",
|
|
399
437
|
"HistoryProvider",
|
|
438
|
+
"InMemoryAgentFileStore",
|
|
400
439
|
"InMemoryCheckpointStorage",
|
|
401
440
|
"InMemoryHistoryProvider",
|
|
402
441
|
"InMemorySkillsSource",
|
|
@@ -408,6 +447,9 @@ __all__ = [
|
|
|
408
447
|
"MCPStdioTool",
|
|
409
448
|
"MCPStreamableHTTPTool",
|
|
410
449
|
"MCPWebsocketTool",
|
|
450
|
+
"MCPSkill",
|
|
451
|
+
"MCPSkillResource",
|
|
452
|
+
"MCPSkillsSource",
|
|
411
453
|
"MemoryContextProvider",
|
|
412
454
|
"MemoryFileStore",
|
|
413
455
|
"MemoryIndexEntry",
|
|
@@ -425,6 +467,7 @@ __all__ = [
|
|
|
425
467
|
"ResponseStream",
|
|
426
468
|
"Role",
|
|
427
469
|
"RoleLiteral",
|
|
470
|
+
"RubricScore",
|
|
428
471
|
"RunContext",
|
|
429
472
|
"Runner",
|
|
430
473
|
"RunnerContext",
|
|
@@ -499,6 +542,7 @@ __all__ = [
|
|
|
499
542
|
"apply_compaction",
|
|
500
543
|
"chat_middleware",
|
|
501
544
|
"create_edge_runner",
|
|
545
|
+
"create_harness_agent",
|
|
502
546
|
"detect_media_type_from_base64",
|
|
503
547
|
"evaluate_agent",
|
|
504
548
|
"evaluate_workflow",
|
|
@@ -380,8 +380,15 @@ class BaseChatClient(SerializationMixin, ABC, Generic[OptionsCoT]):
|
|
|
380
380
|
return prepared_messages
|
|
381
381
|
from ._compaction import apply_compaction
|
|
382
382
|
|
|
383
|
+
# Compact the caller's list in place when possible. A compaction operation has
|
|
384
|
+
# two halves: exclusion flags (mutated on shared Message objects) and inserted
|
|
385
|
+
# summary messages. Operating on the original list keeps both halves on the list
|
|
386
|
+
# the function-invocation tool loop reuses across iterations; otherwise inserted
|
|
387
|
+
# summaries would be lost on a throwaway copy while exclusions persisted, silently
|
|
388
|
+
# dropping older groups (issue #4991).
|
|
389
|
+
working_messages = messages if isinstance(messages, list) else prepared_messages
|
|
383
390
|
return await apply_compaction(
|
|
384
|
-
prepared_messages,
|
391
|
+
working_messages,
|
|
385
392
|
strategy=compaction_strategy,
|
|
386
393
|
tokenizer=tokenizer,
|
|
387
394
|
)
|
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
|
-
from collections.abc import Mapping, Sequence
|
|
7
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
8
8
|
from typing import (
|
|
9
9
|
TYPE_CHECKING,
|
|
10
10
|
Any,
|
|
@@ -92,10 +92,23 @@ def _is_reasoning_only_assistant(message: Message) -> bool:
|
|
|
92
92
|
return all(content.type == "text_reasoning" for content in message.contents)
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def _ensure_message_ids(messages: list[Message]) -> None:
|
|
95
|
+
def _ensure_message_ids(
|
|
96
|
+
messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
|
|
97
|
+
) -> None:
|
|
98
|
+
existing_ids: set[str] = set(reserved_ids) if reserved_ids is not None else set()
|
|
99
|
+
existing_ids.update(message.message_id for message in messages if message.message_id)
|
|
96
100
|
for index, message in enumerate(messages):
|
|
97
|
-
if not message.message_id:
|
|
98
|
-
message.message_id = f"msg_{index}"
|
101
|
+
if message.message_id:
|
|
102
|
+
continue
|
|
103
|
+
candidate = f"msg_{id_offset + index}"
|
|
104
|
+
if candidate in existing_ids:
|
|
105
|
+
counter = id_offset + len(messages)
|
|
106
|
+
candidate = f"msg_{counter}"
|
|
107
|
+
while candidate in existing_ids:
|
|
108
|
+
counter += 1
|
|
109
|
+
candidate = f"msg_{counter}"
|
|
110
|
+
message.message_id = candidate
|
|
111
|
+
existing_ids.add(candidate)
|
|
99
112
|
|
|
100
113
|
|
|
101
114
|
def _group_id_for(message: Message, group_index: int) -> str:
|
|
@@ -104,14 +117,27 @@ def _group_id_for(message: Message, group_index: int) -> str:
|
|
|
104
117
|
return f"group_index_{group_index}"
|
|
105
118
|
|
|
106
119
|
|
|
107
|
-
def group_messages(messages: list[Message]) -> list[dict[str, Any]]:
|
|
120
|
+
def group_messages(
|
|
121
|
+
messages: list[Message], *, id_offset: int = 0, reserved_ids: Iterable[str] | None = None
|
|
122
|
+
) -> list[dict[str, Any]]:
|
|
108
123
|
"""Compute group spans and metadata for annotation.
|
|
109
124
|
|
|
125
|
+
Args:
|
|
126
|
+
messages: The messages (or a slice of them) to group.
|
|
127
|
+
|
|
128
|
+
Keyword Args:
|
|
129
|
+
id_offset: Absolute starting index used when auto-assigning ``message_id``
|
|
130
|
+
values, so incremental annotation of a list slice produces ids that
|
|
131
|
+
stay unique across the full list.
|
|
132
|
+
reserved_ids: Message ids that already exist outside ``messages`` (for
|
|
133
|
+
example in a preserved prefix). Auto-assigned ids are guaranteed not
|
|
134
|
+
to collide with these, preventing duplicate ids across the full list.
|
|
135
|
+
|
|
110
136
|
Returns:
|
|
111
137
|
Ordered list of lightweight span dicts with keys:
|
|
112
138
|
``group_id``, ``kind``, ``start_index``, ``end_index``, ``has_reasoning``.
|
|
113
139
|
"""
|
|
114
|
-
_ensure_message_ids(messages)
|
|
140
|
+
_ensure_message_ids(messages, id_offset=id_offset, reserved_ids=reserved_ids)
|
|
115
141
|
spans: list[dict[str, Any]] = []
|
|
116
142
|
i = 0
|
|
117
143
|
group_index = 0
|
|
@@ -439,7 +465,8 @@ def annotate_message_groups(
|
|
|
439
465
|
if previous_group_index is not None:
|
|
440
466
|
group_index_offset = previous_group_index + 1
|
|
441
467
|
|
|
442
|
-
spans = group_messages(messages[start_index:])
|
468
|
+
reserved_ids = {message.message_id for message in messages[:start_index] if message.message_id}
|
|
469
|
+
spans = group_messages(messages[start_index:], id_offset=start_index, reserved_ids=reserved_ids)
|
|
443
470
|
for span_index, span in enumerate(spans):
|
|
444
471
|
group_id = str(span["group_id"])
|
|
445
472
|
kind = _coerce_group_kind(span["kind"])
|
|
@@ -1277,6 +1304,121 @@ class CompactionProvider(ContextProvider):
|
|
|
1277
1304
|
# whether excluded messages are loaded on the next turn.
|
|
1278
1305
|
|
|
1279
1306
|
|
|
1307
|
+
class ContextWindowCompactionStrategy:
|
|
1308
|
+
"""Token-budget compaction derived from a model's context window size.
|
|
1309
|
+
|
|
1310
|
+
Computes an input budget from the model's context window and output token
|
|
1311
|
+
limits, then applies a two-phase compaction pipeline:
|
|
1312
|
+
|
|
1313
|
+
1. **Tool result eviction** — collapses older tool-call groups into summaries
|
|
1314
|
+
when included tokens exceed ``tool_eviction_threshold`` of the input budget.
|
|
1315
|
+
2. **Truncation** — removes oldest non-system groups when included tokens
|
|
1316
|
+
exceed ``truncation_threshold`` of the input budget.
|
|
1317
|
+
|
|
1318
|
+
The class uses two independent :class:`TokenBudgetComposedStrategy`
|
|
1319
|
+
instances — one per phase — so each fires only when its own threshold
|
|
1320
|
+
is exceeded.
|
|
1321
|
+
|
|
1322
|
+
Examples:
|
|
1323
|
+
.. code-block:: python
|
|
1324
|
+
|
|
1325
|
+
from agent_framework import ContextWindowCompactionStrategy, CompactionProvider
|
|
1326
|
+
|
|
1327
|
+
strategy = ContextWindowCompactionStrategy(
|
|
1328
|
+
max_context_window_tokens=128_000,
|
|
1329
|
+
max_output_tokens=16_384,
|
|
1330
|
+
)
|
|
1331
|
+
provider = CompactionProvider(before_strategy=strategy)
|
|
1332
|
+
"""
|
|
1333
|
+
|
|
1334
|
+
DEFAULT_TOOL_EVICTION_THRESHOLD: float = 0.5
|
|
1335
|
+
"""Default fraction of input budget at which tool result eviction triggers."""
|
|
1336
|
+
|
|
1337
|
+
DEFAULT_TRUNCATION_THRESHOLD: float = 0.8
|
|
1338
|
+
"""Default fraction of input budget at which truncation triggers."""
|
|
1339
|
+
|
|
1340
|
+
def __init__(
|
|
1341
|
+
self,
|
|
1342
|
+
*,
|
|
1343
|
+
max_context_window_tokens: int,
|
|
1344
|
+
max_output_tokens: int,
|
|
1345
|
+
tokenizer: TokenizerProtocol | None = None,
|
|
1346
|
+
tool_eviction_threshold: float = DEFAULT_TOOL_EVICTION_THRESHOLD,
|
|
1347
|
+
truncation_threshold: float = DEFAULT_TRUNCATION_THRESHOLD,
|
|
1348
|
+
keep_last_tool_call_groups: int = 4,
|
|
1349
|
+
) -> None:
|
|
1350
|
+
"""Create a context-window compaction strategy.
|
|
1351
|
+
|
|
1352
|
+
Keyword Args:
|
|
1353
|
+
max_context_window_tokens: The model's maximum context window size
|
|
1354
|
+
in tokens (e.g. 128,000).
|
|
1355
|
+
max_output_tokens: The model's maximum output tokens per response
|
|
1356
|
+
(e.g. 16,384).
|
|
1357
|
+
tokenizer: Token counter for measuring message sizes. Defaults to
|
|
1358
|
+
:class:`CharacterEstimatorTokenizer` (4 chars/token heuristic).
|
|
1359
|
+
tool_eviction_threshold: Fraction of input budget (0.0, 1.0] at
|
|
1360
|
+
which tool result eviction triggers. Defaults to 0.5.
|
|
1361
|
+
truncation_threshold: Fraction of input budget (0.0, 1.0] at which
|
|
1362
|
+
truncation triggers. Must be ≥ ``tool_eviction_threshold``.
|
|
1363
|
+
Defaults to 0.8.
|
|
1364
|
+
keep_last_tool_call_groups: Number of most recent tool-call groups
|
|
1365
|
+
to retain verbatim during tool eviction. Older groups are
|
|
1366
|
+
collapsed into summaries. Defaults to 4.
|
|
1367
|
+
|
|
1368
|
+
Raises:
|
|
1369
|
+
ValueError: If thresholds are out of range or inconsistent.
|
|
1370
|
+
"""
|
|
1371
|
+
if max_context_window_tokens <= 0:
|
|
1372
|
+
raise ValueError("max_context_window_tokens must be positive.")
|
|
1373
|
+
if max_output_tokens < 0 or max_output_tokens >= max_context_window_tokens:
|
|
1374
|
+
raise ValueError("max_output_tokens must be >= 0 and < max_context_window_tokens.")
|
|
1375
|
+
if not (0.0 < tool_eviction_threshold <= 1.0):
|
|
1376
|
+
raise ValueError("tool_eviction_threshold must be in (0.0, 1.0].")
|
|
1377
|
+
if not (0.0 < truncation_threshold <= 1.0):
|
|
1378
|
+
raise ValueError("truncation_threshold must be in (0.0, 1.0].")
|
|
1379
|
+
if truncation_threshold < tool_eviction_threshold:
|
|
1380
|
+
raise ValueError("truncation_threshold must be >= tool_eviction_threshold.")
|
|
1381
|
+
|
|
1382
|
+
resolved_tokenizer = tokenizer or CharacterEstimatorTokenizer()
|
|
1383
|
+
input_budget = max_context_window_tokens - max_output_tokens
|
|
1384
|
+
tool_eviction_tokens = int(input_budget * tool_eviction_threshold)
|
|
1385
|
+
truncation_tokens = int(input_budget * truncation_threshold)
|
|
1386
|
+
|
|
1387
|
+
self.max_context_window_tokens = max_context_window_tokens
|
|
1388
|
+
self.max_output_tokens = max_output_tokens
|
|
1389
|
+
self.input_budget_tokens = input_budget
|
|
1390
|
+
self.tool_eviction_threshold = tool_eviction_threshold
|
|
1391
|
+
self.truncation_threshold = truncation_threshold
|
|
1392
|
+
|
|
1393
|
+
self._tool_eviction = TokenBudgetComposedStrategy(
|
|
1394
|
+
token_budget=tool_eviction_tokens,
|
|
1395
|
+
tokenizer=resolved_tokenizer,
|
|
1396
|
+
strategies=[
|
|
1397
|
+
ToolResultCompactionStrategy(keep_last_tool_call_groups=keep_last_tool_call_groups),
|
|
1398
|
+
],
|
|
1399
|
+
)
|
|
1400
|
+
self._truncation = TokenBudgetComposedStrategy(
|
|
1401
|
+
token_budget=truncation_tokens,
|
|
1402
|
+
tokenizer=resolved_tokenizer,
|
|
1403
|
+
strategies=[
|
|
1404
|
+
TruncationStrategy(
|
|
1405
|
+
max_n=truncation_tokens,
|
|
1406
|
+
compact_to=tool_eviction_tokens,
|
|
1407
|
+
tokenizer=resolved_tokenizer,
|
|
1408
|
+
),
|
|
1409
|
+
],
|
|
1410
|
+
)
|
|
1411
|
+
|
|
1412
|
+
async def __call__(self, messages: list[Message]) -> bool:
|
|
1413
|
+
"""Apply the two-phase compaction pipeline.
|
|
1414
|
+
|
|
1415
|
+
Returns:
|
|
1416
|
+
True if compaction changed message inclusion; otherwise False.
|
|
1417
|
+
"""
|
|
1418
|
+
changed = await self._tool_eviction(messages)
|
|
1419
|
+
return (await self._truncation(messages)) or changed
|
|
1420
|
+
|
|
1421
|
+
|
|
1280
1422
|
__all__ = [
|
|
1281
1423
|
"COMPACTION_STATE_KEY",
|
|
1282
1424
|
"EXCLUDED_KEY",
|
|
@@ -1293,6 +1435,7 @@ __all__ = [
|
|
|
1293
1435
|
"CharacterEstimatorTokenizer",
|
|
1294
1436
|
"CompactionProvider",
|
|
1295
1437
|
"CompactionStrategy",
|
|
1438
|
+
"ContextWindowCompactionStrategy",
|
|
1296
1439
|
"GroupKind",
|
|
1297
1440
|
"SelectiveToolCallCompactionStrategy",
|
|
1298
1441
|
"SlidingWindowStrategy",
|
|
@@ -311,12 +311,15 @@ class EvalScoreResult:
|
|
|
311
311
|
score: Numeric score from the evaluator.
|
|
312
312
|
passed: Whether the item passed this evaluator's threshold.
|
|
313
313
|
sample: Optional raw evaluator output (rationale, metadata).
|
|
314
|
+
dimensions: Per-dimension scores when this evaluator is a rubric
|
|
315
|
+
evaluator. ``None`` for non-rubric (e.g. built-in) evaluators.
|
|
314
316
|
"""
|
|
315
317
|
|
|
316
318
|
name: str
|
|
317
319
|
score: float
|
|
318
320
|
passed: bool | None = None
|
|
319
321
|
sample: dict[str, Any] | None = None
|
|
322
|
+
dimensions: list[RubricScore] | None = None
|
|
320
323
|
|
|
321
324
|
|
|
322
325
|
@experimental(feature_id=ExperimentalFeature.EVALS)
|
|
@@ -496,6 +499,179 @@ class EvalResults:
|
|
|
496
499
|
detail += f" Errored items: {', '.join(summaries)}."
|
|
497
500
|
raise EvalNotPassedError(detail)
|
|
498
501
|
|
|
502
|
+
def assert_score_at_least(
|
|
503
|
+
self,
|
|
504
|
+
min_score: float,
|
|
505
|
+
*,
|
|
506
|
+
evaluator: str | None = None,
|
|
507
|
+
msg: str | None = None,
|
|
508
|
+
) -> None:
|
|
509
|
+
"""Assert every item's score (optionally filtered by evaluator) is ``>= min_score``.
|
|
510
|
+
|
|
511
|
+
Designed for CI gates on generated rubric evaluators (e.g.
|
|
512
|
+
``results.assert_score_at_least(0.80)``). Includes any
|
|
513
|
+
sub-results from workflow evaluations.
|
|
514
|
+
|
|
515
|
+
Args:
|
|
516
|
+
min_score: Minimum acceptable score (inclusive).
|
|
517
|
+
evaluator: When set, only check scores from the evaluator
|
|
518
|
+
whose ``EvalScoreResult.name`` matches.
|
|
519
|
+
msg: Optional custom failure message.
|
|
520
|
+
|
|
521
|
+
Raises:
|
|
522
|
+
EvalNotPassedError: When any matching score is below the threshold.
|
|
523
|
+
"""
|
|
524
|
+
offenders: list[str] = []
|
|
525
|
+
|
|
526
|
+
def _check(results: EvalResults) -> None:
|
|
527
|
+
for item in results.items:
|
|
528
|
+
for score in item.scores:
|
|
529
|
+
if evaluator is not None and score.name != evaluator:
|
|
530
|
+
continue
|
|
531
|
+
if score.score < min_score:
|
|
532
|
+
offenders.append(f"{item.item_id}/{score.name}={score.score:.3f}")
|
|
533
|
+
for sub in results.sub_results.values():
|
|
534
|
+
_check(sub)
|
|
535
|
+
|
|
536
|
+
_check(self)
|
|
537
|
+
if offenders:
|
|
538
|
+
detail = msg or (
|
|
539
|
+
f"{len(offenders)} score(s) below threshold {min_score}"
|
|
540
|
+
f"{' for ' + evaluator if evaluator else ''}: {', '.join(offenders[:5])}"
|
|
541
|
+
+ (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
|
|
542
|
+
)
|
|
543
|
+
raise EvalNotPassedError(detail)
|
|
544
|
+
|
|
545
|
+
def assert_dimension_score_at_least(
|
|
546
|
+
self,
|
|
547
|
+
dimension_id: str,
|
|
548
|
+
min_score: float,
|
|
549
|
+
*,
|
|
550
|
+
evaluator: str | None = None,
|
|
551
|
+
require_applicable: bool = False,
|
|
552
|
+
msg: str | None = None,
|
|
553
|
+
) -> None:
|
|
554
|
+
"""Assert every item's score for a rubric *dimension* is ``>= min_score``.
|
|
555
|
+
|
|
556
|
+
Walks ``EvalScoreResult.dimensions`` looking for the named
|
|
557
|
+
dimension across all items (and sub-results). Non-applicable
|
|
558
|
+
dimensions are skipped by default; pass
|
|
559
|
+
``require_applicable=True`` to fail when no applicable score is
|
|
560
|
+
produced.
|
|
561
|
+
|
|
562
|
+
Args:
|
|
563
|
+
dimension_id: Dimension id (matches the rubric definition).
|
|
564
|
+
min_score: Minimum acceptable dimension score (inclusive).
|
|
565
|
+
evaluator: When set, only consider scores from the evaluator
|
|
566
|
+
whose ``EvalScoreResult.name`` matches.
|
|
567
|
+
require_applicable: When ``True``, missing or non-applicable
|
|
568
|
+
dimension scores raise. Defaults to ``False`` (skip).
|
|
569
|
+
msg: Optional custom failure message.
|
|
570
|
+
|
|
571
|
+
Raises:
|
|
572
|
+
EvalNotPassedError: When the dimension fails the threshold.
|
|
573
|
+
"""
|
|
574
|
+
offenders: list[str] = []
|
|
575
|
+
missing_items: list[str] = []
|
|
576
|
+
|
|
577
|
+
def _check(results: EvalResults) -> None:
|
|
578
|
+
for item in results.items:
|
|
579
|
+
found_applicable = False
|
|
580
|
+
for score in item.scores:
|
|
581
|
+
if evaluator is not None and score.name != evaluator:
|
|
582
|
+
continue
|
|
583
|
+
if not score.dimensions:
|
|
584
|
+
continue
|
|
585
|
+
for rs in score.dimensions:
|
|
586
|
+
if rs.id != dimension_id:
|
|
587
|
+
continue
|
|
588
|
+
if not rs.applicable:
|
|
589
|
+
continue
|
|
590
|
+
found_applicable = True
|
|
591
|
+
if rs.score is None or rs.score < min_score:
|
|
592
|
+
offenders.append(
|
|
593
|
+
f"{item.item_id}/{score.name}/{dimension_id}="
|
|
594
|
+
f"{rs.score if rs.score is not None else 'None'}"
|
|
595
|
+
)
|
|
596
|
+
if require_applicable and not found_applicable:
|
|
597
|
+
missing_items.append(item.item_id)
|
|
598
|
+
for sub in results.sub_results.values():
|
|
599
|
+
_check(sub)
|
|
600
|
+
|
|
601
|
+
_check(self)
|
|
602
|
+
problems: list[str] = []
|
|
603
|
+
if offenders:
|
|
604
|
+
problems.append(
|
|
605
|
+
f"{len(offenders)} dimension score(s) for '{dimension_id}' below {min_score}: "
|
|
606
|
+
f"{', '.join(offenders[:5])}" + (f" (+{len(offenders) - 5} more)" if len(offenders) > 5 else "")
|
|
607
|
+
)
|
|
608
|
+
if missing_items:
|
|
609
|
+
problems.append(
|
|
610
|
+
f"Dimension '{dimension_id}' not applicable on {len(missing_items)} item(s): "
|
|
611
|
+
f"{', '.join(missing_items[:5])}"
|
|
612
|
+
)
|
|
613
|
+
if problems:
|
|
614
|
+
raise EvalNotPassedError(msg or "; ".join(problems))
|
|
615
|
+
|
|
616
|
+
def assert_no_failed_items(self, msg: str | None = None) -> None:
|
|
617
|
+
"""Assert no item ended in ``fail`` or ``error`` status.
|
|
618
|
+
|
|
619
|
+
Includes any sub-results from workflow evaluations.
|
|
620
|
+
|
|
621
|
+
Args:
|
|
622
|
+
msg: Optional custom failure message.
|
|
623
|
+
|
|
624
|
+
Raises:
|
|
625
|
+
EvalNotPassedError: When any item failed or errored.
|
|
626
|
+
"""
|
|
627
|
+
bad: list[str] = []
|
|
628
|
+
|
|
629
|
+
def _check(results: EvalResults) -> None:
|
|
630
|
+
for item in results.items:
|
|
631
|
+
if item.is_failed or item.is_error:
|
|
632
|
+
bad.append(f"{item.item_id}:{item.status}")
|
|
633
|
+
for sub in results.sub_results.values():
|
|
634
|
+
_check(sub)
|
|
635
|
+
|
|
636
|
+
_check(self)
|
|
637
|
+
if bad:
|
|
638
|
+
detail = msg or (
|
|
639
|
+
f"{len(bad)} item(s) failed or errored: {', '.join(bad[:5])}"
|
|
640
|
+
+ (f" (+{len(bad) - 5} more)" if len(bad) > 5 else "")
|
|
641
|
+
)
|
|
642
|
+
raise EvalNotPassedError(detail)
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
# endregion
|
|
646
|
+
|
|
647
|
+
# region Generated rubric evaluators
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
@experimental(feature_id=ExperimentalFeature.EVALS)
|
|
651
|
+
@dataclass(frozen=True)
|
|
652
|
+
class RubricScore:
|
|
653
|
+
"""A single dimension's score from a rubric-based evaluator run.
|
|
654
|
+
|
|
655
|
+
Rubric evaluators emit one ``RubricScore`` per dimension per item.
|
|
656
|
+
Attached to :class:`EvalScoreResult` as a typed view of the raw
|
|
657
|
+
``properties.rubric_scores`` payload returned by providers such as
|
|
658
|
+
Foundry's generated rubric evaluators.
|
|
659
|
+
|
|
660
|
+
Attributes:
|
|
661
|
+
id: Dimension id (matches the rubric definition).
|
|
662
|
+
score: Numeric score, or ``None`` when the dimension was marked
|
|
663
|
+
non-applicable for this item.
|
|
664
|
+
applicable: Whether the dimension applied to this item.
|
|
665
|
+
weight: Dimension weight (mirrors the rubric definition).
|
|
666
|
+
reason: Short rationale produced by the evaluator.
|
|
667
|
+
"""
|
|
668
|
+
|
|
669
|
+
id: str
|
|
670
|
+
score: int | None
|
|
671
|
+
applicable: bool
|
|
672
|
+
weight: int
|
|
673
|
+
reason: str
|
|
674
|
+
|
|
499
675
|
|
|
500
676
|
# endregion
|
|
501
677
|
|