fast-agent-mcp 0.4.7 (fast_agent_mcp-0.4.7-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fast_agent/__init__.py +183 -0
- fast_agent/acp/__init__.py +19 -0
- fast_agent/acp/acp_aware_mixin.py +304 -0
- fast_agent/acp/acp_context.py +437 -0
- fast_agent/acp/content_conversion.py +136 -0
- fast_agent/acp/filesystem_runtime.py +427 -0
- fast_agent/acp/permission_store.py +269 -0
- fast_agent/acp/server/__init__.py +5 -0
- fast_agent/acp/server/agent_acp_server.py +1472 -0
- fast_agent/acp/slash_commands.py +1050 -0
- fast_agent/acp/terminal_runtime.py +408 -0
- fast_agent/acp/tool_permission_adapter.py +125 -0
- fast_agent/acp/tool_permissions.py +474 -0
- fast_agent/acp/tool_progress.py +814 -0
- fast_agent/agents/__init__.py +85 -0
- fast_agent/agents/agent_types.py +64 -0
- fast_agent/agents/llm_agent.py +350 -0
- fast_agent/agents/llm_decorator.py +1139 -0
- fast_agent/agents/mcp_agent.py +1337 -0
- fast_agent/agents/tool_agent.py +271 -0
- fast_agent/agents/workflow/agents_as_tools_agent.py +849 -0
- fast_agent/agents/workflow/chain_agent.py +212 -0
- fast_agent/agents/workflow/evaluator_optimizer.py +380 -0
- fast_agent/agents/workflow/iterative_planner.py +652 -0
- fast_agent/agents/workflow/maker_agent.py +379 -0
- fast_agent/agents/workflow/orchestrator_models.py +218 -0
- fast_agent/agents/workflow/orchestrator_prompts.py +248 -0
- fast_agent/agents/workflow/parallel_agent.py +250 -0
- fast_agent/agents/workflow/router_agent.py +353 -0
- fast_agent/cli/__init__.py +0 -0
- fast_agent/cli/__main__.py +73 -0
- fast_agent/cli/commands/acp.py +159 -0
- fast_agent/cli/commands/auth.py +404 -0
- fast_agent/cli/commands/check_config.py +783 -0
- fast_agent/cli/commands/go.py +514 -0
- fast_agent/cli/commands/quickstart.py +557 -0
- fast_agent/cli/commands/serve.py +143 -0
- fast_agent/cli/commands/server_helpers.py +114 -0
- fast_agent/cli/commands/setup.py +174 -0
- fast_agent/cli/commands/url_parser.py +190 -0
- fast_agent/cli/constants.py +40 -0
- fast_agent/cli/main.py +115 -0
- fast_agent/cli/terminal.py +24 -0
- fast_agent/config.py +798 -0
- fast_agent/constants.py +41 -0
- fast_agent/context.py +279 -0
- fast_agent/context_dependent.py +50 -0
- fast_agent/core/__init__.py +92 -0
- fast_agent/core/agent_app.py +448 -0
- fast_agent/core/core_app.py +137 -0
- fast_agent/core/direct_decorators.py +784 -0
- fast_agent/core/direct_factory.py +620 -0
- fast_agent/core/error_handling.py +27 -0
- fast_agent/core/exceptions.py +90 -0
- fast_agent/core/executor/__init__.py +0 -0
- fast_agent/core/executor/executor.py +280 -0
- fast_agent/core/executor/task_registry.py +32 -0
- fast_agent/core/executor/workflow_signal.py +324 -0
- fast_agent/core/fastagent.py +1186 -0
- fast_agent/core/logging/__init__.py +5 -0
- fast_agent/core/logging/events.py +138 -0
- fast_agent/core/logging/json_serializer.py +164 -0
- fast_agent/core/logging/listeners.py +309 -0
- fast_agent/core/logging/logger.py +278 -0
- fast_agent/core/logging/transport.py +481 -0
- fast_agent/core/prompt.py +9 -0
- fast_agent/core/prompt_templates.py +183 -0
- fast_agent/core/validation.py +326 -0
- fast_agent/event_progress.py +62 -0
- fast_agent/history/history_exporter.py +49 -0
- fast_agent/human_input/__init__.py +47 -0
- fast_agent/human_input/elicitation_handler.py +123 -0
- fast_agent/human_input/elicitation_state.py +33 -0
- fast_agent/human_input/form_elements.py +59 -0
- fast_agent/human_input/form_fields.py +256 -0
- fast_agent/human_input/simple_form.py +113 -0
- fast_agent/human_input/types.py +40 -0
- fast_agent/interfaces.py +310 -0
- fast_agent/llm/__init__.py +9 -0
- fast_agent/llm/cancellation.py +22 -0
- fast_agent/llm/fastagent_llm.py +931 -0
- fast_agent/llm/internal/passthrough.py +161 -0
- fast_agent/llm/internal/playback.py +129 -0
- fast_agent/llm/internal/silent.py +41 -0
- fast_agent/llm/internal/slow.py +38 -0
- fast_agent/llm/memory.py +275 -0
- fast_agent/llm/model_database.py +490 -0
- fast_agent/llm/model_factory.py +388 -0
- fast_agent/llm/model_info.py +102 -0
- fast_agent/llm/prompt_utils.py +155 -0
- fast_agent/llm/provider/anthropic/anthropic_utils.py +84 -0
- fast_agent/llm/provider/anthropic/cache_planner.py +56 -0
- fast_agent/llm/provider/anthropic/llm_anthropic.py +796 -0
- fast_agent/llm/provider/anthropic/multipart_converter_anthropic.py +462 -0
- fast_agent/llm/provider/bedrock/bedrock_utils.py +218 -0
- fast_agent/llm/provider/bedrock/llm_bedrock.py +2207 -0
- fast_agent/llm/provider/bedrock/multipart_converter_bedrock.py +84 -0
- fast_agent/llm/provider/google/google_converter.py +466 -0
- fast_agent/llm/provider/google/llm_google_native.py +681 -0
- fast_agent/llm/provider/openai/llm_aliyun.py +31 -0
- fast_agent/llm/provider/openai/llm_azure.py +143 -0
- fast_agent/llm/provider/openai/llm_deepseek.py +76 -0
- fast_agent/llm/provider/openai/llm_generic.py +35 -0
- fast_agent/llm/provider/openai/llm_google_oai.py +32 -0
- fast_agent/llm/provider/openai/llm_groq.py +42 -0
- fast_agent/llm/provider/openai/llm_huggingface.py +85 -0
- fast_agent/llm/provider/openai/llm_openai.py +1195 -0
- fast_agent/llm/provider/openai/llm_openai_compatible.py +138 -0
- fast_agent/llm/provider/openai/llm_openrouter.py +45 -0
- fast_agent/llm/provider/openai/llm_tensorzero_openai.py +128 -0
- fast_agent/llm/provider/openai/llm_xai.py +38 -0
- fast_agent/llm/provider/openai/multipart_converter_openai.py +561 -0
- fast_agent/llm/provider/openai/openai_multipart.py +169 -0
- fast_agent/llm/provider/openai/openai_utils.py +67 -0
- fast_agent/llm/provider/openai/responses.py +133 -0
- fast_agent/llm/provider_key_manager.py +139 -0
- fast_agent/llm/provider_types.py +34 -0
- fast_agent/llm/request_params.py +61 -0
- fast_agent/llm/sampling_converter.py +98 -0
- fast_agent/llm/stream_types.py +9 -0
- fast_agent/llm/usage_tracking.py +445 -0
- fast_agent/mcp/__init__.py +56 -0
- fast_agent/mcp/common.py +26 -0
- fast_agent/mcp/elicitation_factory.py +84 -0
- fast_agent/mcp/elicitation_handlers.py +164 -0
- fast_agent/mcp/gen_client.py +83 -0
- fast_agent/mcp/helpers/__init__.py +36 -0
- fast_agent/mcp/helpers/content_helpers.py +352 -0
- fast_agent/mcp/helpers/server_config_helpers.py +25 -0
- fast_agent/mcp/hf_auth.py +147 -0
- fast_agent/mcp/interfaces.py +92 -0
- fast_agent/mcp/logger_textio.py +108 -0
- fast_agent/mcp/mcp_agent_client_session.py +411 -0
- fast_agent/mcp/mcp_aggregator.py +2175 -0
- fast_agent/mcp/mcp_connection_manager.py +723 -0
- fast_agent/mcp/mcp_content.py +262 -0
- fast_agent/mcp/mime_utils.py +108 -0
- fast_agent/mcp/oauth_client.py +509 -0
- fast_agent/mcp/prompt.py +159 -0
- fast_agent/mcp/prompt_message_extended.py +155 -0
- fast_agent/mcp/prompt_render.py +84 -0
- fast_agent/mcp/prompt_serialization.py +580 -0
- fast_agent/mcp/prompts/__init__.py +0 -0
- fast_agent/mcp/prompts/__main__.py +7 -0
- fast_agent/mcp/prompts/prompt_constants.py +18 -0
- fast_agent/mcp/prompts/prompt_helpers.py +238 -0
- fast_agent/mcp/prompts/prompt_load.py +186 -0
- fast_agent/mcp/prompts/prompt_server.py +552 -0
- fast_agent/mcp/prompts/prompt_template.py +438 -0
- fast_agent/mcp/resource_utils.py +215 -0
- fast_agent/mcp/sampling.py +200 -0
- fast_agent/mcp/server/__init__.py +4 -0
- fast_agent/mcp/server/agent_server.py +613 -0
- fast_agent/mcp/skybridge.py +44 -0
- fast_agent/mcp/sse_tracking.py +287 -0
- fast_agent/mcp/stdio_tracking_simple.py +59 -0
- fast_agent/mcp/streamable_http_tracking.py +309 -0
- fast_agent/mcp/tool_execution_handler.py +137 -0
- fast_agent/mcp/tool_permission_handler.py +88 -0
- fast_agent/mcp/transport_tracking.py +634 -0
- fast_agent/mcp/types.py +24 -0
- fast_agent/mcp/ui_agent.py +48 -0
- fast_agent/mcp/ui_mixin.py +209 -0
- fast_agent/mcp_server_registry.py +89 -0
- fast_agent/py.typed +0 -0
- fast_agent/resources/examples/data-analysis/analysis-campaign.py +189 -0
- fast_agent/resources/examples/data-analysis/analysis.py +68 -0
- fast_agent/resources/examples/data-analysis/fastagent.config.yaml +41 -0
- fast_agent/resources/examples/data-analysis/mount-point/WA_Fn-UseC_-HR-Employee-Attrition.csv +1471 -0
- fast_agent/resources/examples/mcp/elicitations/elicitation_account_server.py +88 -0
- fast_agent/resources/examples/mcp/elicitations/elicitation_forms_server.py +297 -0
- fast_agent/resources/examples/mcp/elicitations/elicitation_game_server.py +164 -0
- fast_agent/resources/examples/mcp/elicitations/fastagent.config.yaml +35 -0
- fast_agent/resources/examples/mcp/elicitations/fastagent.secrets.yaml.example +17 -0
- fast_agent/resources/examples/mcp/elicitations/forms_demo.py +107 -0
- fast_agent/resources/examples/mcp/elicitations/game_character.py +65 -0
- fast_agent/resources/examples/mcp/elicitations/game_character_handler.py +256 -0
- fast_agent/resources/examples/mcp/elicitations/tool_call.py +21 -0
- fast_agent/resources/examples/mcp/state-transfer/agent_one.py +18 -0
- fast_agent/resources/examples/mcp/state-transfer/agent_two.py +18 -0
- fast_agent/resources/examples/mcp/state-transfer/fastagent.config.yaml +27 -0
- fast_agent/resources/examples/mcp/state-transfer/fastagent.secrets.yaml.example +15 -0
- fast_agent/resources/examples/researcher/fastagent.config.yaml +61 -0
- fast_agent/resources/examples/researcher/researcher-eval.py +53 -0
- fast_agent/resources/examples/researcher/researcher-imp.py +189 -0
- fast_agent/resources/examples/researcher/researcher.py +36 -0
- fast_agent/resources/examples/tensorzero/.env.sample +2 -0
- fast_agent/resources/examples/tensorzero/Makefile +31 -0
- fast_agent/resources/examples/tensorzero/README.md +56 -0
- fast_agent/resources/examples/tensorzero/agent.py +35 -0
- fast_agent/resources/examples/tensorzero/demo_images/clam.jpg +0 -0
- fast_agent/resources/examples/tensorzero/demo_images/crab.png +0 -0
- fast_agent/resources/examples/tensorzero/demo_images/shrimp.png +0 -0
- fast_agent/resources/examples/tensorzero/docker-compose.yml +105 -0
- fast_agent/resources/examples/tensorzero/fastagent.config.yaml +19 -0
- fast_agent/resources/examples/tensorzero/image_demo.py +67 -0
- fast_agent/resources/examples/tensorzero/mcp_server/Dockerfile +25 -0
- fast_agent/resources/examples/tensorzero/mcp_server/entrypoint.sh +35 -0
- fast_agent/resources/examples/tensorzero/mcp_server/mcp_server.py +31 -0
- fast_agent/resources/examples/tensorzero/mcp_server/pyproject.toml +11 -0
- fast_agent/resources/examples/tensorzero/simple_agent.py +25 -0
- fast_agent/resources/examples/tensorzero/tensorzero_config/system_schema.json +29 -0
- fast_agent/resources/examples/tensorzero/tensorzero_config/system_template.minijinja +11 -0
- fast_agent/resources/examples/tensorzero/tensorzero_config/tensorzero.toml +35 -0
- fast_agent/resources/examples/workflows/agents_as_tools_extended.py +73 -0
- fast_agent/resources/examples/workflows/agents_as_tools_simple.py +50 -0
- fast_agent/resources/examples/workflows/chaining.py +37 -0
- fast_agent/resources/examples/workflows/evaluator.py +77 -0
- fast_agent/resources/examples/workflows/fastagent.config.yaml +26 -0
- fast_agent/resources/examples/workflows/graded_report.md +89 -0
- fast_agent/resources/examples/workflows/human_input.py +28 -0
- fast_agent/resources/examples/workflows/maker.py +156 -0
- fast_agent/resources/examples/workflows/orchestrator.py +70 -0
- fast_agent/resources/examples/workflows/parallel.py +56 -0
- fast_agent/resources/examples/workflows/router.py +69 -0
- fast_agent/resources/examples/workflows/short_story.md +13 -0
- fast_agent/resources/examples/workflows/short_story.txt +19 -0
- fast_agent/resources/setup/.gitignore +30 -0
- fast_agent/resources/setup/agent.py +28 -0
- fast_agent/resources/setup/fastagent.config.yaml +65 -0
- fast_agent/resources/setup/fastagent.secrets.yaml.example +38 -0
- fast_agent/resources/setup/pyproject.toml.tmpl +23 -0
- fast_agent/skills/__init__.py +9 -0
- fast_agent/skills/registry.py +235 -0
- fast_agent/tools/elicitation.py +369 -0
- fast_agent/tools/shell_runtime.py +402 -0
- fast_agent/types/__init__.py +59 -0
- fast_agent/types/conversation_summary.py +294 -0
- fast_agent/types/llm_stop_reason.py +78 -0
- fast_agent/types/message_search.py +249 -0
- fast_agent/ui/__init__.py +38 -0
- fast_agent/ui/console.py +59 -0
- fast_agent/ui/console_display.py +1080 -0
- fast_agent/ui/elicitation_form.py +946 -0
- fast_agent/ui/elicitation_style.py +59 -0
- fast_agent/ui/enhanced_prompt.py +1400 -0
- fast_agent/ui/history_display.py +734 -0
- fast_agent/ui/interactive_prompt.py +1199 -0
- fast_agent/ui/markdown_helpers.py +104 -0
- fast_agent/ui/markdown_truncator.py +1004 -0
- fast_agent/ui/mcp_display.py +857 -0
- fast_agent/ui/mcp_ui_utils.py +235 -0
- fast_agent/ui/mermaid_utils.py +169 -0
- fast_agent/ui/message_primitives.py +50 -0
- fast_agent/ui/notification_tracker.py +205 -0
- fast_agent/ui/plain_text_truncator.py +68 -0
- fast_agent/ui/progress_display.py +10 -0
- fast_agent/ui/rich_progress.py +195 -0
- fast_agent/ui/streaming.py +774 -0
- fast_agent/ui/streaming_buffer.py +449 -0
- fast_agent/ui/tool_display.py +422 -0
- fast_agent/ui/usage_display.py +204 -0
- fast_agent/utils/__init__.py +5 -0
- fast_agent/utils/reasoning_stream_parser.py +77 -0
- fast_agent/utils/time.py +22 -0
- fast_agent/workflow_telemetry.py +261 -0
- fast_agent_mcp-0.4.7.dist-info/METADATA +788 -0
- fast_agent_mcp-0.4.7.dist-info/RECORD +261 -0
- fast_agent_mcp-0.4.7.dist-info/WHEEL +4 -0
- fast_agent_mcp-0.4.7.dist-info/entry_points.txt +7 -0
- fast_agent_mcp-0.4.7.dist-info/licenses/LICENSE +201 -0
fast_agent/llm/internal/passthrough.py
ADDED
@@ -0,0 +1,161 @@
import json  # Import at the module level
from typing import Any

from mcp import CallToolRequest, Tool
from mcp.types import CallToolRequestParams, PromptMessage

from fast_agent.core.logging.logger import get_logger
from fast_agent.core.prompt import Prompt
from fast_agent.llm.fastagent_llm import (
    FastAgentLLM,
    RequestParams,
)
from fast_agent.llm.provider_types import Provider
from fast_agent.llm.usage_tracking import create_turn_usage_from_messages
from fast_agent.mcp.helpers.content_helpers import get_text
from fast_agent.types import PromptMessageExtended
from fast_agent.types.llm_stop_reason import LlmStopReason

CALL_TOOL_INDICATOR = "***CALL_TOOL"
FIXED_RESPONSE_INDICATOR = "***FIXED_RESPONSE"


class PassthroughLLM(FastAgentLLM):
    """
    A specialized LLM implementation that simply passes through input messages without modification.

    This is useful for cases where you need an object with the AugmentedLLM interface
    but want to preserve the original message without any processing, such as in a
    parallel workflow where no fan-in aggregation is needed.
    """

    def __init__(
        self, provider=Provider.FAST_AGENT, name: str = "Passthrough", **kwargs: dict[str, Any]
    ) -> None:
        super().__init__(name=name, provider=provider, **kwargs)
        self.logger = get_logger(__name__)
        self._messages = [PromptMessage]
        self._fixed_response: str | None = None
        self._correlation_id: int = 0

    async def initialize(self) -> None:
        pass

    def _parse_tool_command(self, command: str) -> tuple[str, dict | None]:
        """
        Parse a tool command string into tool name and arguments.

        Args:
            command: The command string in format "***CALL_TOOL <tool_name> [arguments_json]"

        Returns:
            Tuple of (tool_name, arguments_dict)

        Raises:
            ValueError: If command format is invalid
        """
        parts = command.split(" ", 2)
        if len(parts) < 2:
            raise ValueError("Invalid format. Expected '***CALL_TOOL <tool_name> [arguments_json]'")

        tool_name = parts[1].strip()
        arguments = None

        if len(parts) > 2:
            try:
                arguments = json.loads(parts[2])
            except json.JSONDecodeError:
                raise ValueError(f"Invalid JSON arguments: {parts[2]}")

        self.logger.info(f"Calling tool {tool_name} with arguments {arguments}")
        return tool_name, arguments

    async def _apply_prompt_provider_specific(
        self,
        multipart_messages: list["PromptMessageExtended"],
        request_params: RequestParams | None = None,
        tools: list[Tool] | None = None,
        is_template: bool = False,
    ) -> PromptMessageExtended:
        # Add messages to history with proper is_prompt flag
        self.history.extend(multipart_messages, is_prompt=is_template)

        last_message = multipart_messages[-1]
        # If the caller already provided an assistant reply (e.g., history replay), return it as-is.
        if last_message.role == "assistant":
            return last_message

        tool_calls: dict[str, CallToolRequest] = {}
        stop_reason: LlmStopReason = LlmStopReason.END_TURN
        if self.is_tool_call(last_message):
            tool_name, arguments = self._parse_tool_command(last_message.first_text())
            tool_calls[f"correlationId{self._correlation_id}"] = CallToolRequest(
                method="tools/call",
                params=CallToolRequestParams(name=tool_name, arguments=arguments),
            )
            self._correlation_id += 1
            stop_reason = LlmStopReason.TOOL_USE

        if last_message.first_text().startswith(FIXED_RESPONSE_INDICATOR):
            self._fixed_response = (
                last_message.first_text().split(FIXED_RESPONSE_INDICATOR, 1)[1].strip()
            )

        if len(last_message.tool_results or {}) > 0:
            assert last_message.tool_results
            concatenated_content = " ".join(
                [
                    (get_text(tool_result.content[0]) or "<empty>")
                    for tool_result in last_message.tool_results.values()
                ]
            )
            result = Prompt.assistant(concatenated_content, stop_reason=stop_reason)

        elif self._fixed_response:
            result = Prompt.assistant(
                self._fixed_response, tool_calls=tool_calls, stop_reason=stop_reason
            )
        else:
            # Walk backwards through messages concatenating while role is "user"
            user_messages = []
            for message in reversed(multipart_messages):
                if message.role != "user":
                    break
                user_messages.append(message.all_text())
            concatenated_content = "\n".join(reversed(user_messages))

            result = Prompt.assistant(
                concatenated_content,
                tool_calls=tool_calls,
                stop_reason=stop_reason,
            )

        turn_usage = create_turn_usage_from_messages(
            input_content=multipart_messages[-1].all_text(),
            output_content=result.all_text(),
            model="passthrough",
            model_type="passthrough",
            tool_calls=len(tool_calls),
            delay_seconds=0.0,
        )
        self.usage_accumulator.add_turn(turn_usage)

        return result

    def _convert_extended_messages_to_provider(
        self, messages: list[PromptMessageExtended]
    ) -> list[Any]:
        """
        Convert PromptMessageExtended list to provider format.
        For PassthroughLLM, we don't actually make API calls, so this just returns empty list.

        Args:
            messages: List of PromptMessageExtended objects

        Returns:
            Empty list (passthrough doesn't use provider-specific messages)
        """
        return []

    def is_tool_call(self, message: PromptMessageExtended) -> bool:
        return message.first_text().startswith(CALL_TOOL_INDICATOR)
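For reference, PassthroughLLM recognizes two control strings in the last user message: ***CALL_TOOL issues a tool call and ***FIXED_RESPONSE pins the reply for later turns. Below is a minimal standalone sketch of the parsing performed by _parse_tool_command above; the get_weather tool name and its arguments are hypothetical.

import json

command = '***CALL_TOOL get_weather {"city": "London"}'
parts = command.split(" ", 2)   # ["***CALL_TOOL", "get_weather", '{"city": "London"}']
tool_name = parts[1].strip()    # "get_weather"
arguments = json.loads(parts[2]) if len(parts) > 2 else None
print(tool_name, arguments)     # get_weather {'city': 'London'}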
fast_agent/llm/internal/playback.py
ADDED
@@ -0,0 +1,129 @@
from typing import Any, Type, Union

from mcp import Tool
from mcp.types import PromptMessage

from fast_agent.core.exceptions import ModelConfigError
from fast_agent.core.prompt import Prompt
from fast_agent.interfaces import ModelT
from fast_agent.llm.internal.passthrough import PassthroughLLM
from fast_agent.llm.provider_types import Provider
from fast_agent.llm.usage_tracking import create_turn_usage_from_messages
from fast_agent.mcp.helpers.content_helpers import normalize_to_extended_list
from fast_agent.mcp.prompts.prompt_helpers import MessageContent
from fast_agent.types import PromptMessageExtended, RequestParams

# TODO -- support tool usage/replay


class PlaybackLLM(PassthroughLLM):
    """
    A specialized LLM implementation that plays back assistant messages when loaded with prompts.

    Unlike the PassthroughLLM which simply passes through messages without modification,
    PlaybackLLM is designed to simulate a conversation by playing back prompt messages
    in sequence when loaded with prompts through apply_prompt_template.

    After apply_prompts has been called, each call to generate_str returns the next
    "ASSISTANT" message in the loaded messages. If no messages are set or all messages have
    been played back, it returns a message indicating that messages are exhausted.
    """

    def __init__(self, name: str = "Playback", **kwargs: dict[str, Any]) -> None:
        super().__init__(name=name, provider=Provider.FAST_AGENT, **kwargs)
        self._messages: list[PromptMessageExtended] = []
        self._current_index = -1
        self._overage = -1

    def _get_next_assistant_message(self) -> PromptMessageExtended:
        """
        Get the next assistant message from the loaded messages.
        Increments the current message index and skips user messages.
        """
        # Find next assistant message
        while self._current_index < len(self._messages):
            message = self._messages[self._current_index]
            self._current_index += 1
            if "assistant" != message.role:
                continue

            return message

        self._overage += 1
        return Prompt.assistant(
            f"MESSAGES EXHAUSTED (list size {len(self._messages)}) ({self._overage} overage)"
        )

    async def generate(
        self,
        messages: Union[
            str,
            PromptMessage,
            PromptMessageExtended,
            list[Union[str, PromptMessage, PromptMessageExtended]],
        ],
        request_params: RequestParams | None = None,
        tools: list[Tool] | None = None,
    ) -> PromptMessageExtended:
        """
        Handle playback of messages in two modes:
        1. First call: store messages for playback and return "HISTORY LOADED"
        2. Subsequent calls: return the next assistant message
        """
        # Normalize all input types to a list of PromptMessageExtended
        multipart_messages = normalize_to_extended_list(messages)

        # If this is the first call (initialization) or we're loading a prompt template
        # with multiple messages (comes from apply_prompt)
        if -1 == self._current_index:
            if len(multipart_messages) > 1:
                self._messages = multipart_messages
            else:
                self._messages.extend(multipart_messages)

            # Reset the index to the beginning for proper playback
            self._current_index = 0

            # In PlaybackLLM, we always return "HISTORY LOADED" on initialization,
            # regardless of the prompt content. The next call will return messages.
            return Prompt.assistant(f"HISTORY LOADED ({len(self._messages)}) messages")

        response = self._get_next_assistant_message()

        # Track usage for this playback "turn"
        try:
            input_content = str(multipart_messages) if multipart_messages else ""
            output_content = MessageContent.get_first_text(response) or ""

            turn_usage = create_turn_usage_from_messages(
                input_content=input_content,
                output_content=output_content,
                model="playback",
                model_type="playback",
                tool_calls=0,
                delay_seconds=0.0,
            )
            self.usage_accumulator.add_turn(turn_usage)

        except Exception as e:
            self.logger.warning(f"Failed to track usage: {e}")

        return response

    async def structured(
        self,
        messages: list[PromptMessageExtended],
        model: Type[ModelT],
        request_params: RequestParams | None = None,
    ) -> tuple[ModelT | None, PromptMessageExtended]:
        """
        Handle structured requests by returning the next assistant message.
        """

        if -1 == self._current_index:
            raise ModelConfigError("Use generate() to load playback history")

        return self._structured_from_multipart(
            self._get_next_assistant_message(),
            model,
        )
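The playback behavior is a simple index walk: the first generate() call stores the loaded messages and returns "HISTORY LOADED", and each later call advances past user messages to the next assistant message, falling back to an exhaustion notice. Below is a standalone sketch of that walk; the role/text tuples are hypothetical stand-ins for PromptMessageExtended objects.

messages = [("user", "hi"), ("assistant", "hello"), ("user", "bye"), ("assistant", "goodbye")]
index = 0

def next_assistant() -> str:
    # Advance the cursor, skipping non-assistant entries, as in _get_next_assistant_message.
    global index
    while index < len(messages):
        role, text = messages[index]
        index += 1
        if role != "assistant":
            continue
        return text
    return "MESSAGES EXHAUSTED"

print(next_assistant())  # hello
print(next_assistant())  # goodbye
print(next_assistant())  # MESSAGES EXHAUSTED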
fast_agent/llm/internal/silent.py
ADDED
@@ -0,0 +1,41 @@
"""Silent LLM implementation that suppresses display output while maintaining functionality."""

from typing import Any

from fast_agent.llm.internal.passthrough import PassthroughLLM
from fast_agent.llm.provider_types import Provider
from fast_agent.llm.usage_tracking import TurnUsage, UsageAccumulator


class ZeroUsageAccumulator(UsageAccumulator):
    """Usage accumulator that always reports zero usage."""

    def add_turn(self, turn: TurnUsage) -> None:
        """Override to do nothing - no usage accumulation."""
        pass


# TODO -- this won't work anymore
class SilentLLM(PassthroughLLM):
    """
    A specialized LLM that processes messages like PassthroughLLM but suppresses all display output.

    This is particularly useful for parallel agent workflows where the fan-in agent
    should aggregate results without polluting the console with intermediate output.
    Token counting is disabled - the model always reports zero usage.
    """

    def __init__(
        self, provider=Provider.FAST_AGENT, name: str = "Silent", **kwargs: dict[str, Any]
    ) -> None:
        super().__init__(name=name, provider=provider, **kwargs)
        # Override with zero usage accumulator - silent model reports no usage
        self.usage_accumulator = ZeroUsageAccumulator()

    def show_tool_calls(self, tool_calls: Any, **kwargs) -> None:
        """Override to suppress tool call display."""
        pass

    def show_tool_results(self, tool_results: Any, **kwargs) -> None:
        """Override to suppress tool result display."""
        pass
fast_agent/llm/internal/slow.py
ADDED
@@ -0,0 +1,38 @@
import asyncio
from typing import Any

from mcp import Tool

from fast_agent.llm.fastagent_llm import (
    RequestParams,
)
from fast_agent.llm.internal.passthrough import PassthroughLLM
from fast_agent.llm.provider_types import Provider
from fast_agent.types import PromptMessageExtended


class SlowLLM(PassthroughLLM):
    """
    A specialized LLM implementation that sleeps for 3 seconds before responding like PassthroughLLM.

    This is useful for testing scenarios where you want to simulate slow responses
    or for debugging timing-related issues in parallel workflows.
    """

    def __init__(
        self, provider=Provider.FAST_AGENT, name: str = "Slow", **kwargs: dict[str, Any]
    ) -> None:
        super().__init__(name=name, provider=provider, **kwargs)

    async def _apply_prompt_provider_specific(
        self,
        multipart_messages: list["PromptMessageExtended"],
        request_params: RequestParams | None = None,
        tools: list[Tool] | None = None,
        is_template: bool = False,
    ) -> PromptMessageExtended:
        """Sleep for 3 seconds then apply prompt like PassthroughLLM."""
        await asyncio.sleep(3)
        return await super()._apply_prompt_provider_specific(
            multipart_messages, request_params, tools, is_template
        )
fast_agent/llm/memory.py
ADDED
@@ -0,0 +1,275 @@
from typing import Generic, Protocol, TypeVar

# Define our own type variable for implementation use
MessageParamT = TypeVar("MessageParamT")


class Memory(Protocol, Generic[MessageParamT]):
    """
    Simple memory management for storing past interactions in-memory.

    IMPORTANT: As of the conversation history architecture refactor,
    provider history is DIAGNOSTIC ONLY. Messages are generated fresh
    from _message_history on each API call via _convert_to_provider_format().

    The get() method should NOT be called by provider code for API calls.
    It may still be used for debugging/inspection purposes.
    """

    # TODO: saqadri - add checkpointing and other advanced memory capabilities

    def __init__(self) -> None: ...

    def extend(self, messages: list[MessageParamT], is_prompt: bool = False) -> None: ...

    def set(self, messages: list[MessageParamT], is_prompt: bool = False) -> None: ...

    def append(self, message: MessageParamT, is_prompt: bool = False) -> None: ...

    def get(self, include_completion_history: bool = True) -> list[MessageParamT]: ...

    def clear(self, clear_prompts: bool = False) -> None: ...

    def pop(self, *, from_prompts: bool = False) -> MessageParamT | None: ...


class SimpleMemory(Memory, Generic[MessageParamT]):
    """
    Simple memory management for storing past interactions in-memory.

    Maintains both prompt messages (which are always included) and
    generated conversation history (which is included based on use_history setting).
    """

    def __init__(self) -> None:
        self.history: list[MessageParamT] = []
        self.prompt_messages: list[MessageParamT] = []  # Always included
        self.conversation_cache_positions: list[
            int
        ] = []  # Track active conversation cache positions
        self.cache_walk_distance: int = 6  # Messages between cache blocks
        self.max_conversation_cache_blocks: int = 2  # Maximum conversation cache blocks

    def extend(self, messages: list[MessageParamT], is_prompt: bool = False) -> None:
        """
        Add multiple messages to history.

        Args:
            messages: Messages to add
            is_prompt: If True, add to prompt_messages instead of regular history
        """
        if is_prompt:
            self.prompt_messages.extend(messages)
        else:
            self.history.extend(messages)

    def set(self, messages: list[MessageParamT], is_prompt: bool = False) -> None:
        """
        Replace messages in history.

        Args:
            messages: Messages to set
            is_prompt: If True, replace prompt_messages instead of regular history
        """
        if is_prompt:
            self.prompt_messages = messages.copy()
        else:
            self.history = messages.copy()

    def append(self, message: MessageParamT, is_prompt: bool = False) -> None:
        """
        Add a single message to history.

        Args:
            message: Message to add
            is_prompt: If True, add to prompt_messages instead of regular history
        """
        if is_prompt:
            self.prompt_messages.append(message)
        else:
            self.history.append(message)

    def get(self, include_completion_history: bool = True) -> list[MessageParamT]:
        """
        Get all messages in memory.

        DEPRECATED: Provider history is now diagnostic only. This method returns
        a diagnostic snapshot and should NOT be used for API calls. Messages for
        API calls are generated fresh from _message_history via
        _convert_to_provider_format().

        Args:
            include_history: If True, include regular history messages
                If False, only return prompt messages

        Returns:
            Combined list of prompt messages and optionally history messages
            (for diagnostic/inspection purposes only)
        """
        # Note: We don't emit a warning here because this method is still
        # legitimately used for diagnostic purposes and by some internal code.
        # The important change is that provider completion methods no longer
        # call this for API message construction.
        if include_completion_history:
            return self.prompt_messages + self.history
        else:
            return self.prompt_messages.copy()

    def clear(self, clear_prompts: bool = False) -> None:
        """
        Clear history and optionally prompt messages.

        Args:
            clear_prompts: If True, also clear prompt messages
        """
        self.history = []
        self.conversation_cache_positions = []  # Reset cache positions
        if clear_prompts:
            self.prompt_messages = []

    def pop(self, *, from_prompts: bool = False) -> MessageParamT | None:
        """
        Remove and return the most recent message from history or prompt messages.

        Args:
            from_prompts: If True, pop from prompt_messages instead of history

        Returns:
            The removed message if available, otherwise None
        """
        if from_prompts:
            if not self.prompt_messages:
                return None
            return self.prompt_messages.pop()

        if not self.history:
            return None

        removed = self.history.pop()
        # Recalculate cache positions now that the history shrank
        self.conversation_cache_positions = self._calculate_cache_positions(len(self.history))
        return removed

    def should_apply_conversation_cache(self) -> bool:
        """
        Determine if conversation caching should be applied based on walking algorithm.

        Returns:
            True if we should add or update cache blocks
        """
        total_messages = len(self.history)

        # Need at least cache_walk_distance messages to start caching
        if total_messages < self.cache_walk_distance:
            return False

        # Check if we need to add a new cache block
        return len(self._calculate_cache_positions(total_messages)) != len(
            self.conversation_cache_positions
        )

    def _calculate_cache_positions(self, total_conversation_messages: int) -> list[int]:
        """
        Calculate where cache blocks should be placed using walking algorithm.

        Args:
            total_conversation_messages: Number of conversation messages (not including prompts)

        Returns:
            List of positions (relative to conversation start) where cache should be placed
        """
        positions = []

        # Place cache blocks every cache_walk_distance messages
        for i in range(
            self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance
        ):
            positions.append(i)
            if len(positions) >= self.max_conversation_cache_blocks:
                break

        # Keep only the most recent cache blocks (walking behavior)
        if len(positions) > self.max_conversation_cache_blocks:
            positions = positions[-self.max_conversation_cache_blocks :]

        return positions

    def get_conversation_cache_updates(self) -> dict:
        """
        Get cache position updates needed for the walking algorithm.

        Returns:
            Dict with 'add', 'remove', and 'active' position lists (relative to full message array)
        """
        total_conversation_messages = len(self.history)
        new_positions = self._calculate_cache_positions(total_conversation_messages)

        # Convert to absolute positions (including prompt messages)
        prompt_offset = len(self.prompt_messages)
        new_absolute_positions = [pos + prompt_offset for pos in new_positions]

        old_positions_set = set(self.conversation_cache_positions)
        new_positions_set = set(new_absolute_positions)

        return {
            "add": sorted(new_positions_set - old_positions_set),
            "remove": sorted(old_positions_set - new_positions_set),
            "active": sorted(new_absolute_positions),
        }

    def apply_conversation_cache_updates(self, updates: dict) -> None:
        """
        Apply cache position updates.

        Args:
            updates: Dict from get_conversation_cache_updates()
        """
        self.conversation_cache_positions = updates["active"].copy()

    def remove_cache_control_from_messages(
        self, messages: list[MessageParamT], positions: list[int]
    ) -> None:
        """
        Remove cache control from specified message positions.

        Args:
            messages: The message array to modify
            positions: List of positions to remove cache control from
        """
        for pos in positions:
            if pos < len(messages):
                message = messages[pos]
                if isinstance(message, dict) and "content" in message:
                    content_list = message["content"]
                    if isinstance(content_list, list):
                        for content_block in content_list:
                            if isinstance(content_block, dict) and "cache_control" in content_block:
                                del content_block["cache_control"]

    def add_cache_control_to_messages(
        self, messages: list[MessageParamT], positions: list[int]
    ) -> int:
        """
        Add cache control to specified message positions.

        Args:
            messages: The message array to modify
            positions: List of positions to add cache control to

        Returns:
            Number of cache blocks successfully applied
        """
        applied_count = 0
        for pos in positions:
            if pos < len(messages):
                message = messages[pos]
                if isinstance(message, dict) and "content" in message:
                    content_list = message["content"]
                    if isinstance(content_list, list) and content_list:
                        # Apply cache control to the last content block
                        for content_block in reversed(content_list):
                            if isinstance(content_block, dict):
                                content_block["cache_control"] = {"type": "ephemeral"}
                                applied_count += 1
                                break
        return applied_count
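The cache "walking" can be summarized as follows: with the defaults above (cache_walk_distance=6, max_conversation_cache_blocks=2), a cache position is proposed at every sixth conversation message, capped at two blocks, and get_conversation_cache_updates() then offsets those positions by the number of prompt messages and diffs them against the currently active set. Below is a standalone sketch of the position calculation; the message counts are illustrative.

def calculate_cache_positions(total_messages: int, walk_distance: int = 6, max_blocks: int = 2) -> list[int]:
    # Mirrors SimpleMemory._calculate_cache_positions above.
    positions = []
    for i in range(walk_distance - 1, total_messages, walk_distance):
        positions.append(i)
        if len(positions) >= max_blocks:
            break
    return positions[-max_blocks:]

print(calculate_cache_positions(5))   # [] - fewer than walk_distance messages, no caching yet
print(calculate_cache_positions(8))   # [5]
print(calculate_cache_positions(20))  # [5, 11] - capped at max_blocks positions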