fast-agent-mcp 0.4.7 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fast_agent/__init__.py +183 -0
- fast_agent/acp/__init__.py +19 -0
- fast_agent/acp/acp_aware_mixin.py +304 -0
- fast_agent/acp/acp_context.py +437 -0
- fast_agent/acp/content_conversion.py +136 -0
- fast_agent/acp/filesystem_runtime.py +427 -0
- fast_agent/acp/permission_store.py +269 -0
- fast_agent/acp/server/__init__.py +5 -0
- fast_agent/acp/server/agent_acp_server.py +1472 -0
- fast_agent/acp/slash_commands.py +1050 -0
- fast_agent/acp/terminal_runtime.py +408 -0
- fast_agent/acp/tool_permission_adapter.py +125 -0
- fast_agent/acp/tool_permissions.py +474 -0
- fast_agent/acp/tool_progress.py +814 -0
- fast_agent/agents/__init__.py +85 -0
- fast_agent/agents/agent_types.py +64 -0
- fast_agent/agents/llm_agent.py +350 -0
- fast_agent/agents/llm_decorator.py +1139 -0
- fast_agent/agents/mcp_agent.py +1337 -0
- fast_agent/agents/tool_agent.py +271 -0
- fast_agent/agents/workflow/agents_as_tools_agent.py +849 -0
- fast_agent/agents/workflow/chain_agent.py +212 -0
- fast_agent/agents/workflow/evaluator_optimizer.py +380 -0
- fast_agent/agents/workflow/iterative_planner.py +652 -0
- fast_agent/agents/workflow/maker_agent.py +379 -0
- fast_agent/agents/workflow/orchestrator_models.py +218 -0
- fast_agent/agents/workflow/orchestrator_prompts.py +248 -0
- fast_agent/agents/workflow/parallel_agent.py +250 -0
- fast_agent/agents/workflow/router_agent.py +353 -0
- fast_agent/cli/__init__.py +0 -0
- fast_agent/cli/__main__.py +73 -0
- fast_agent/cli/commands/acp.py +159 -0
- fast_agent/cli/commands/auth.py +404 -0
- fast_agent/cli/commands/check_config.py +783 -0
- fast_agent/cli/commands/go.py +514 -0
- fast_agent/cli/commands/quickstart.py +557 -0
- fast_agent/cli/commands/serve.py +143 -0
- fast_agent/cli/commands/server_helpers.py +114 -0
- fast_agent/cli/commands/setup.py +174 -0
- fast_agent/cli/commands/url_parser.py +190 -0
- fast_agent/cli/constants.py +40 -0
- fast_agent/cli/main.py +115 -0
- fast_agent/cli/terminal.py +24 -0
- fast_agent/config.py +798 -0
- fast_agent/constants.py +41 -0
- fast_agent/context.py +279 -0
- fast_agent/context_dependent.py +50 -0
- fast_agent/core/__init__.py +92 -0
- fast_agent/core/agent_app.py +448 -0
- fast_agent/core/core_app.py +137 -0
- fast_agent/core/direct_decorators.py +784 -0
- fast_agent/core/direct_factory.py +620 -0
- fast_agent/core/error_handling.py +27 -0
- fast_agent/core/exceptions.py +90 -0
- fast_agent/core/executor/__init__.py +0 -0
- fast_agent/core/executor/executor.py +280 -0
- fast_agent/core/executor/task_registry.py +32 -0
- fast_agent/core/executor/workflow_signal.py +324 -0
- fast_agent/core/fastagent.py +1186 -0
- fast_agent/core/logging/__init__.py +5 -0
- fast_agent/core/logging/events.py +138 -0
- fast_agent/core/logging/json_serializer.py +164 -0
- fast_agent/core/logging/listeners.py +309 -0
- fast_agent/core/logging/logger.py +278 -0
- fast_agent/core/logging/transport.py +481 -0
- fast_agent/core/prompt.py +9 -0
- fast_agent/core/prompt_templates.py +183 -0
- fast_agent/core/validation.py +326 -0
- fast_agent/event_progress.py +62 -0
- fast_agent/history/history_exporter.py +49 -0
- fast_agent/human_input/__init__.py +47 -0
- fast_agent/human_input/elicitation_handler.py +123 -0
- fast_agent/human_input/elicitation_state.py +33 -0
- fast_agent/human_input/form_elements.py +59 -0
- fast_agent/human_input/form_fields.py +256 -0
- fast_agent/human_input/simple_form.py +113 -0
- fast_agent/human_input/types.py +40 -0
- fast_agent/interfaces.py +310 -0
- fast_agent/llm/__init__.py +9 -0
- fast_agent/llm/cancellation.py +22 -0
- fast_agent/llm/fastagent_llm.py +931 -0
- fast_agent/llm/internal/passthrough.py +161 -0
- fast_agent/llm/internal/playback.py +129 -0
- fast_agent/llm/internal/silent.py +41 -0
- fast_agent/llm/internal/slow.py +38 -0
- fast_agent/llm/memory.py +275 -0
- fast_agent/llm/model_database.py +490 -0
- fast_agent/llm/model_factory.py +388 -0
- fast_agent/llm/model_info.py +102 -0
- fast_agent/llm/prompt_utils.py +155 -0
- fast_agent/llm/provider/anthropic/anthropic_utils.py +84 -0
- fast_agent/llm/provider/anthropic/cache_planner.py +56 -0
- fast_agent/llm/provider/anthropic/llm_anthropic.py +796 -0
- fast_agent/llm/provider/anthropic/multipart_converter_anthropic.py +462 -0
- fast_agent/llm/provider/bedrock/bedrock_utils.py +218 -0
- fast_agent/llm/provider/bedrock/llm_bedrock.py +2207 -0
- fast_agent/llm/provider/bedrock/multipart_converter_bedrock.py +84 -0
- fast_agent/llm/provider/google/google_converter.py +466 -0
- fast_agent/llm/provider/google/llm_google_native.py +681 -0
- fast_agent/llm/provider/openai/llm_aliyun.py +31 -0
- fast_agent/llm/provider/openai/llm_azure.py +143 -0
- fast_agent/llm/provider/openai/llm_deepseek.py +76 -0
- fast_agent/llm/provider/openai/llm_generic.py +35 -0
- fast_agent/llm/provider/openai/llm_google_oai.py +32 -0
- fast_agent/llm/provider/openai/llm_groq.py +42 -0
- fast_agent/llm/provider/openai/llm_huggingface.py +85 -0
- fast_agent/llm/provider/openai/llm_openai.py +1195 -0
- fast_agent/llm/provider/openai/llm_openai_compatible.py +138 -0
- fast_agent/llm/provider/openai/llm_openrouter.py +45 -0
- fast_agent/llm/provider/openai/llm_tensorzero_openai.py +128 -0
- fast_agent/llm/provider/openai/llm_xai.py +38 -0
- fast_agent/llm/provider/openai/multipart_converter_openai.py +561 -0
- fast_agent/llm/provider/openai/openai_multipart.py +169 -0
- fast_agent/llm/provider/openai/openai_utils.py +67 -0
- fast_agent/llm/provider/openai/responses.py +133 -0
- fast_agent/llm/provider_key_manager.py +139 -0
- fast_agent/llm/provider_types.py +34 -0
- fast_agent/llm/request_params.py +61 -0
- fast_agent/llm/sampling_converter.py +98 -0
- fast_agent/llm/stream_types.py +9 -0
- fast_agent/llm/usage_tracking.py +445 -0
- fast_agent/mcp/__init__.py +56 -0
- fast_agent/mcp/common.py +26 -0
- fast_agent/mcp/elicitation_factory.py +84 -0
- fast_agent/mcp/elicitation_handlers.py +164 -0
- fast_agent/mcp/gen_client.py +83 -0
- fast_agent/mcp/helpers/__init__.py +36 -0
- fast_agent/mcp/helpers/content_helpers.py +352 -0
- fast_agent/mcp/helpers/server_config_helpers.py +25 -0
- fast_agent/mcp/hf_auth.py +147 -0
- fast_agent/mcp/interfaces.py +92 -0
- fast_agent/mcp/logger_textio.py +108 -0
- fast_agent/mcp/mcp_agent_client_session.py +411 -0
- fast_agent/mcp/mcp_aggregator.py +2175 -0
- fast_agent/mcp/mcp_connection_manager.py +723 -0
- fast_agent/mcp/mcp_content.py +262 -0
- fast_agent/mcp/mime_utils.py +108 -0
- fast_agent/mcp/oauth_client.py +509 -0
- fast_agent/mcp/prompt.py +159 -0
- fast_agent/mcp/prompt_message_extended.py +155 -0
- fast_agent/mcp/prompt_render.py +84 -0
- fast_agent/mcp/prompt_serialization.py +580 -0
- fast_agent/mcp/prompts/__init__.py +0 -0
- fast_agent/mcp/prompts/__main__.py +7 -0
- fast_agent/mcp/prompts/prompt_constants.py +18 -0
- fast_agent/mcp/prompts/prompt_helpers.py +238 -0
- fast_agent/mcp/prompts/prompt_load.py +186 -0
- fast_agent/mcp/prompts/prompt_server.py +552 -0
- fast_agent/mcp/prompts/prompt_template.py +438 -0
- fast_agent/mcp/resource_utils.py +215 -0
- fast_agent/mcp/sampling.py +200 -0
- fast_agent/mcp/server/__init__.py +4 -0
- fast_agent/mcp/server/agent_server.py +613 -0
- fast_agent/mcp/skybridge.py +44 -0
- fast_agent/mcp/sse_tracking.py +287 -0
- fast_agent/mcp/stdio_tracking_simple.py +59 -0
- fast_agent/mcp/streamable_http_tracking.py +309 -0
- fast_agent/mcp/tool_execution_handler.py +137 -0
- fast_agent/mcp/tool_permission_handler.py +88 -0
- fast_agent/mcp/transport_tracking.py +634 -0
- fast_agent/mcp/types.py +24 -0
- fast_agent/mcp/ui_agent.py +48 -0
- fast_agent/mcp/ui_mixin.py +209 -0
- fast_agent/mcp_server_registry.py +89 -0
- fast_agent/py.typed +0 -0
- fast_agent/resources/examples/data-analysis/analysis-campaign.py +189 -0
- fast_agent/resources/examples/data-analysis/analysis.py +68 -0
- fast_agent/resources/examples/data-analysis/fastagent.config.yaml +41 -0
- fast_agent/resources/examples/data-analysis/mount-point/WA_Fn-UseC_-HR-Employee-Attrition.csv +1471 -0
- fast_agent/resources/examples/mcp/elicitations/elicitation_account_server.py +88 -0
- fast_agent/resources/examples/mcp/elicitations/elicitation_forms_server.py +297 -0
- fast_agent/resources/examples/mcp/elicitations/elicitation_game_server.py +164 -0
- fast_agent/resources/examples/mcp/elicitations/fastagent.config.yaml +35 -0
- fast_agent/resources/examples/mcp/elicitations/fastagent.secrets.yaml.example +17 -0
- fast_agent/resources/examples/mcp/elicitations/forms_demo.py +107 -0
- fast_agent/resources/examples/mcp/elicitations/game_character.py +65 -0
- fast_agent/resources/examples/mcp/elicitations/game_character_handler.py +256 -0
- fast_agent/resources/examples/mcp/elicitations/tool_call.py +21 -0
- fast_agent/resources/examples/mcp/state-transfer/agent_one.py +18 -0
- fast_agent/resources/examples/mcp/state-transfer/agent_two.py +18 -0
- fast_agent/resources/examples/mcp/state-transfer/fastagent.config.yaml +27 -0
- fast_agent/resources/examples/mcp/state-transfer/fastagent.secrets.yaml.example +15 -0
- fast_agent/resources/examples/researcher/fastagent.config.yaml +61 -0
- fast_agent/resources/examples/researcher/researcher-eval.py +53 -0
- fast_agent/resources/examples/researcher/researcher-imp.py +189 -0
- fast_agent/resources/examples/researcher/researcher.py +36 -0
- fast_agent/resources/examples/tensorzero/.env.sample +2 -0
- fast_agent/resources/examples/tensorzero/Makefile +31 -0
- fast_agent/resources/examples/tensorzero/README.md +56 -0
- fast_agent/resources/examples/tensorzero/agent.py +35 -0
- fast_agent/resources/examples/tensorzero/demo_images/clam.jpg +0 -0
- fast_agent/resources/examples/tensorzero/demo_images/crab.png +0 -0
- fast_agent/resources/examples/tensorzero/demo_images/shrimp.png +0 -0
- fast_agent/resources/examples/tensorzero/docker-compose.yml +105 -0
- fast_agent/resources/examples/tensorzero/fastagent.config.yaml +19 -0
- fast_agent/resources/examples/tensorzero/image_demo.py +67 -0
- fast_agent/resources/examples/tensorzero/mcp_server/Dockerfile +25 -0
- fast_agent/resources/examples/tensorzero/mcp_server/entrypoint.sh +35 -0
- fast_agent/resources/examples/tensorzero/mcp_server/mcp_server.py +31 -0
- fast_agent/resources/examples/tensorzero/mcp_server/pyproject.toml +11 -0
- fast_agent/resources/examples/tensorzero/simple_agent.py +25 -0
- fast_agent/resources/examples/tensorzero/tensorzero_config/system_schema.json +29 -0
- fast_agent/resources/examples/tensorzero/tensorzero_config/system_template.minijinja +11 -0
- fast_agent/resources/examples/tensorzero/tensorzero_config/tensorzero.toml +35 -0
- fast_agent/resources/examples/workflows/agents_as_tools_extended.py +73 -0
- fast_agent/resources/examples/workflows/agents_as_tools_simple.py +50 -0
- fast_agent/resources/examples/workflows/chaining.py +37 -0
- fast_agent/resources/examples/workflows/evaluator.py +77 -0
- fast_agent/resources/examples/workflows/fastagent.config.yaml +26 -0
- fast_agent/resources/examples/workflows/graded_report.md +89 -0
- fast_agent/resources/examples/workflows/human_input.py +28 -0
- fast_agent/resources/examples/workflows/maker.py +156 -0
- fast_agent/resources/examples/workflows/orchestrator.py +70 -0
- fast_agent/resources/examples/workflows/parallel.py +56 -0
- fast_agent/resources/examples/workflows/router.py +69 -0
- fast_agent/resources/examples/workflows/short_story.md +13 -0
- fast_agent/resources/examples/workflows/short_story.txt +19 -0
- fast_agent/resources/setup/.gitignore +30 -0
- fast_agent/resources/setup/agent.py +28 -0
- fast_agent/resources/setup/fastagent.config.yaml +65 -0
- fast_agent/resources/setup/fastagent.secrets.yaml.example +38 -0
- fast_agent/resources/setup/pyproject.toml.tmpl +23 -0
- fast_agent/skills/__init__.py +9 -0
- fast_agent/skills/registry.py +235 -0
- fast_agent/tools/elicitation.py +369 -0
- fast_agent/tools/shell_runtime.py +402 -0
- fast_agent/types/__init__.py +59 -0
- fast_agent/types/conversation_summary.py +294 -0
- fast_agent/types/llm_stop_reason.py +78 -0
- fast_agent/types/message_search.py +249 -0
- fast_agent/ui/__init__.py +38 -0
- fast_agent/ui/console.py +59 -0
- fast_agent/ui/console_display.py +1080 -0
- fast_agent/ui/elicitation_form.py +946 -0
- fast_agent/ui/elicitation_style.py +59 -0
- fast_agent/ui/enhanced_prompt.py +1400 -0
- fast_agent/ui/history_display.py +734 -0
- fast_agent/ui/interactive_prompt.py +1199 -0
- fast_agent/ui/markdown_helpers.py +104 -0
- fast_agent/ui/markdown_truncator.py +1004 -0
- fast_agent/ui/mcp_display.py +857 -0
- fast_agent/ui/mcp_ui_utils.py +235 -0
- fast_agent/ui/mermaid_utils.py +169 -0
- fast_agent/ui/message_primitives.py +50 -0
- fast_agent/ui/notification_tracker.py +205 -0
- fast_agent/ui/plain_text_truncator.py +68 -0
- fast_agent/ui/progress_display.py +10 -0
- fast_agent/ui/rich_progress.py +195 -0
- fast_agent/ui/streaming.py +774 -0
- fast_agent/ui/streaming_buffer.py +449 -0
- fast_agent/ui/tool_display.py +422 -0
- fast_agent/ui/usage_display.py +204 -0
- fast_agent/utils/__init__.py +5 -0
- fast_agent/utils/reasoning_stream_parser.py +77 -0
- fast_agent/utils/time.py +22 -0
- fast_agent/workflow_telemetry.py +261 -0
- fast_agent_mcp-0.4.7.dist-info/METADATA +788 -0
- fast_agent_mcp-0.4.7.dist-info/RECORD +261 -0
- fast_agent_mcp-0.4.7.dist-info/WHEEL +4 -0
- fast_agent_mcp-0.4.7.dist-info/entry_points.txt +7 -0
- fast_agent_mcp-0.4.7.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,796 @@

```python
import asyncio
import json
from typing import Any, Type, Union, cast

from anthropic import APIError, AsyncAnthropic, AuthenticationError
from anthropic.lib.streaming import AsyncMessageStream
from anthropic.types import (
    Message,
    MessageParam,
    TextBlock,
    TextBlockParam,
    ToolParam,
    ToolUseBlock,
    ToolUseBlockParam,
    Usage,
)
from mcp import Tool
from mcp.types import (
    CallToolRequest,
    CallToolRequestParams,
    CallToolResult,
    ContentBlock,
    TextContent,
)

from fast_agent.constants import FAST_AGENT_ERROR_CHANNEL
from fast_agent.core.exceptions import ProviderKeyError
from fast_agent.core.logging.logger import get_logger
from fast_agent.core.prompt import Prompt
from fast_agent.event_progress import ProgressAction
from fast_agent.interfaces import ModelT
from fast_agent.llm.fastagent_llm import (
    FastAgentLLM,
    RequestParams,
)
from fast_agent.llm.provider.anthropic.cache_planner import AnthropicCachePlanner
from fast_agent.llm.provider.anthropic.multipart_converter_anthropic import (
    AnthropicConverter,
)
from fast_agent.llm.provider_types import Provider
from fast_agent.llm.usage_tracking import TurnUsage
from fast_agent.mcp.helpers.content_helpers import text_content
from fast_agent.types import PromptMessageExtended
from fast_agent.types.llm_stop_reason import LlmStopReason

DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-0"
STRUCTURED_OUTPUT_TOOL_NAME = "return_structured_output"

# Type alias for system field - can be string or list of text blocks with cache control
SystemParam = Union[str, list[TextBlockParam]]

logger = get_logger(__name__)


class AnthropicLLM(FastAgentLLM[MessageParam, Message]):
    CONVERSATION_CACHE_WALK_DISTANCE = 6
    MAX_CONVERSATION_CACHE_BLOCKS = 2
    # Anthropic-specific parameter exclusions
    ANTHROPIC_EXCLUDE_FIELDS = {
        FastAgentLLM.PARAM_MESSAGES,
        FastAgentLLM.PARAM_MODEL,
        FastAgentLLM.PARAM_SYSTEM_PROMPT,
        FastAgentLLM.PARAM_STOP_SEQUENCES,
        FastAgentLLM.PARAM_MAX_TOKENS,
        FastAgentLLM.PARAM_METADATA,
        FastAgentLLM.PARAM_USE_HISTORY,
        FastAgentLLM.PARAM_MAX_ITERATIONS,
        FastAgentLLM.PARAM_PARALLEL_TOOL_CALLS,
        FastAgentLLM.PARAM_TEMPLATE_VARS,
        FastAgentLLM.PARAM_MCP_METADATA,
    }

    def __init__(self, *args, **kwargs) -> None:
        # Initialize logger - keep it simple without name reference
        super().__init__(*args, provider=Provider.ANTHROPIC, **kwargs)

    def _initialize_default_params(self, kwargs: dict) -> RequestParams:
        """Initialize Anthropic-specific default parameters"""
        # Get base defaults from parent (includes ModelDatabase lookup)
        base_params = super()._initialize_default_params(kwargs)

        # Override with Anthropic-specific settings
        chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
        base_params.model = chosen_model

        return base_params

    def _base_url(self) -> str | None:
        assert self.context.config
        return self.context.config.anthropic.base_url if self.context.config.anthropic else None

    def _get_cache_mode(self) -> str:
        """Get the cache mode configuration."""
        cache_mode = "auto"  # Default to auto
        if self.context.config and self.context.config.anthropic:
            cache_mode = self.context.config.anthropic.cache_mode
        return cache_mode

    async def _prepare_tools(
        self, structured_model: Type[ModelT] | None = None, tools: list[Tool] | None = None
    ) -> list[ToolParam]:
        """Prepare tools based on whether we're in structured output mode."""
        if structured_model:
            return [
                ToolParam(
                    name=STRUCTURED_OUTPUT_TOOL_NAME,
                    description="Return the response in the required JSON format",
                    input_schema=structured_model.model_json_schema(),
                )
            ]
        else:
            # Regular mode - use tools from aggregator
            return [
                ToolParam(
                    name=tool.name,
                    description=tool.description or "",
                    input_schema=tool.inputSchema,
                )
                for tool in tools or []
            ]

    def _apply_system_cache(self, base_args: dict, cache_mode: str) -> int:
        """Apply cache control to system prompt if cache mode allows it."""
        system_content: SystemParam | None = base_args.get("system")

        if cache_mode != "off" and system_content:
            # Convert string to list format with cache control
            if isinstance(system_content, str):
                base_args["system"] = [
                    TextBlockParam(
                        type="text", text=system_content, cache_control={"type": "ephemeral"}
                    )
                ]
                logger.debug(
                    "Applied cache_control to system prompt (caches tools+system in one block)"
                )
                return 1
            # If it's already a list (shouldn't happen in current flow but type-safe)
            elif isinstance(system_content, list):
                logger.debug("System prompt already in list format")
            else:
                logger.debug(f"Unexpected system prompt type: {type(system_content)}")

        return 0

    @staticmethod
    def _apply_cache_control_to_message(message: MessageParam) -> bool:
        """Apply cache control to the last content block of a message."""
        if not isinstance(message, dict) or "content" not in message:
            return False

        content_list = message["content"]
        if not isinstance(content_list, list) or not content_list:
            return False

        for content_block in reversed(content_list):
            if isinstance(content_block, dict):
                content_block["cache_control"] = {"type": "ephemeral"}
                return True

        return False

    def _is_structured_output_request(self, tool_uses: list[Any]) -> bool:
        """
        Check if the tool uses contain a structured output request.

        Args:
            tool_uses: List of tool use blocks from the response

        Returns:
            True if any tool is the structured output tool
        """
        return any(tool.name == STRUCTURED_OUTPUT_TOOL_NAME for tool in tool_uses)

    def _build_tool_calls_dict(self, tool_uses: list[ToolUseBlock]) -> dict[str, CallToolRequest]:
        """
        Convert Anthropic tool use blocks into our CallToolRequest.

        Args:
            tool_uses: List of tool use blocks from Anthropic response

        Returns:
            Dictionary mapping tool_use_id to CallToolRequest objects
        """
        tool_calls = {}
        for tool_use in tool_uses:
            tool_call = CallToolRequest(
                method="tools/call",
                params=CallToolRequestParams(
                    name=tool_use.name,
                    arguments=cast("dict[str, Any] | None", tool_use.input),
                ),
            )
            tool_calls[tool_use.id] = tool_call
        return tool_calls

    async def _handle_structured_output_response(
        self,
        tool_use_block: ToolUseBlock,
        structured_model: Type[ModelT],
        messages: list[MessageParam],
    ) -> tuple[LlmStopReason, list[ContentBlock]]:
        """
        Handle a structured output tool response from Anthropic.

        This handles the special case where Anthropic's model was forced to use
        a 'return_structured_output' tool via tool_choice. The tool input contains
        the JSON data we want, so we extract it and format it for display.

        Even though we don't call an external tool, we must create a CallToolResult
        to satisfy Anthropic's API requirement that every tool_use has a corresponding
        tool_result in the next message.

        Args:
            tool_use_block: The tool use block containing structured output
            structured_model: The model class for structured output
            messages: The message list to append tool results to

        Returns:
            Tuple of (stop_reason, response_content_blocks)
        """
        tool_args = tool_use_block.input
        tool_use_id = tool_use_block.id

        # Create the content for responses
        structured_content = TextContent(type="text", text=json.dumps(tool_args))

        tool_result = CallToolResult(isError=False, content=[structured_content])
        messages.append(
            AnthropicConverter.create_tool_results_message([(tool_use_id, tool_result)])
        )

        logger.debug("Structured output received, treating as END_TURN")

        return LlmStopReason.END_TURN, [structured_content]

    async def _process_stream(
        self,
        stream: AsyncMessageStream,
        model: str,
    ) -> Message:
        """Process the streaming response and display real-time token usage."""
        # Track estimated output tokens by counting text chunks
        estimated_tokens = 0
        tool_streams: dict[int, dict[str, Any]] = {}

        try:
            # Process the raw event stream to get token counts
            # Cancellation is handled via asyncio.Task.cancel() which raises CancelledError
            async for event in stream:
                if (
                    event.type == "content_block_start"
                    and hasattr(event, "content_block")
                    and getattr(event.content_block, "type", None) == "tool_use"
                ):
                    content_block = event.content_block
                    tool_streams[event.index] = {
                        "name": content_block.name,
                        "id": content_block.id,
                        "buffer": [],
                    }
                    self._notify_tool_stream_listeners(
                        "start",
                        {
                            "tool_name": content_block.name,
                            "tool_use_id": content_block.id,
                            "index": event.index,
                            "streams_arguments": False,  # Anthropic doesn't stream arguments
                        },
                    )
                    self.logger.info(
                        "Model started streaming tool input",
                        data={
                            "progress_action": ProgressAction.CALLING_TOOL,
                            "agent_name": self.name,
                            "model": model,
                            "tool_name": content_block.name,
                            "tool_use_id": content_block.id,
                            "tool_event": "start",
                        },
                    )
                    continue

                if (
                    event.type == "content_block_delta"
                    and hasattr(event, "delta")
                    and event.delta.type == "input_json_delta"
                ):
                    info = tool_streams.get(event.index)
                    if info is not None:
                        chunk = event.delta.partial_json or ""
                        info["buffer"].append(chunk)
                        preview = chunk if len(chunk) <= 80 else chunk[:77] + "..."
                        self._notify_tool_stream_listeners(
                            "delta",
                            {
                                "tool_name": info.get("name"),
                                "tool_use_id": info.get("id"),
                                "index": event.index,
                                "chunk": chunk,
                                "streams_arguments": False,
                            },
                        )
                        self.logger.debug(
                            "Streaming tool input delta",
                            data={
                                "tool_name": info.get("name"),
                                "tool_use_id": info.get("id"),
                                "chunk": preview,
                            },
                        )
                    continue

                if event.type == "content_block_stop" and event.index in tool_streams:
                    info = tool_streams.pop(event.index)
                    preview_raw = "".join(info.get("buffer", []))
                    if preview_raw:
                        preview = (
                            preview_raw if len(preview_raw) <= 120 else preview_raw[:117] + "..."
                        )
                        self.logger.debug(
                            "Completed tool input stream",
                            data={
                                "tool_name": info.get("name"),
                                "tool_use_id": info.get("id"),
                                "input_preview": preview,
                            },
                        )
                    self._notify_tool_stream_listeners(
                        "stop",
                        {
                            "tool_name": info.get("name"),
                            "tool_use_id": info.get("id"),
                            "index": event.index,
                            "streams_arguments": False,
                        },
                    )
                    self.logger.info(
                        "Model finished streaming tool input",
                        data={
                            "progress_action": ProgressAction.CALLING_TOOL,
                            "agent_name": self.name,
                            "model": model,
                            "tool_name": info.get("name"),
                            "tool_use_id": info.get("id"),
                            "tool_event": "stop",
                        },
                    )
                    continue

                # Count tokens in real-time from content_block_delta events
                if (
                    event.type == "content_block_delta"
                    and hasattr(event, "delta")
                    and event.delta.type == "text_delta"
                ):
                    # Use base class method for token estimation and progress emission
                    estimated_tokens = self._update_streaming_progress(
                        event.delta.text, model, estimated_tokens
                    )
                    self._notify_tool_stream_listeners(
                        "text",
                        {
                            "chunk": event.delta.text,
                            "index": event.index,
                            "streams_arguments": False,
                        },
                    )

                # Also check for final message_delta events with actual usage info
                elif (
                    event.type == "message_delta"
                    and hasattr(event, "usage")
                    and event.usage.output_tokens
                ):
                    actual_tokens = event.usage.output_tokens
                    # Emit final progress with actual token count
                    token_str = str(actual_tokens).rjust(5)
                    data = {
                        "progress_action": ProgressAction.STREAMING,
                        "model": model,
                        "agent_name": self.name,
                        "chat_turn": self.chat_turn(),
                        "details": token_str.strip(),
                    }
                    logger.info("Streaming progress", data=data)

            # Get the final message with complete usage data
            message = await stream.get_final_message()

            # Log final usage information
            if hasattr(message, "usage") and message.usage:
                logger.info(
                    f"Streaming complete - Model: {model}, Input tokens: {message.usage.input_tokens}, Output tokens: {message.usage.output_tokens}"
                )

            return message
        except APIError as error:
            logger.error("Streaming APIError during Anthropic completion", exc_info=error)
            raise  # Re-raise to be handled by _anthropic_completion
        except Exception as error:
            logger.error("Unexpected error during Anthropic stream processing", exc_info=error)
            # Convert to APIError for consistent handling
            raise APIError(f"Stream processing error: {str(error)}") from error

    def _stream_failure_response(self, error: APIError, model_name: str) -> PromptMessageExtended:
        """Convert streaming API errors into a graceful assistant reply."""

        provider_label = (
            self.provider.value if isinstance(self.provider, Provider) else str(self.provider)
        )
        detail = getattr(error, "message", None) or str(error)
        detail = detail.strip() if isinstance(detail, str) else ""

        parts: list[str] = [f"{provider_label} request failed"]
        if model_name:
            parts.append(f"for model '{model_name}'")
        code = getattr(error, "code", None)
        if code:
            parts.append(f"(code: {code})")
        status = getattr(error, "status_code", None)
        if status:
            parts.append(f"(status={status})")

        message = " ".join(parts)
        if detail:
            message = f"{message}: {detail}"

        user_summary = " ".join(message.split()) if message else ""
        if user_summary and len(user_summary) > 280:
            user_summary = user_summary[:277].rstrip() + "..."

        if user_summary:
            assistant_text = f"I hit an internal error while calling the model: {user_summary}"
            if not assistant_text.endswith((".", "!", "?")):
                assistant_text += "."
            assistant_text += " See fast-agent-error for additional details."
        else:
            assistant_text = (
                "I hit an internal error while calling the model; see fast-agent-error for details."
            )

        assistant_block = text_content(assistant_text)
        error_block = text_content(message)

        return PromptMessageExtended(
            role="assistant",
            content=[assistant_block],
            channels={FAST_AGENT_ERROR_CHANNEL: [error_block]},
            stop_reason=LlmStopReason.ERROR,
        )

    def _handle_retry_failure(self, error: Exception) -> PromptMessageExtended | None:
        """Return the legacy error-channel response when retries are exhausted."""
        if isinstance(error, APIError):
            model_name = self.default_request_params.model or DEFAULT_ANTHROPIC_MODEL
            return self._stream_failure_response(error, model_name)
        return None

    def _build_request_messages(
        self,
        params: RequestParams,
        message_param: MessageParam,
        pre_messages: list[MessageParam] | None = None,
        history: list[PromptMessageExtended] | None = None,
    ) -> list[MessageParam]:
        """
        Build the list of Anthropic message parameters for the next request.

        Ensures that the current user message is only included once when history
        is enabled, which prevents duplicate tool_result blocks from being sent.
        """
        messages: list[MessageParam] = list(pre_messages) if pre_messages else []

        history_messages: list[MessageParam] = []
        if params.use_history and history:
            history_messages = self._convert_to_provider_format(history)
            messages.extend(history_messages)

        include_current = not params.use_history or not history_messages
        if include_current:
            messages.append(message_param)

        return messages

    async def _anthropic_completion(
        self,
        message_param,
        request_params: RequestParams | None = None,
        structured_model: Type[ModelT] | None = None,
        tools: list[Tool] | None = None,
        pre_messages: list[MessageParam] | None = None,
        history: list[PromptMessageExtended] | None = None,
        current_extended: PromptMessageExtended | None = None,
    ) -> PromptMessageExtended:
        """
        Process a query using an LLM and available tools.
        Override this method to use a different LLM.
        """

        api_key = self._api_key()
        base_url = self._base_url()
        if base_url and base_url.endswith("/v1"):
            base_url = base_url.rstrip("/v1")

        try:
            anthropic = AsyncAnthropic(api_key=api_key, base_url=base_url)
            params = self.get_request_params(request_params)
            messages = self._build_request_messages(
                params, message_param, pre_messages, history=history
            )
        except AuthenticationError as e:
            raise ProviderKeyError(
                "Invalid Anthropic API key",
                "The configured Anthropic API key was rejected.\nPlease check that your API key is valid and not expired.",
            ) from e

        # Get cache mode configuration
        cache_mode = self._get_cache_mode()
        logger.debug(f"Anthropic cache_mode: {cache_mode}")

        available_tools = await self._prepare_tools(structured_model, tools)

        response_content_blocks: list[ContentBlock] = []
        tool_calls: dict[str, CallToolRequest] | None = None
        model = self.default_request_params.model or DEFAULT_ANTHROPIC_MODEL

        # Create base arguments dictionary
        base_args = {
            "model": model,
            "messages": messages,
            "stop_sequences": params.stopSequences,
            "tools": available_tools,
        }

        if self.instruction or params.systemPrompt:
            base_args["system"] = self.instruction or params.systemPrompt

        if structured_model:
            base_args["tool_choice"] = {"type": "tool", "name": STRUCTURED_OUTPUT_TOOL_NAME}

        if params.maxTokens is not None:
            base_args["max_tokens"] = params.maxTokens

        self._log_chat_progress(self.chat_turn(), model=model)
        # Use the base class method to prepare all arguments with Anthropic-specific exclusions
        # Do this BEFORE applying cache control so metadata doesn't override cached fields
        arguments = self.prepare_provider_arguments(
            base_args, params, self.ANTHROPIC_EXCLUDE_FIELDS
        )

        # Apply cache control to system prompt AFTER merging arguments
        system_cache_applied = self._apply_system_cache(arguments, cache_mode)

        # Apply cache_control markers using planner
        planner = AnthropicCachePlanner(
            self.CONVERSATION_CACHE_WALK_DISTANCE, self.MAX_CONVERSATION_CACHE_BLOCKS
        )
        plan_messages: list[PromptMessageExtended] = []
        include_current = not params.use_history or not history
        if params.use_history and history:
            plan_messages.extend(history)
        if include_current and current_extended:
            plan_messages.append(current_extended)

        cache_indices = planner.plan_indices(
            plan_messages, cache_mode=cache_mode, system_cache_blocks=system_cache_applied
        )
        for idx in cache_indices:
            if 0 <= idx < len(messages):
                self._apply_cache_control_to_message(messages[idx])

        logger.debug(f"{arguments}")
        # Use streaming API with helper
        try:
            async with anthropic.messages.stream(**arguments) as stream:
                # Process the stream
                response = await self._process_stream(stream, model)
        except asyncio.CancelledError as e:
            reason = str(e) if e.args else "cancelled"
            logger.info(f"Anthropic completion cancelled: {reason}")
            # Return a response indicating cancellation
            return Prompt.assistant(
                TextContent(type="text", text=""),
                stop_reason=LlmStopReason.CANCELLED,
            )
        except APIError as error:
            logger.error("Streaming APIError during Anthropic completion", exc_info=error)
            raise error

        # Track usage if response is valid and has usage data
        if (
            hasattr(response, "usage")
            and response.usage
            and not isinstance(response, BaseException)
        ):
            try:
                turn_usage = TurnUsage.from_anthropic(
                    response.usage, model or DEFAULT_ANTHROPIC_MODEL
                )
                self._finalize_turn_usage(turn_usage)
            except Exception as e:
                logger.warning(f"Failed to track usage: {e}")

        if isinstance(response, AuthenticationError):
            raise ProviderKeyError(
                "Invalid Anthropic API key",
                "The configured Anthropic API key was rejected.\nPlease check that your API key is valid and not expired.",
            ) from response
        elif isinstance(response, BaseException):
            # This path shouldn't be reached anymore since we handle APIError above,
            # but keeping for backward compatibility
            logger.error(f"Unexpected error type: {type(response).__name__}", exc_info=response)
            return self._stream_failure_response(
                APIError(f"Unexpected error: {str(response)}"), model
            )

        logger.debug(
            f"{model} response:",
            data=response,
        )

        response_as_message = self.convert_message_to_message_param(response)
        messages.append(response_as_message)
        if response.content and response.content[0].type == "text":
            response_content_blocks.append(TextContent(type="text", text=response.content[0].text))

        stop_reason: LlmStopReason = LlmStopReason.END_TURN

        match response.stop_reason:
            case "stop_sequence":
                stop_reason = LlmStopReason.STOP_SEQUENCE
            case "max_tokens":
                stop_reason = LlmStopReason.MAX_TOKENS
            case "refusal":
                stop_reason = LlmStopReason.SAFETY
            case "pause":
                stop_reason = LlmStopReason.PAUSE
            case "tool_use":
                stop_reason = LlmStopReason.TOOL_USE
                tool_uses: list[ToolUseBlock] = [
                    c for c in response.content if c.type == "tool_use"
                ]
                if structured_model and self._is_structured_output_request(tool_uses):
                    stop_reason, structured_blocks = await self._handle_structured_output_response(
                        tool_uses[0], structured_model, messages
                    )
                    response_content_blocks.extend(structured_blocks)
                else:
                    tool_calls = self._build_tool_calls_dict(tool_uses)

        # Update diagnostic snapshot (never read again)
        # This provides a snapshot of what was sent to the provider for debugging
        self.history.set(messages)

        self._log_chat_finished(model=model)

        return Prompt.assistant(
            *response_content_blocks, stop_reason=stop_reason, tool_calls=tool_calls
        )

    async def _apply_prompt_provider_specific(
        self,
        multipart_messages: list["PromptMessageExtended"],
        request_params: RequestParams | None = None,
        tools: list[Tool] | None = None,
        is_template: bool = False,
    ) -> PromptMessageExtended:
        """
        Provider-specific prompt application.
        Templates are handled by the agent; messages already include them.
        """
        # Check the last message role
        last_message = multipart_messages[-1]

        if last_message.role == "user":
            logger.debug("Last message in prompt is from user, generating assistant response")
            message_param = AnthropicConverter.convert_to_anthropic(last_message)
            # No need to pass pre_messages - conversion happens in _anthropic_completion
            # via _convert_to_provider_format()
            return await self._anthropic_completion(
                message_param,
                request_params,
                tools=tools,
                pre_messages=None,
                history=multipart_messages,
                current_extended=last_message,
            )
        else:
            # For assistant messages: Return the last message content as text
            logger.debug("Last message in prompt is from assistant, returning it directly")
            return last_message

    async def _apply_prompt_provider_specific_structured(
        self,
        multipart_messages: list[PromptMessageExtended],
        model: Type[ModelT],
        request_params: RequestParams | None = None,
    ) -> tuple[ModelT | None, PromptMessageExtended]:  # noqa: F821
        """
        Provider-specific structured output implementation.
        Note: Message history is managed by base class and converted via
        _convert_to_provider_format() on each call.
        """
        request_params = self.get_request_params(request_params)

        # Check the last message role
        last_message = multipart_messages[-1]

        if last_message.role == "user":
            logger.debug("Last message in prompt is from user, generating structured response")
            message_param = AnthropicConverter.convert_to_anthropic(last_message)

            # Call _anthropic_completion with the structured model
            result: PromptMessageExtended = await self._anthropic_completion(
                message_param,
                request_params,
                structured_model=model,
                history=multipart_messages,
                current_extended=last_message,
            )

            for content in result.content:
                if content.type == "text":
                    try:
                        data = json.loads(content.text)
                        parsed_model = model(**data)
                        return parsed_model, result
                    except (json.JSONDecodeError, ValueError) as e:
                        logger.error(f"Failed to parse structured output: {e}")
                        return None, result

            # If no valid response found
            return None, Prompt.assistant()
        else:
            # For assistant messages: Return the last message content
            logger.debug("Last message in prompt is from assistant, returning it directly")
            return None, last_message

    def _convert_extended_messages_to_provider(
        self, messages: list[PromptMessageExtended]
    ) -> list[MessageParam]:
        """
        Convert PromptMessageExtended list to Anthropic MessageParam format.
        This is called fresh on every API call from _convert_to_provider_format().

        Args:
            messages: List of PromptMessageExtended objects

        Returns:
            List of Anthropic MessageParam objects
        """
        return [AnthropicConverter.convert_to_anthropic(msg) for msg in messages]

    @classmethod
    def convert_message_to_message_param(cls, message: Message, **kwargs) -> MessageParam:
        """Convert a response object to an input parameter object to allow LLM calls to be chained."""
        content = []

        for content_block in message.content:
            if content_block.type == "text":
                content.append(TextBlock(type="text", text=content_block.text))
            elif content_block.type == "tool_use":
                content.append(
                    ToolUseBlockParam(
                        type="tool_use",
                        name=content_block.name,
                        input=content_block.input,
                        id=content_block.id,
                    )
                )

        return MessageParam(role="assistant", content=content, **kwargs)

    def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
        """This is a debug routine, leaving in for convenience"""
        # Print raw usage for debugging
        print(f"\n=== USAGE DEBUG ({turn_usage.model}) ===")
        print(f"Raw usage: {raw_usage}")
        print(
            f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
        )
        print(
            f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
        )
        print(f"Effective input: {turn_usage.effective_input_tokens}")
        print(
            f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
        )
        if self.usage_accumulator.context_usage_percentage:
            print(
                f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
            )
        if self.usage_accumulator.cache_hit_rate:
            print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
        print("===========================\n")
```