fast-agent-mcp 0.2.57__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of fast-agent-mcp might be problematic.
- fast_agent/__init__.py +127 -0
- fast_agent/agents/__init__.py +36 -0
- {mcp_agent/core → fast_agent/agents}/agent_types.py +2 -1
- fast_agent/agents/llm_agent.py +217 -0
- fast_agent/agents/llm_decorator.py +486 -0
- mcp_agent/agents/base_agent.py → fast_agent/agents/mcp_agent.py +377 -385
- fast_agent/agents/tool_agent.py +168 -0
- {mcp_agent → fast_agent}/agents/workflow/chain_agent.py +43 -33
- {mcp_agent → fast_agent}/agents/workflow/evaluator_optimizer.py +31 -35
- {mcp_agent → fast_agent}/agents/workflow/iterative_planner.py +56 -47
- {mcp_agent → fast_agent}/agents/workflow/orchestrator_models.py +4 -4
- {mcp_agent → fast_agent}/agents/workflow/parallel_agent.py +34 -41
- {mcp_agent → fast_agent}/agents/workflow/router_agent.py +54 -39
- {mcp_agent → fast_agent}/cli/__main__.py +5 -3
- {mcp_agent → fast_agent}/cli/commands/check_config.py +95 -66
- {mcp_agent → fast_agent}/cli/commands/go.py +20 -11
- {mcp_agent → fast_agent}/cli/commands/quickstart.py +4 -4
- {mcp_agent → fast_agent}/cli/commands/server_helpers.py +1 -1
- {mcp_agent → fast_agent}/cli/commands/setup.py +64 -134
- {mcp_agent → fast_agent}/cli/commands/url_parser.py +9 -8
- {mcp_agent → fast_agent}/cli/main.py +36 -16
- {mcp_agent → fast_agent}/cli/terminal.py +2 -2
- {mcp_agent → fast_agent}/config.py +13 -2
- fast_agent/constants.py +8 -0
- {mcp_agent → fast_agent}/context.py +24 -19
- {mcp_agent → fast_agent}/context_dependent.py +9 -5
- fast_agent/core/__init__.py +17 -0
- {mcp_agent → fast_agent}/core/agent_app.py +39 -36
- fast_agent/core/core_app.py +135 -0
- {mcp_agent → fast_agent}/core/direct_decorators.py +12 -26
- {mcp_agent → fast_agent}/core/direct_factory.py +95 -73
- {mcp_agent → fast_agent/core}/executor/executor.py +4 -5
- {mcp_agent → fast_agent}/core/fastagent.py +32 -32
- fast_agent/core/logging/__init__.py +5 -0
- {mcp_agent → fast_agent/core}/logging/events.py +3 -3
- {mcp_agent → fast_agent/core}/logging/json_serializer.py +1 -1
- {mcp_agent → fast_agent/core}/logging/listeners.py +85 -7
- {mcp_agent → fast_agent/core}/logging/logger.py +7 -7
- {mcp_agent → fast_agent/core}/logging/transport.py +10 -11
- fast_agent/core/prompt.py +9 -0
- {mcp_agent → fast_agent}/core/validation.py +4 -4
- fast_agent/event_progress.py +61 -0
- fast_agent/history/history_exporter.py +44 -0
- {mcp_agent → fast_agent}/human_input/__init__.py +9 -12
- {mcp_agent → fast_agent}/human_input/elicitation_handler.py +26 -8
- {mcp_agent → fast_agent}/human_input/elicitation_state.py +7 -7
- {mcp_agent → fast_agent}/human_input/simple_form.py +6 -4
- {mcp_agent → fast_agent}/human_input/types.py +1 -18
- fast_agent/interfaces.py +228 -0
- fast_agent/llm/__init__.py +9 -0
- mcp_agent/llm/augmented_llm.py → fast_agent/llm/fastagent_llm.py +128 -218
- fast_agent/llm/internal/passthrough.py +137 -0
- mcp_agent/llm/augmented_llm_playback.py → fast_agent/llm/internal/playback.py +29 -25
- mcp_agent/llm/augmented_llm_silent.py → fast_agent/llm/internal/silent.py +10 -17
- fast_agent/llm/internal/slow.py +38 -0
- {mcp_agent → fast_agent}/llm/memory.py +40 -30
- {mcp_agent → fast_agent}/llm/model_database.py +35 -2
- {mcp_agent → fast_agent}/llm/model_factory.py +103 -77
- fast_agent/llm/model_info.py +126 -0
- {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/anthropic_utils.py +7 -7
- fast_agent/llm/provider/anthropic/llm_anthropic.py +603 -0
- {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/multipart_converter_anthropic.py +79 -86
- fast_agent/llm/provider/bedrock/bedrock_utils.py +218 -0
- fast_agent/llm/provider/bedrock/llm_bedrock.py +2192 -0
- {mcp_agent/llm/providers → fast_agent/llm/provider/google}/google_converter.py +66 -14
- fast_agent/llm/provider/google/llm_google_native.py +431 -0
- mcp_agent/llm/providers/augmented_llm_aliyun.py → fast_agent/llm/provider/openai/llm_aliyun.py +6 -7
- mcp_agent/llm/providers/augmented_llm_azure.py → fast_agent/llm/provider/openai/llm_azure.py +4 -4
- mcp_agent/llm/providers/augmented_llm_deepseek.py → fast_agent/llm/provider/openai/llm_deepseek.py +10 -11
- mcp_agent/llm/providers/augmented_llm_generic.py → fast_agent/llm/provider/openai/llm_generic.py +4 -4
- mcp_agent/llm/providers/augmented_llm_google_oai.py → fast_agent/llm/provider/openai/llm_google_oai.py +4 -4
- mcp_agent/llm/providers/augmented_llm_groq.py → fast_agent/llm/provider/openai/llm_groq.py +14 -16
- mcp_agent/llm/providers/augmented_llm_openai.py → fast_agent/llm/provider/openai/llm_openai.py +133 -206
- mcp_agent/llm/providers/augmented_llm_openrouter.py → fast_agent/llm/provider/openai/llm_openrouter.py +6 -6
- mcp_agent/llm/providers/augmented_llm_tensorzero_openai.py → fast_agent/llm/provider/openai/llm_tensorzero_openai.py +17 -16
- mcp_agent/llm/providers/augmented_llm_xai.py → fast_agent/llm/provider/openai/llm_xai.py +6 -6
- {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/multipart_converter_openai.py +125 -63
- {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_multipart.py +12 -12
- {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_utils.py +18 -16
- {mcp_agent → fast_agent}/llm/provider_key_manager.py +2 -2
- {mcp_agent → fast_agent}/llm/provider_types.py +2 -0
- {mcp_agent → fast_agent}/llm/sampling_converter.py +15 -12
- {mcp_agent → fast_agent}/llm/usage_tracking.py +23 -5
- fast_agent/mcp/__init__.py +43 -0
- {mcp_agent → fast_agent}/mcp/elicitation_factory.py +3 -3
- {mcp_agent → fast_agent}/mcp/elicitation_handlers.py +19 -10
- {mcp_agent → fast_agent}/mcp/gen_client.py +3 -3
- fast_agent/mcp/helpers/__init__.py +36 -0
- fast_agent/mcp/helpers/content_helpers.py +183 -0
- {mcp_agent → fast_agent}/mcp/helpers/server_config_helpers.py +8 -8
- {mcp_agent → fast_agent}/mcp/hf_auth.py +25 -23
- fast_agent/mcp/interfaces.py +93 -0
- {mcp_agent → fast_agent}/mcp/logger_textio.py +4 -4
- {mcp_agent → fast_agent}/mcp/mcp_agent_client_session.py +49 -44
- {mcp_agent → fast_agent}/mcp/mcp_aggregator.py +66 -115
- {mcp_agent → fast_agent}/mcp/mcp_connection_manager.py +16 -23
- {mcp_agent/core → fast_agent/mcp}/mcp_content.py +23 -15
- {mcp_agent → fast_agent}/mcp/mime_utils.py +39 -0
- fast_agent/mcp/prompt.py +159 -0
- mcp_agent/mcp/prompt_message_multipart.py → fast_agent/mcp/prompt_message_extended.py +27 -20
- {mcp_agent → fast_agent}/mcp/prompt_render.py +21 -19
- {mcp_agent → fast_agent}/mcp/prompt_serialization.py +46 -46
- fast_agent/mcp/prompts/__main__.py +7 -0
- {mcp_agent → fast_agent}/mcp/prompts/prompt_helpers.py +31 -30
- {mcp_agent → fast_agent}/mcp/prompts/prompt_load.py +8 -8
- {mcp_agent → fast_agent}/mcp/prompts/prompt_server.py +11 -19
- {mcp_agent → fast_agent}/mcp/prompts/prompt_template.py +18 -18
- {mcp_agent → fast_agent}/mcp/resource_utils.py +1 -1
- {mcp_agent → fast_agent}/mcp/sampling.py +31 -26
- {mcp_agent/mcp_server → fast_agent/mcp/server}/__init__.py +1 -1
- {mcp_agent/mcp_server → fast_agent/mcp/server}/agent_server.py +5 -6
- fast_agent/mcp/ui_agent.py +48 -0
- fast_agent/mcp/ui_mixin.py +209 -0
- fast_agent/mcp_server_registry.py +90 -0
- {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis-campaign.py +5 -4
- {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_forms_server.py +25 -3
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/forms_demo.py +3 -3
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character.py +2 -2
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character_handler.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/tool_call.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_one.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_two.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/researcher/researcher-eval.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/researcher/researcher-imp.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/researcher/researcher.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/tensorzero/agent.py +2 -2
- {mcp_agent → fast_agent}/resources/examples/tensorzero/image_demo.py +3 -3
- {mcp_agent → fast_agent}/resources/examples/tensorzero/simple_agent.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/workflows/chaining.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/workflows/evaluator.py +3 -3
- {mcp_agent → fast_agent}/resources/examples/workflows/human_input.py +5 -3
- {mcp_agent → fast_agent}/resources/examples/workflows/orchestrator.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/workflows/parallel.py +2 -2
- {mcp_agent → fast_agent}/resources/examples/workflows/router.py +5 -2
- fast_agent/resources/setup/.gitignore +24 -0
- fast_agent/resources/setup/agent.py +18 -0
- fast_agent/resources/setup/fastagent.config.yaml +44 -0
- fast_agent/resources/setup/fastagent.secrets.yaml.example +38 -0
- fast_agent/tools/elicitation.py +369 -0
- fast_agent/types/__init__.py +32 -0
- fast_agent/types/llm_stop_reason.py +77 -0
- fast_agent/ui/__init__.py +38 -0
- fast_agent/ui/console_display.py +1005 -0
- {mcp_agent/human_input → fast_agent/ui}/elicitation_form.py +56 -39
- mcp_agent/human_input/elicitation_forms.py → fast_agent/ui/elicitation_style.py +1 -1
- {mcp_agent/core → fast_agent/ui}/enhanced_prompt.py +96 -25
- {mcp_agent/core → fast_agent/ui}/interactive_prompt.py +330 -125
- fast_agent/ui/mcp_ui_utils.py +224 -0
- {mcp_agent → fast_agent/ui}/progress_display.py +2 -2
- {mcp_agent/logging → fast_agent/ui}/rich_progress.py +4 -4
- {mcp_agent/core → fast_agent/ui}/usage_display.py +3 -8
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.3.0.dist-info}/METADATA +7 -7
- fast_agent_mcp-0.3.0.dist-info/RECORD +202 -0
- fast_agent_mcp-0.3.0.dist-info/entry_points.txt +5 -0
- fast_agent_mcp-0.2.57.dist-info/RECORD +0 -192
- fast_agent_mcp-0.2.57.dist-info/entry_points.txt +0 -6
- mcp_agent/__init__.py +0 -114
- mcp_agent/agents/agent.py +0 -92
- mcp_agent/agents/workflow/__init__.py +0 -1
- mcp_agent/agents/workflow/orchestrator_agent.py +0 -597
- mcp_agent/app.py +0 -175
- mcp_agent/core/__init__.py +0 -26
- mcp_agent/core/prompt.py +0 -191
- mcp_agent/event_progress.py +0 -134
- mcp_agent/human_input/handler.py +0 -81
- mcp_agent/llm/__init__.py +0 -2
- mcp_agent/llm/augmented_llm_passthrough.py +0 -232
- mcp_agent/llm/augmented_llm_slow.py +0 -53
- mcp_agent/llm/providers/__init__.py +0 -8
- mcp_agent/llm/providers/augmented_llm_anthropic.py +0 -717
- mcp_agent/llm/providers/augmented_llm_bedrock.py +0 -1788
- mcp_agent/llm/providers/augmented_llm_google_native.py +0 -495
- mcp_agent/llm/providers/sampling_converter_anthropic.py +0 -57
- mcp_agent/llm/providers/sampling_converter_openai.py +0 -26
- mcp_agent/llm/sampling_format_converter.py +0 -37
- mcp_agent/logging/__init__.py +0 -0
- mcp_agent/mcp/__init__.py +0 -50
- mcp_agent/mcp/helpers/__init__.py +0 -25
- mcp_agent/mcp/helpers/content_helpers.py +0 -187
- mcp_agent/mcp/interfaces.py +0 -266
- mcp_agent/mcp/prompts/__init__.py +0 -0
- mcp_agent/mcp/prompts/__main__.py +0 -10
- mcp_agent/mcp_server_registry.py +0 -343
- mcp_agent/tools/tool_definition.py +0 -14
- mcp_agent/ui/console_display.py +0 -790
- mcp_agent/ui/console_display_legacy.py +0 -401
- {mcp_agent → fast_agent}/agents/workflow/orchestrator_prompts.py +0 -0
- {mcp_agent/agents → fast_agent/cli}/__init__.py +0 -0
- {mcp_agent → fast_agent}/cli/constants.py +0 -0
- {mcp_agent → fast_agent}/core/error_handling.py +0 -0
- {mcp_agent → fast_agent}/core/exceptions.py +0 -0
- {mcp_agent/cli → fast_agent/core/executor}/__init__.py +0 -0
- {mcp_agent → fast_agent/core}/executor/task_registry.py +0 -0
- {mcp_agent → fast_agent/core}/executor/workflow_signal.py +0 -0
- {mcp_agent → fast_agent}/human_input/form_fields.py +0 -0
- {mcp_agent → fast_agent}/llm/prompt_utils.py +0 -0
- {mcp_agent/core → fast_agent/llm}/request_params.py +0 -0
- {mcp_agent → fast_agent}/mcp/common.py +0 -0
- {mcp_agent/executor → fast_agent/mcp/prompts}/__init__.py +0 -0
- {mcp_agent → fast_agent}/mcp/prompts/prompt_constants.py +0 -0
- {mcp_agent → fast_agent}/py.typed +0 -0
- {mcp_agent → fast_agent}/resources/examples/data-analysis/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/data-analysis/mount-point/WA_Fn-UseC_-HR-Employee-Attrition.csv +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_account_server.py +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_game_server.py +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.secrets.yaml.example +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.secrets.yaml.example +0 -0
- {mcp_agent → fast_agent}/resources/examples/researcher/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/.env.sample +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/Makefile +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/README.md +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/clam.jpg +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/crab.png +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/shrimp.png +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/docker-compose.yml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/Dockerfile +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/entrypoint.sh +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/mcp_server.py +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/pyproject.toml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_schema.json +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_template.minijinja +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/tensorzero.toml +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/graded_report.md +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/short_story.md +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/short_story.txt +0 -0
- {mcp_agent → fast_agent/ui}/console.py +0 -0
- {mcp_agent/core → fast_agent/ui}/mermaid_utils.py +0 -0
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.3.0.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
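The listing above amounts to a rename of the top-level package from `mcp_agent` to `fast_agent`, with provider modules regrouped under `fast_agent/llm/provider/<vendor>/`. Below is a minimal migration sketch for downstream imports, assuming the moved modules keep their public names (the `FastAgent` class name and whether the old `mcp_agent` path still resolves in 0.3.0 are assumptions, not confirmed by this diff):

```python
# Hypothetical import shim for the mcp_agent -> fast_agent package rename.
# The new path mirrors the rename of core/fastagent.py listed above; the class
# name and the fallback path are assumptions, so the old import is guarded.
try:
    # 0.3.0+: modules now live under the `fast_agent` package
    from fast_agent.core.fastagent import FastAgent
except ImportError:
    # 0.2.x: the same module lived under `mcp_agent`
    from mcp_agent.core.fastagent import FastAgent

fast = FastAgent("migration example")
```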
mcp_agent/llm/providers/augmented_llm_anthropic.py (removed; 717 lines)
@@ -1,717 +0,0 @@
-import json
-from typing import TYPE_CHECKING, Any, List, Tuple, Type
-
-from mcp.types import TextContent
-
-from mcp_agent.core.prompt import Prompt
-from mcp_agent.event_progress import ProgressAction
-from mcp_agent.llm.provider_types import Provider
-from mcp_agent.llm.providers.multipart_converter_anthropic import (
-    AnthropicConverter,
-)
-from mcp_agent.llm.providers.sampling_converter_anthropic import (
-    AnthropicSamplingConverter,
-)
-from mcp_agent.llm.usage_tracking import TurnUsage
-from mcp_agent.mcp.interfaces import ModelT
-from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
-
-if TYPE_CHECKING:
-    from mcp import ListToolsResult
-
-
-from anthropic import AsyncAnthropic, AuthenticationError
-from anthropic.lib.streaming import AsyncMessageStream
-from anthropic.types import (
-    Message,
-    MessageParam,
-    TextBlock,
-    TextBlockParam,
-    ToolParam,
-    ToolUseBlockParam,
-    Usage,
-)
-from mcp.types import (
-    CallToolRequest,
-    CallToolRequestParams,
-    CallToolResult,
-    ContentBlock,
-)
-from rich.text import Text
-
-from mcp_agent.core.exceptions import ProviderKeyError
-from mcp_agent.llm.augmented_llm import (
-    AugmentedLLM,
-    RequestParams,
-)
-from mcp_agent.logging.logger import get_logger
-
-DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-0"
-
-
-class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
-    """
-    The basic building block of agentic systems is an LLM enhanced with augmentations
-    such as retrieval, tools, and memory provided from a collection of MCP servers.
-    Our current models can actively use these capabilities—generating their own search queries,
-    selecting appropriate tools, and determining what information to retain.
-    """
-
-    # Anthropic-specific parameter exclusions
-    ANTHROPIC_EXCLUDE_FIELDS = {
-        AugmentedLLM.PARAM_MESSAGES,
-        AugmentedLLM.PARAM_MODEL,
-        AugmentedLLM.PARAM_SYSTEM_PROMPT,
-        AugmentedLLM.PARAM_STOP_SEQUENCES,
-        AugmentedLLM.PARAM_MAX_TOKENS,
-        AugmentedLLM.PARAM_METADATA,
-        AugmentedLLM.PARAM_USE_HISTORY,
-        AugmentedLLM.PARAM_MAX_ITERATIONS,
-        AugmentedLLM.PARAM_PARALLEL_TOOL_CALLS,
-        AugmentedLLM.PARAM_TEMPLATE_VARS,
-    }
-
-    def __init__(self, *args, **kwargs) -> None:
-        # Initialize logger - keep it simple without name reference
-        self.logger = get_logger(__name__)
-
-        super().__init__(
-            *args, provider=Provider.ANTHROPIC, type_converter=AnthropicSamplingConverter, **kwargs
-        )
-
-    def _initialize_default_params(self, kwargs: dict) -> RequestParams:
-        """Initialize Anthropic-specific default parameters"""
-        # Get base defaults from parent (includes ModelDatabase lookup)
-        base_params = super()._initialize_default_params(kwargs)
-
-        # Override with Anthropic-specific settings
-        chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
-        base_params.model = chosen_model
-
-        return base_params
-
-    def _base_url(self) -> str | None:
-        assert self.context.config
-        return self.context.config.anthropic.base_url if self.context.config.anthropic else None
-
-    def _get_cache_mode(self) -> str:
-        """Get the cache mode configuration."""
-        cache_mode = "auto"  # Default to auto
-        if self.context.config and self.context.config.anthropic:
-            cache_mode = self.context.config.anthropic.cache_mode
-        return cache_mode
-
-    async def _prepare_tools(self, structured_model: Type[ModelT] | None = None) -> List[ToolParam]:
-        """Prepare tools based on whether we're in structured output mode."""
-        if structured_model:
-            # JSON mode - create a single tool for structured output
-            return [
-                ToolParam(
-                    name="return_structured_output",
-                    description="Return the response in the required JSON format",
-                    input_schema=structured_model.model_json_schema(),
-                )
-            ]
-        else:
-            # Regular mode - use tools from aggregator
-            tool_list: ListToolsResult = await self.aggregator.list_tools()
-            return [
-                ToolParam(
-                    name=tool.name,
-                    description=tool.description or "",
-                    input_schema=tool.inputSchema,
-                )
-                for tool in tool_list.tools
-            ]
-
-    def _apply_system_cache(self, base_args: dict, cache_mode: str) -> None:
-        """Apply cache control to system prompt if cache mode allows it."""
-        if cache_mode != "off" and base_args["system"]:
-            if isinstance(base_args["system"], str):
-                base_args["system"] = [
-                    {
-                        "type": "text",
-                        "text": base_args["system"],
-                        "cache_control": {"type": "ephemeral"},
-                    }
-                ]
-                self.logger.debug(
-                    "Applied cache_control to system prompt (caches tools+system in one block)"
-                )
-            else:
-                self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
-
-    async def _apply_conversation_cache(self, messages: List[MessageParam], cache_mode: str) -> int:
-        """Apply conversation caching if in auto mode. Returns number of cache blocks applied."""
-        applied_count = 0
-        if cache_mode == "auto" and self.history.should_apply_conversation_cache():
-            cache_updates = self.history.get_conversation_cache_updates()
-
-            # Remove cache control from old positions
-            if cache_updates["remove"]:
-                self.history.remove_cache_control_from_messages(messages, cache_updates["remove"])
-                self.logger.debug(
-                    f"Removed conversation cache_control from positions {cache_updates['remove']}"
-                )
-
-            # Add cache control to new positions
-            if cache_updates["add"]:
-                applied_count = self.history.add_cache_control_to_messages(
-                    messages, cache_updates["add"]
-                )
-                if applied_count > 0:
-                    self.history.apply_conversation_cache_updates(cache_updates)
-                    self.logger.debug(
-                        f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
-                    )
-                else:
-                    self.logger.debug(
-                        f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
-                    )
-
-        return applied_count
-
-    async def _process_structured_output(
-        self,
-        content_block: Any,
-    ) -> Tuple[str, CallToolResult, TextContent]:
-        """
-        Process a structured output tool call from Anthropic.
-
-        This handles the special case where Anthropic's model was forced to use
-        a 'return_structured_output' tool via tool_choice. The tool input contains
-        the JSON data we want, so we extract it and format it for display.
-
-        Even though we don't call an external tool, we must create a CallToolResult
-        to satisfy Anthropic's API requirement that every tool_use has a corresponding
-        tool_result in the next message.
-
-        Returns:
-            Tuple of (tool_use_id, tool_result, content_block) for the structured data
-        """
-        tool_args = content_block.input
-        tool_use_id = content_block.id
-
-        # Show the formatted JSON response to the user
-        json_response = json.dumps(tool_args, indent=2)
-        await self.show_assistant_message(json_response)
-
-        # Create the content for responses
-        structured_content = TextContent(type="text", text=json.dumps(tool_args))
-
-        # Create a CallToolResult to satisfy Anthropic's API requirements
-        # This represents the "result" of our structured output "tool"
-        tool_result = CallToolResult(isError=False, content=[structured_content])
-
-        return tool_use_id, tool_result, structured_content
-
-    async def _process_regular_tool_call(
-        self,
-        content_block: Any,
-        available_tools: List[ToolParam],
-        is_first_tool: bool,
-        message_text: str | Text,
-    ) -> Tuple[str, CallToolResult]:
-        """
-        Process a regular MCP tool call.
-
-        This handles actual tool execution via the MCP aggregator.
-        """
-        tool_name = content_block.name
-        tool_args = content_block.input
-        tool_use_id = content_block.id
-
-        if is_first_tool:
-            await self.show_assistant_message(message_text, tool_name)
-
-        self.show_tool_call(available_tools, tool_name, tool_args)
-        tool_call_request = CallToolRequest(
-            method="tools/call",
-            params=CallToolRequestParams(name=tool_name, arguments=tool_args),
-        )
-        result = await self.call_tool(request=tool_call_request, tool_call_id=tool_use_id)
-        self.show_tool_result(result)
-        return tool_use_id, result
-
-    async def _process_tool_calls(
-        self,
-        tool_uses: List[Any],
-        available_tools: List[ToolParam],
-        message_text: str | Text,
-        structured_model: Type[ModelT] | None = None,
-    ) -> Tuple[List[Tuple[str, CallToolResult]], List[ContentBlock]]:
-        """
-        Process tool calls, handling both structured output and regular MCP tools.
-
-        For structured output mode:
-        - Extracts JSON data from the forced 'return_structured_output' tool
-        - Does NOT create fake CallToolResults
-        - Returns the JSON content directly
-
-        For regular tools:
-        - Calls actual MCP tools via the aggregator
-        - Returns real CallToolResults
-        """
-        tool_results = []
-        responses = []
-
-        for tool_idx, content_block in enumerate(tool_uses):
-            tool_name = content_block.name
-            is_first_tool = tool_idx == 0
-
-            if tool_name == "return_structured_output" and structured_model:
-                # Structured output: extract JSON, don't call external tools
-                (
-                    tool_use_id,
-                    tool_result,
-                    structured_content,
-                ) = await self._process_structured_output(content_block)
-                responses.append(structured_content)
-                # Add to tool_results to satisfy Anthropic's API requirement for tool_result messages
-                tool_results.append((tool_use_id, tool_result))
-            else:
-                # Regular tool: call external MCP tool
-                tool_use_id, tool_result = await self._process_regular_tool_call(
-                    content_block, available_tools, is_first_tool, message_text
-                )
-                tool_results.append((tool_use_id, tool_result))
-                responses.extend(tool_result.content)
-
-        return tool_results, responses
-
-    async def _process_stream(self, stream: AsyncMessageStream, model: str) -> Message:
-        """Process the streaming response and display real-time token usage."""
-        # Track estimated output tokens by counting text chunks
-        estimated_tokens = 0
-
-        # Process the raw event stream to get token counts
-        async for event in stream:
-            # Count tokens in real-time from content_block_delta events
-            if (
-                event.type == "content_block_delta"
-                and hasattr(event, "delta")
-                and event.delta.type == "text_delta"
-            ):
-                # Use base class method for token estimation and progress emission
-                estimated_tokens = self._update_streaming_progress(
-                    event.delta.text, model, estimated_tokens
-                )
-
-            # Also check for final message_delta events with actual usage info
-            elif (
-                event.type == "message_delta"
-                and hasattr(event, "usage")
-                and event.usage.output_tokens
-            ):
-                actual_tokens = event.usage.output_tokens
-                # Emit final progress with actual token count
-                token_str = str(actual_tokens).rjust(5)
-                data = {
-                    "progress_action": ProgressAction.STREAMING,
-                    "model": model,
-                    "agent_name": self.name,
-                    "chat_turn": self.chat_turn(),
-                    "details": token_str.strip(),
-                }
-                self.logger.info("Streaming progress", data=data)
-
-        # Get the final message with complete usage data
-        message = await stream.get_final_message()
-
-        # Log final usage information
-        if hasattr(message, "usage") and message.usage:
-            self.logger.info(
-                f"Streaming complete - Model: {model}, Input tokens: {message.usage.input_tokens}, Output tokens: {message.usage.output_tokens}"
-            )
-
-        return message
-
-    async def _anthropic_completion(
-        self,
-        message_param,
-        request_params: RequestParams | None = None,
-        structured_model: Type[ModelT] | None = None,
-    ) -> list[ContentBlock]:
-        """
-        Process a query using an LLM and available tools.
-        Override this method to use a different LLM.
-        """
-
-        api_key = self._api_key()
-        base_url = self._base_url()
-        if base_url and base_url.endswith("/v1"):
-            base_url = base_url.rstrip("/v1")
-
-        try:
-            anthropic = AsyncAnthropic(api_key=api_key, base_url=base_url)
-            messages: List[MessageParam] = []
-            params = self.get_request_params(request_params)
-        except AuthenticationError as e:
-            raise ProviderKeyError(
-                "Invalid Anthropic API key",
-                "The configured Anthropic API key was rejected.\nPlease check that your API key is valid and not expired.",
-            ) from e
-
-        # Always include prompt messages, but only include conversation history
-        # if use_history is True
-        messages.extend(self.history.get(include_completion_history=params.use_history))
-
-        messages.append(message_param)  # message_param is the current user turn
-
-        # Get cache mode configuration
-        cache_mode = self._get_cache_mode()
-        self.logger.debug(f"Anthropic cache_mode: {cache_mode}")
-
-        available_tools = await self._prepare_tools(structured_model)
-
-        responses: List[ContentBlock] = []
-
-        model = self.default_request_params.model
-
-        # Note: We'll cache tools+system together by putting cache_control only on system prompt
-
-        for i in range(params.max_iterations):
-            self._log_chat_progress(self.chat_turn(), model=model)
-
-            # Create base arguments dictionary
-            base_args = {
-                "model": model,
-                "messages": messages,
-                "system": self.instruction or params.systemPrompt,
-                "stop_sequences": params.stopSequences,
-                "tools": available_tools,
-            }
-
-            # Add tool_choice for structured output mode
-            if structured_model:
-                base_args["tool_choice"] = {"type": "tool", "name": "return_structured_output"}
-
-            # Apply cache control to system prompt
-            self._apply_system_cache(base_args, cache_mode)
-
-            # Apply conversation caching
-            applied_count = await self._apply_conversation_cache(messages, cache_mode)
-
-            # Verify we don't exceed Anthropic's 4 cache block limit
-            if applied_count > 0:
-                total_cache_blocks = applied_count
-                if cache_mode != "off" and base_args["system"]:
-                    total_cache_blocks += 1  # tools+system cache block
-                if total_cache_blocks > 4:
-                    self.logger.warning(
-                        f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
-                    )
-
-            if params.maxTokens is not None:
-                base_args["max_tokens"] = params.maxTokens
-
-            # Use the base class method to prepare all arguments with Anthropic-specific exclusions
-            arguments = self.prepare_provider_arguments(
-                base_args, params, self.ANTHROPIC_EXCLUDE_FIELDS
-            )
-
-            self.logger.debug(f"{arguments}")
-
-            # Use streaming API with helper
-            async with anthropic.messages.stream(**arguments) as stream:
-                # Process the stream
-                response = await self._process_stream(stream, model)
-
-            # Track usage if response is valid and has usage data
-            if (
-                hasattr(response, "usage")
-                and response.usage
-                and not isinstance(response, BaseException)
-            ):
-                try:
-                    turn_usage = TurnUsage.from_anthropic(
-                        response.usage, model or DEFAULT_ANTHROPIC_MODEL
-                    )
-                    self._finalize_turn_usage(turn_usage)
-                    # self._show_usage(response.usage, turn_usage)
-                except Exception as e:
-                    self.logger.warning(f"Failed to track usage: {e}")
-
-            if isinstance(response, AuthenticationError):
-                raise ProviderKeyError(
-                    "Invalid Anthropic API key",
-                    "The configured Anthropic API key was rejected.\nPlease check that your API key is valid and not expired.",
-                ) from response
-            elif isinstance(response, BaseException):
-                error_details = str(response)
-                self.logger.error(f"Error: {error_details}", data=BaseException)
-
-                # Try to extract more useful information for API errors
-                if hasattr(response, "status_code") and hasattr(response, "response"):
-                    try:
-                        error_json = response.response.json()
-                        error_details = f"Error code: {response.status_code} - {error_json}"
-                    except:  # noqa: E722
-                        error_details = f"Error code: {response.status_code} - {str(response)}"
-
-                # Convert other errors to text response
-                error_message = f"Error during generation: {error_details}"
-                response = Message(
-                    id="error",
-                    model="error",
-                    role="assistant",
-                    type="message",
-                    content=[TextBlock(type="text", text=error_message)],
-                    stop_reason="end_turn",
-                    usage=Usage(input_tokens=0, output_tokens=0),
-                )
-
-            self.logger.debug(
-                f"{model} response:",
-                data=response,
-            )
-
-            response_as_message = self.convert_message_to_message_param(response)
-            messages.append(response_as_message)
-            if response.content and response.content[0].type == "text":
-                responses.append(TextContent(type="text", text=response.content[0].text))
-
-            if response.stop_reason == "end_turn":
-                message_text = ""
-                for block in response_as_message["content"]:
-                    if isinstance(block, dict) and block.get("type") == "text":
-                        message_text += block.get("text", "")
-                    elif hasattr(block, "type") and block.type == "text":
-                        message_text += block.text
-
-                await self.show_assistant_message(message_text)
-
-                self.logger.debug(f"Iteration {i}: Stopping because finish_reason is 'end_turn'")
-                break
-            elif response.stop_reason == "stop_sequence":
-                # We have reached a stop sequence
-                self.logger.debug(
-                    f"Iteration {i}: Stopping because finish_reason is 'stop_sequence'"
-                )
-                break
-            elif response.stop_reason == "max_tokens":
-                # We have reached the max tokens limit
-
-                self.logger.debug(f"Iteration {i}: Stopping because finish_reason is 'max_tokens'")
-                if params.maxTokens is not None:
-                    message_text = Text(
-                        f"the assistant has reached the maximum token limit ({params.maxTokens})",
-                        style="dim green italic",
-                    )
-                else:
-                    message_text = Text(
-                        "the assistant has reached the maximum token limit",
-                        style="dim green italic",
-                    )
-
-                await self.show_assistant_message(message_text)
-
-                break
-            else:
-                message_text = ""
-                for block in response_as_message["content"]:
-                    if isinstance(block, dict) and block.get("type") == "text":
-                        message_text += block.get("text", "")
-                    elif hasattr(block, "type") and block.type == "text":
-                        message_text += block.text
-
-                # response.stop_reason == "tool_use":
-                # First, collect all tool uses in this turn
-                tool_uses = [c for c in response.content if c.type == "tool_use"]
-
-                if tool_uses:
-                    if message_text == "":
-                        message_text = Text(
-                            "the assistant requested tool calls",
-                            style="dim green italic",
-                        )
-
-                    # Process all tool calls using the helper method
-                    tool_results, tool_responses = await self._process_tool_calls(
-                        tool_uses, available_tools, message_text, structured_model
-                    )
-                    responses.extend(tool_responses)
-
-                    # Always add tool_results_message first (required by Anthropic API)
-                    messages.append(AnthropicConverter.create_tool_results_message(tool_results))
-
-                    # For structured output, we have our result and should exit after sending tool_result
-                    if structured_model and any(
-                        tool.name == "return_structured_output" for tool in tool_uses
-                    ):
-                        self.logger.debug("Structured output received, breaking iteration loop")
-                        break
-
-        # Only save the new conversation messages to history if use_history is true
-        # Keep the prompt messages separate
-        if params.use_history:
-            # Get current prompt messages
-            prompt_messages = self.history.get(include_completion_history=False)
-            new_messages = messages[len(prompt_messages) :]
-            self.history.set(new_messages)
-
-        self._log_chat_finished(model=model)
-
-        return responses
-
-    async def generate_messages(
-        self,
-        message_param,
-        request_params: RequestParams | None = None,
-    ) -> PromptMessageMultipart:
-        """
-        Process a query using an LLM and available tools.
-        The default implementation uses Claude as the LLM.
-        Override this method to use a different LLM.
-
-        """
-        # Reset tool call counter for new turn
-        self._reset_turn_tool_calls()
-
-        res = await self._anthropic_completion(
-            message_param=message_param,
-            request_params=request_params,
-        )
-        return Prompt.assistant(*res)
-
-    async def _apply_prompt_provider_specific(
-        self,
-        multipart_messages: List["PromptMessageMultipart"],
-        request_params: RequestParams | None = None,
-        is_template: bool = False,
-    ) -> PromptMessageMultipart:
-        # Check the last message role
-        last_message = multipart_messages[-1]
-
-        # Add all previous messages to history (or all messages if last is from assistant)
-        messages_to_add = (
-            multipart_messages[:-1] if last_message.role == "user" else multipart_messages
-        )
-        converted = []
-
-        # Get cache mode configuration
-        cache_mode = self._get_cache_mode()
-
-        for msg in messages_to_add:
-            anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
-
-            # Apply caching to template messages if cache_mode is "prompt" or "auto"
-            if is_template and cache_mode in ["prompt", "auto"] and anthropic_msg.get("content"):
-                content_list = anthropic_msg["content"]
-                if isinstance(content_list, list) and content_list:
-                    # Apply cache control to the last content block
-                    last_block = content_list[-1]
-                    if isinstance(last_block, dict):
-                        last_block["cache_control"] = {"type": "ephemeral"}
-                        self.logger.debug(
-                            f"Applied cache_control to template message with role {anthropic_msg.get('role')}"
-                        )
-
-            converted.append(anthropic_msg)
-
-        self.history.extend(converted, is_prompt=is_template)
-
-        if last_message.role == "user":
-            self.logger.debug("Last message in prompt is from user, generating assistant response")
-            message_param = AnthropicConverter.convert_to_anthropic(last_message)
-            return await self.generate_messages(message_param, request_params)
-        else:
-            # For assistant messages: Return the last message content as text
-            self.logger.debug("Last message in prompt is from assistant, returning it directly")
-            return last_message
-
-    async def _apply_prompt_provider_specific_structured(
-        self,
-        multipart_messages: List[PromptMessageMultipart],
-        model: Type[ModelT],
-        request_params: RequestParams | None = None,
-    ) -> Tuple[ModelT | None, PromptMessageMultipart]:  # noqa: F821
-        request_params = self.get_request_params(request_params)
-
-        # Check the last message role
-        last_message = multipart_messages[-1]
-
-        # Add all previous messages to history (or all messages if last is from assistant)
-        messages_to_add = (
-            multipart_messages[:-1] if last_message.role == "user" else multipart_messages
-        )
-        converted = []
-
-        for msg in messages_to_add:
-            anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
-            converted.append(anthropic_msg)
-
-        self.history.extend(converted, is_prompt=False)
-
-        if last_message.role == "user":
-            self.logger.debug("Last message in prompt is from user, generating structured response")
-            message_param = AnthropicConverter.convert_to_anthropic(last_message)
-
-            # Call _anthropic_completion with the structured model
-            response_content = await self._anthropic_completion(
-                message_param, request_params, structured_model=model
-            )
-
-            # Extract the structured data from the response
-            for content in response_content:
-                if content.type == "text":
-                    try:
-                        # Parse the JSON response from the tool
-                        data = json.loads(content.text)
-                        parsed_model = model(**data)
-                        # Create assistant response
-                        assistant_response = Prompt.assistant(content)
-                        return parsed_model, assistant_response
-                    except (json.JSONDecodeError, ValueError) as e:
-                        self.logger.error(f"Failed to parse structured output: {e}")
-                        assistant_response = Prompt.assistant(content)
-                        return None, assistant_response
-
-            # If no valid response found
-            return None, Prompt.assistant()
-        else:
-            # For assistant messages: Return the last message content
-            self.logger.debug("Last message in prompt is from assistant, returning it directly")
-            return None, last_message
-
-    def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
-        # Print raw usage for debugging
-        print(f"\n=== USAGE DEBUG ({turn_usage.model}) ===")
-        print(f"Raw usage: {raw_usage}")
-        print(
-            f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
-        )
-        print(
-            f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
-        )
-        print(f"Effective input: {turn_usage.effective_input_tokens}")
-        print(
-            f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
-        )
-        if self.usage_accumulator.context_usage_percentage:
-            print(
-                f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
-            )
-        if self.usage_accumulator.cache_hit_rate:
-            print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
-        print("===========================\n")
-
-    @classmethod
-    def convert_message_to_message_param(cls, message: Message, **kwargs) -> MessageParam:
-        """Convert a response object to an input parameter object to allow LLM calls to be chained."""
-        content = []
-
-        for content_block in message.content:
-            if content_block.type == "text":
-                content.append(TextBlockParam(type="text", text=content_block.text))
-            elif content_block.type == "tool_use":
-                content.append(
-                    ToolUseBlockParam(
-                        type="tool_use",
-                        name=content_block.name,
-                        input=content_block.input,
-                        id=content_block.id,
-                    )
-                )
-
-        return MessageParam(role="assistant", content=content, **kwargs)