fast-agent-mcp 0.3.15__py3-none-any.whl → 0.3.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fast-agent-mcp might be problematic.
- fast_agent/__init__.py +2 -0
- fast_agent/agents/agent_types.py +5 -0
- fast_agent/agents/llm_agent.py +7 -0
- fast_agent/agents/llm_decorator.py +6 -0
- fast_agent/agents/mcp_agent.py +134 -10
- fast_agent/cli/__main__.py +35 -0
- fast_agent/cli/commands/check_config.py +85 -0
- fast_agent/cli/commands/go.py +100 -36
- fast_agent/cli/constants.py +13 -1
- fast_agent/cli/main.py +1 -0
- fast_agent/config.py +39 -10
- fast_agent/constants.py +8 -0
- fast_agent/context.py +24 -15
- fast_agent/core/direct_decorators.py +9 -0
- fast_agent/core/fastagent.py +101 -1
- fast_agent/core/logging/listeners.py +8 -0
- fast_agent/interfaces.py +8 -0
- fast_agent/llm/fastagent_llm.py +45 -0
- fast_agent/llm/memory.py +26 -1
- fast_agent/llm/provider/anthropic/llm_anthropic.py +112 -0
- fast_agent/llm/provider/openai/llm_openai.py +184 -18
- fast_agent/llm/provider/openai/responses.py +133 -0
- fast_agent/resources/setup/agent.py +2 -0
- fast_agent/resources/setup/fastagent.config.yaml +6 -0
- fast_agent/skills/__init__.py +9 -0
- fast_agent/skills/registry.py +200 -0
- fast_agent/tools/shell_runtime.py +404 -0
- fast_agent/ui/console_display.py +396 -129
- fast_agent/ui/elicitation_form.py +76 -24
- fast_agent/ui/elicitation_style.py +2 -2
- fast_agent/ui/enhanced_prompt.py +81 -25
- fast_agent/ui/history_display.py +20 -5
- fast_agent/ui/interactive_prompt.py +108 -3
- fast_agent/ui/markdown_truncator.py +1 -1
- {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/METADATA +8 -7
- {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/RECORD +39 -35
- {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/licenses/LICENSE +0 -0
@@ -64,6 +64,14 @@ def convert_log_event(event: Event) -> "ProgressEvent | None":
         chat_turn = event_data.get("chat_turn")
         if chat_turn is not None:
             details = f"{model} turn {chat_turn}"
+
+        tool_name = event_data.get("tool_name")
+        tool_event = event_data.get("tool_event")
+        if tool_name:
+            tool_suffix = tool_name
+            if tool_event:
+                tool_suffix = f"{tool_suffix} ({tool_event})"
+            details = f"{details} • {tool_suffix}".strip()
     else:
         if not target:
             target = event_data.get("target", "unknown")
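With the new fields populated, the progress detail reads, for example, "gpt-4o turn 3 • read_file (start)" (illustrative model and tool names).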
fast_agent/interfaces.py
CHANGED
@@ -87,9 +87,15 @@ class FastAgentLLMProtocol(Protocol):
 
     def add_stream_listener(self, listener: Callable[[str], None]) -> Callable[[], None]: ...
 
+    def add_tool_stream_listener(
+        self, listener: Callable[[str, Dict[str, Any] | None], None]
+    ) -> Callable[[], None]: ...
+
     @property
     def message_history(self) -> List[PromptMessageExtended]: ...
 
+    def pop_last_message(self) -> PromptMessageExtended | None: ...
+
     @property
     def usage_accumulator(self) -> UsageAccumulator | None: ...
 

@@ -123,6 +129,8 @@ class LlmAgentProtocol(Protocol):
 
     def clear(self, *, clear_prompts: bool = False) -> None: ...
 
+    def pop_last_message(self) -> PromptMessageExtended | None: ...
+
 
 class AgentProtocol(LlmAgentProtocol, Protocol):
     """Standard agent interface with flexible input types."""
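A minimal sketch of calling the new protocol methods; the helper function and listener body are invented for illustration, but the signatures match the protocol above:

from fast_agent.interfaces import FastAgentLLMProtocol


def attach_tool_stream_logging(llm: FastAgentLLMProtocol) -> None:
    def on_tool_stream(event_type: str, info: dict | None) -> None:
        # event_type is a lifecycle string such as "start", "delta", "text" or "stop";
        # info carries provider-supplied keys such as tool_name, tool_use_id and chunk.
        info = info or {}
        print(f"[{event_type}] {info.get('tool_name', '')}")

    remove = llm.add_tool_stream_listener(on_tool_stream)
    # ... run a turn against llm here ...
    remove()  # deregister when no longer needed

    last = llm.pop_last_message()  # newest message, or None if the history is empty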
fast_agent/llm/fastagent_llm.py
CHANGED
@@ -159,6 +159,7 @@ class FastAgentLLM(ContextDependent, FastAgentLLMProtocol, Generic[MessageParamT
         # Initialize usage tracking
         self._usage_accumulator = UsageAccumulator()
         self._stream_listeners: set[Callable[[str], None]] = set()
+        self._tool_stream_listeners: set[Callable[[str, Dict[str, Any] | None], None]] = set()
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize default parameters for the LLM.

@@ -534,6 +535,37 @@ class FastAgentLLM(ContextDependent, FastAgentLLMProtocol, Generic[MessageParamT
             except Exception:
                 self.logger.exception("Stream listener raised an exception")
 
+    def add_tool_stream_listener(
+        self, listener: Callable[[str, Dict[str, Any] | None], None]
+    ) -> Callable[[], None]:
+        """Register a callback invoked with tool streaming events.
+
+        Args:
+            listener: Callable receiving event_type (str) and optional info dict.
+
+        Returns:
+            A function that removes the listener when called.
+        """
+
+        self._tool_stream_listeners.add(listener)
+
+        def remove() -> None:
+            self._tool_stream_listeners.discard(listener)
+
+        return remove
+
+    def _notify_tool_stream_listeners(
+        self, event_type: str, payload: Dict[str, Any] | None = None
+    ) -> None:
+        """Notify listeners about tool streaming lifecycle events."""
+
+        data = payload or {}
+        for listener in list(self._tool_stream_listeners):
+            try:
+                listener(event_type, data)
+            except Exception:
+                self.logger.exception("Tool stream listener raised an exception")
+
     def _log_chat_finished(self, model: Optional[str] = None) -> None:
         """Log a chat finished event"""
         data = {

@@ -643,6 +675,19 @@ class FastAgentLLM(ContextDependent, FastAgentLLMProtocol, Generic[MessageParamT
         """
         return self._message_history
 
+    def pop_last_message(self) -> PromptMessageExtended | None:
+        """Remove and return the most recent message from the conversation history."""
+        if not self._message_history:
+            return None
+
+        removed = self._message_history.pop()
+        try:
+            self.history.pop()
+        except Exception:
+            # If provider-specific memory isn't available, ignore to avoid crashing UX
+            pass
+        return removed
+
     def clear(self, *, clear_prompts: bool = False) -> None:
         """Reset stored message history while optionally retaining prompt templates."""
 
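A rough illustration of the behaviour these base-class additions provide; llm stands for any FastAgentLLM subclass instance (assumed to exist), and the direct _notify_tool_stream_listeners call is only there to show the error isolation:

def fragile_listener(event_type: str, info: dict | None) -> None:
    raise RuntimeError("listener bug")  # caught and logged by the LLM, never re-raised

remove = llm.add_tool_stream_listener(fragile_listener)  # llm: assumed FastAgentLLM instance
llm._notify_tool_stream_listeners("start", {"tool_name": "search"})  # logs the exception, keeps going
remove()

undone = llm.pop_last_message()  # newest message or None; provider memory is popped too when available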
fast_agent/llm/memory.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Generic, List, Protocol, TypeVar
+from typing import Generic, List, Optional, Protocol, TypeVar
 
 # Define our own type variable for implementation use
 MessageParamT = TypeVar("MessageParamT")

@@ -23,6 +23,8 @@ class Memory(Protocol, Generic[MessageParamT]):
 
     def clear(self, clear_prompts: bool = False) -> None: ...
 
+    def pop(self, *, from_prompts: bool = False) -> Optional[MessageParamT]: ...
+
 
 class SimpleMemory(Memory, Generic[MessageParamT]):
     """

@@ -108,6 +110,29 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
         if clear_prompts:
             self.prompt_messages = []
 
+    def pop(self, *, from_prompts: bool = False) -> Optional[MessageParamT]:
+        """
+        Remove and return the most recent message from history or prompt messages.
+
+        Args:
+            from_prompts: If True, pop from prompt_messages instead of history
+
+        Returns:
+            The removed message if available, otherwise None
+        """
+        if from_prompts:
+            if not self.prompt_messages:
+                return None
+            return self.prompt_messages.pop()
+
+        if not self.history:
+            return None
+
+        removed = self.history.pop()
+        # Recalculate cache positions now that the history shrank
+        self.conversation_cache_positions = self._calculate_cache_positions(len(self.history))
+        return removed
+
     def should_apply_conversation_cache(self) -> bool:
         """
         Determine if conversation caching should be applied based on walking algorithm.
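A small sketch of the new pop semantics, assuming SimpleMemory can be constructed without arguments and that history and prompt_messages are plain lists, as the diff suggests; the string payloads are placeholders:

from fast_agent.llm.memory import SimpleMemory

memory: SimpleMemory[str] = SimpleMemory()
memory.history = ["hello", "world"]
memory.prompt_messages = ["template"]

memory.pop()                   # -> "world"; conversation cache positions are recalculated
memory.pop(from_prompts=True)  # -> "template"
memory.pop(from_prompts=True)  # -> None once prompt_messages is empty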
fast_agent/llm/provider/anthropic/llm_anthropic.py
CHANGED

@@ -244,10 +244,114 @@ class AnthropicLLM(FastAgentLLM[MessageParam, Message]):
         """Process the streaming response and display real-time token usage."""
         # Track estimated output tokens by counting text chunks
         estimated_tokens = 0
+        tool_streams: dict[int, dict[str, Any]] = {}
 
         try:
             # Process the raw event stream to get token counts
             async for event in stream:
+                if (
+                    event.type == "content_block_start"
+                    and hasattr(event, "content_block")
+                    and getattr(event.content_block, "type", None) == "tool_use"
+                ):
+                    content_block = event.content_block
+                    tool_streams[event.index] = {
+                        "name": content_block.name,
+                        "id": content_block.id,
+                        "buffer": [],
+                    }
+                    self._notify_tool_stream_listeners(
+                        "start",
+                        {
+                            "tool_name": content_block.name,
+                            "tool_use_id": content_block.id,
+                            "index": event.index,
+                            "streams_arguments": False,  # Anthropic doesn't stream arguments
+                        },
+                    )
+                    self.logger.info(
+                        "Model started streaming tool input",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_name": content_block.name,
+                            "tool_use_id": content_block.id,
+                            "tool_event": "start",
+                        },
+                    )
+                    continue
+
+                if (
+                    event.type == "content_block_delta"
+                    and hasattr(event, "delta")
+                    and event.delta.type == "input_json_delta"
+                ):
+                    info = tool_streams.get(event.index)
+                    if info is not None:
+                        chunk = event.delta.partial_json or ""
+                        info["buffer"].append(chunk)
+                        preview = chunk if len(chunk) <= 80 else chunk[:77] + "..."
+                        self._notify_tool_stream_listeners(
+                            "delta",
+                            {
+                                "tool_name": info.get("name"),
+                                "tool_use_id": info.get("id"),
+                                "index": event.index,
+                                "chunk": chunk,
+                                "streams_arguments": False,
+                            },
+                        )
+                        self.logger.debug(
+                            "Streaming tool input delta",
+                            data={
+                                "tool_name": info.get("name"),
+                                "tool_use_id": info.get("id"),
+                                "chunk": preview,
+                            },
+                        )
+                    continue
+
+                if (
+                    event.type == "content_block_stop"
+                    and event.index in tool_streams
+                ):
+                    info = tool_streams.pop(event.index)
+                    preview_raw = "".join(info.get("buffer", []))
+                    if preview_raw:
+                        preview = (
+                            preview_raw if len(preview_raw) <= 120 else preview_raw[:117] + "..."
+                        )
+                        self.logger.debug(
+                            "Completed tool input stream",
+                            data={
+                                "tool_name": info.get("name"),
+                                "tool_use_id": info.get("id"),
+                                "input_preview": preview,
+                            },
+                        )
+                    self._notify_tool_stream_listeners(
+                        "stop",
+                        {
+                            "tool_name": info.get("name"),
+                            "tool_use_id": info.get("id"),
+                            "index": event.index,
+                            "streams_arguments": False,
+                        },
+                    )
+                    self.logger.info(
+                        "Model finished streaming tool input",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_name": info.get("name"),
+                            "tool_use_id": info.get("id"),
+                            "tool_event": "stop",
+                        },
+                    )
+                    continue
+
                 # Count tokens in real-time from content_block_delta events
                 if (
                     event.type == "content_block_delta"

@@ -258,6 +362,14 @@ class AnthropicLLM(FastAgentLLM[MessageParam, Message]):
                     estimated_tokens = self._update_streaming_progress(
                         event.delta.text, model, estimated_tokens
                     )
+                    self._notify_tool_stream_listeners(
+                        "text",
+                        {
+                            "chunk": event.delta.text,
+                            "index": event.index,
+                            "streams_arguments": False,
+                        },
+                    )
 
                 # Also check for final message_delta events with actual usage info
                 elif (
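For orientation, a display-side listener consuming these events might look roughly like the following; this is a sketch, not fast-agent's actual UI code, and it only relies on the payload keys shown above:

def render_tool_activity(event_type: str, info: dict | None) -> None:
    info = info or {}
    name = info.get("tool_name") or "tool"
    if event_type == "start":
        # streams_arguments is False for Anthropic, so show an indeterminate indicator
        print(f"calling {name} ...")
    elif event_type == "delta" and info.get("streams_arguments"):
        # argument JSON fragments, for providers that stream them
        print(info.get("chunk", ""), end="")
    elif event_type == "stop":
        print(f"{name}: input complete")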
fast_agent/llm/provider/openai/llm_openai.py
CHANGED

@@ -7,7 +7,7 @@ from mcp.types import (
     ContentBlock,
     TextContent,
 )
-from openai import APIError, AsyncOpenAI, AuthenticationError
+from openai import APIError, AsyncOpenAI, AuthenticationError, DefaultAioHttpClient
 from openai.lib.streaming.chat import ChatCompletionStreamState
 
 # from openai.types.beta.chat import

@@ -95,9 +95,19 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         return self.context.config.openai.base_url if self.context.config.openai else None
 
     def _openai_client(self) -> AsyncOpenAI:
-
-
+        """
+        Create an OpenAI client instance.
+        Subclasses can override this to provide different client types (e.g., AzureOpenAI).
 
+        Note: The returned client should be used within an async context manager
+        to ensure proper cleanup of aiohttp sessions.
+        """
+        try:
+            return AsyncOpenAI(
+                api_key=self._api_key(),
+                base_url=self._base_url(),
+                http_client=DefaultAioHttpClient(),
+            )
         except AuthenticationError as e:
             raise ProviderKeyError(
                 "Invalid OpenAI API key",

@@ -119,16 +129,93 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         # Use ChatCompletionStreamState helper for accumulation (OpenAI only)
         state = ChatCompletionStreamState()
 
+        # Track tool call state for stream events
+        tool_call_started = {}  # Maps index -> bool for tracking start events
+
         # Process the stream chunks
         async for chunk in stream:
             # Handle chunk accumulation
             state.handle_chunk(chunk)
 
-            #
-            if chunk.choices
-
-
-
+            # Process streaming events for tool calls
+            if chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta
+
+                # Handle tool call streaming
+                if delta.tool_calls:
+                    for tool_call in delta.tool_calls:
+                        index = tool_call.index
+
+                        # Fire "start" event on first chunk for this tool call
+                        if index not in tool_call_started and tool_call.id and tool_call.function and tool_call.function.name:
+                            tool_call_started[index] = True
+                            self._notify_tool_stream_listeners(
+                                "start",
+                                {
+                                    "tool_name": tool_call.function.name,
+                                    "tool_use_id": tool_call.id,
+                                    "index": index,
+                                    "streams_arguments": True,  # OpenAI streams arguments!
+                                },
+                            )
+                            self.logger.info(
+                                "Model started streaming tool call",
+                                data={
+                                    "progress_action": ProgressAction.CALLING_TOOL,
+                                    "agent_name": self.name,
+                                    "model": model,
+                                    "tool_name": tool_call.function.name,
+                                    "tool_use_id": tool_call.id,
+                                    "tool_event": "start",
+                                },
+                            )
+
+                        # Fire "delta" event for argument chunks
+                        if tool_call.function and tool_call.function.arguments:
+                            self._notify_tool_stream_listeners(
+                                "delta",
+                                {
+                                    "tool_name": tool_call.function.name if tool_call.function.name else None,
+                                    "tool_use_id": tool_call.id,
+                                    "index": index,
+                                    "chunk": tool_call.function.arguments,
+                                    "streams_arguments": True,
+                                },
+                            )
+
+                # Handle text content streaming
+                if delta.content:
+                    content = delta.content
+                    # Use base class method for token estimation and progress emission
+                    estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+                    self._notify_tool_stream_listeners(
+                        "text",
+                        {
+                            "chunk": content,
+                            "streams_arguments": True,
+                        },
+                    )
+
+                # Fire "stop" event when tool calls complete
+                if choice.finish_reason == "tool_calls":
+                    for index in tool_call_started.keys():
+                        self._notify_tool_stream_listeners(
+                            "stop",
+                            {
+                                "index": index,
+                                "streams_arguments": True,
+                            },
+                        )
+                    self.logger.info(
+                        "Model finished streaming tool call",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_event": "stop",
+                        },
+                    )
 
         # Check if we hit the length limit to avoid LengthFinishReasonError
         current_snapshot = state.current_completion_snapshot

@@ -176,14 +263,92 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         finish_reason = None
         usage_data = None
 
+        # Track tool call state for stream events
+        tool_call_started = {}  # Maps index -> bool for tracking start events
+
         # Process the stream chunks manually
         async for chunk in stream:
-            #
-            if chunk.choices
-
-
-
-
+            # Process streaming events for tool calls
+            if chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta
+
+                # Handle tool call streaming
+                if delta.tool_calls:
+                    for tool_call in delta.tool_calls:
+                        if tool_call.index is not None:
+                            index = tool_call.index
+
+                            # Fire "start" event on first chunk for this tool call
+                            if index not in tool_call_started and tool_call.id and tool_call.function and tool_call.function.name:
+                                tool_call_started[index] = True
+                                self._notify_tool_stream_listeners(
+                                    "start",
+                                    {
+                                        "tool_name": tool_call.function.name,
+                                        "tool_use_id": tool_call.id,
+                                        "index": index,
+                                        "streams_arguments": True,  # OpenAI-compatible providers stream arguments
+                                    },
+                                )
+                                self.logger.info(
+                                    "Model started streaming tool call",
+                                    data={
+                                        "progress_action": ProgressAction.CALLING_TOOL,
+                                        "agent_name": self.name,
+                                        "model": model,
+                                        "tool_name": tool_call.function.name,
+                                        "tool_use_id": tool_call.id,
+                                        "tool_event": "start",
+                                    },
+                                )
+
+                            # Fire "delta" event for argument chunks
+                            if tool_call.function and tool_call.function.arguments:
+                                self._notify_tool_stream_listeners(
+                                    "delta",
+                                    {
+                                        "tool_name": tool_call.function.name if tool_call.function.name else None,
+                                        "tool_use_id": tool_call.id,
+                                        "index": index,
+                                        "chunk": tool_call.function.arguments,
+                                        "streams_arguments": True,
+                                    },
+                                )
+
+                # Handle text content streaming
+                if delta.content:
+                    content = delta.content
+                    accumulated_content += content
+                    # Use base class method for token estimation and progress emission
+                    estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+                    self._notify_tool_stream_listeners(
+                        "text",
+                        {
+                            "chunk": content,
+                            "streams_arguments": True,
+                        },
+                    )
+
+                # Fire "stop" event when tool calls complete
+                if choice.finish_reason == "tool_calls":
+                    for index in tool_call_started.keys():
+                        self._notify_tool_stream_listeners(
+                            "stop",
+                            {
+                                "index": index,
+                                "streams_arguments": True,
+                            },
+                        )
+                    self.logger.info(
+                        "Model finished streaming tool call",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_event": "stop",
+                        },
+                    )
 
         # Extract other fields from the chunk
         if chunk.choices:

@@ -343,11 +508,12 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         self._log_chat_progress(self.chat_turn(), model=self.default_request_params.model)
         model_name = self.default_request_params.model or DEFAULT_OPENAI_MODEL
 
-        # Use basic streaming API
+        # Use basic streaming API with context manager to properly close aiohttp session
         try:
-
-
-
+            async with self._openai_client() as client:
+                stream = await client.chat.completions.create(**arguments)
+                # Process the stream
+                response = await self._process_stream(stream, model_name)
         except APIError as error:
             self.logger.error("APIError during OpenAI completion", exc_info=error)
             return self._stream_failure_response(error, model_name)
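Outside of fast-agent, the client pattern adopted above (DefaultAioHttpClient plus an async context manager so the aiohttp session is torn down) looks roughly like this; it assumes the openai package is installed with its aiohttp extra, OPENAI_API_KEY is set, and the model name is illustrative:

import asyncio

from openai import AsyncOpenAI, DefaultAioHttpClient


async def main() -> None:
    # Closing the client via "async with" also closes the aiohttp session cleanly.
    async with AsyncOpenAI(http_client=DefaultAioHttpClient()) as client:
        stream = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Say hello."}],
            stream=True,
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")


asyncio.run(main())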
fast_agent/llm/provider/openai/responses.py
ADDED

@@ -0,0 +1,133 @@
+# from openai.types.beta.chat import
+from typing import List
+
+from mcp import Tool
+from mcp.types import ContentBlock, TextContent
+from openai import AsyncOpenAI
+from openai.types.chat import (
+    ChatCompletionMessage,
+    ChatCompletionMessageParam,
+)
+from openai.types.responses import (
+    ResponseReasoningItem,
+    ResponseReasoningSummaryTextDeltaEvent,
+    ResponseTextDeltaEvent,
+)
+
+from fast_agent.constants import REASONING
+from fast_agent.core.logging.logger import get_logger
+from fast_agent.event_progress import ProgressAction
+from fast_agent.llm.fastagent_llm import FastAgentLLM
+from fast_agent.llm.provider_types import Provider
+from fast_agent.llm.request_params import RequestParams
+from fast_agent.mcp.prompt_message_extended import PromptMessageExtended
+from fast_agent.types.llm_stop_reason import LlmStopReason
+
+_logger = get_logger(__name__)
+
+DEFAULT_RESPONSES_MODEL = "gpt-5-mini"
+DEFAULT_REASONING_EFFORT = "medium"
+
+
+# model selection
+# system prompt
+# usage info
+# reasoning/thinking display and summary
+# encrypted tokens
+
+
+class ResponsesLLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage]):
+    """LLM implementation for OpenAI's Responses models."""
+
+    # OpenAI-specific parameter exclusions
+
+    def __init__(self, provider=Provider.RESPONSES, *args, **kwargs):
+        super().__init__(*args, provider=provider, **kwargs)
+
+    async def _responses_client(self) -> AsyncOpenAI:
+        return AsyncOpenAI(api_key=self._api_key())
+
+    async def _apply_prompt_provider_specific(
+        self,
+        multipart_messages: List[PromptMessageExtended],
+        request_params: RequestParams | None = None,
+        tools: List[Tool] | None = None,
+        is_template: bool = False,
+    ) -> PromptMessageExtended:
+        responses_client = await self._responses_client()
+
+        async with responses_client.responses.stream(
+            model="gpt-5-mini",
+            instructions="You are a helpful assistant.",
+            input=multipart_messages[-1].all_text(),
+            reasoning={"summary": "auto", "effort": DEFAULT_REASONING_EFFORT},
+        ) as stream:
+            reasoning_chars: int = 0
+            text_chars: int = 0
+
+            async for event in stream:
+                if isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
+                    reasoning_chars += len(event.delta)
+                    await self._emit_streaming_progress(
+                        model="gpt-5-mini (thinking)",
+                        new_total=reasoning_chars,
+                        type=ProgressAction.THINKING,
+                    )
+                if isinstance(event, ResponseTextDeltaEvent):
+                    # Notify stream listeners with the delta text
+                    self._notify_stream_listeners(event.delta)
+                    text_chars += len(event.delta)
+                    await self._emit_streaming_progress(
+                        model="gpt-5-mini",
+                        new_total=text_chars,
+                    )
+
+            final_response = await stream.get_final_response()
+            reasoning_content: List[ContentBlock] = []
+            for output_item in final_response.output:
+                if isinstance(output_item, ResponseReasoningItem):
+                    summary_text = "\n".join(part.text for part in output_item.summary if part.text)
+                    # reasoning text is not supplied by openai - leaving for future use with other providers
+                    reasoning_text = "".join(
+                        chunk.text
+                        for chunk in (output_item.content or [])
+                        if chunk.type == "reasoning_text"
+                    )
+                    if summary_text.strip():
+                        reasoning_content.append(TextContent(type="text", text=summary_text.strip()))
+                    if reasoning_text.strip():
+                        reasoning_content.append(
+                            TextContent(type="text", text=reasoning_text.strip())
+                        )
+            channels = {REASONING: reasoning_content} if reasoning_content else None
+
+            return PromptMessageExtended(
+                role="assistant",
+                channels=channels,
+                content=[TextContent(type="text", text=final_response.output_text)],
+                stop_reason=LlmStopReason.END_TURN,
+            )
+
+    async def _emit_streaming_progress(
+        self,
+        model: str,
+        new_total: int,
+        type: ProgressAction = ProgressAction.STREAMING,
+    ) -> None:
+        """Emit a streaming progress event.
+
+        Args:
+            model: The model being used.
+            new_total: The new total token count.
+        """
+        token_str = str(new_total).rjust(5)
+
+        # Emit progress event
+        data = {
+            "progress_action": type,
+            "model": model,
+            "agent_name": self.name,
+            "chat_turn": self.chat_turn(),
+            "details": token_str.strip(),  # Token count goes in details for STREAMING action
+        }
+        self.logger.info("Streaming progress", data=data)