fast-agent-mcp 0.3.15__py3-none-any.whl → 0.3.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of fast-agent-mcp might be problematic.

Files changed (39)
  1. fast_agent/__init__.py +2 -0
  2. fast_agent/agents/agent_types.py +5 -0
  3. fast_agent/agents/llm_agent.py +7 -0
  4. fast_agent/agents/llm_decorator.py +6 -0
  5. fast_agent/agents/mcp_agent.py +134 -10
  6. fast_agent/cli/__main__.py +35 -0
  7. fast_agent/cli/commands/check_config.py +85 -0
  8. fast_agent/cli/commands/go.py +100 -36
  9. fast_agent/cli/constants.py +13 -1
  10. fast_agent/cli/main.py +1 -0
  11. fast_agent/config.py +39 -10
  12. fast_agent/constants.py +8 -0
  13. fast_agent/context.py +24 -15
  14. fast_agent/core/direct_decorators.py +9 -0
  15. fast_agent/core/fastagent.py +101 -1
  16. fast_agent/core/logging/listeners.py +8 -0
  17. fast_agent/interfaces.py +8 -0
  18. fast_agent/llm/fastagent_llm.py +45 -0
  19. fast_agent/llm/memory.py +26 -1
  20. fast_agent/llm/provider/anthropic/llm_anthropic.py +112 -0
  21. fast_agent/llm/provider/openai/llm_openai.py +184 -18
  22. fast_agent/llm/provider/openai/responses.py +133 -0
  23. fast_agent/resources/setup/agent.py +2 -0
  24. fast_agent/resources/setup/fastagent.config.yaml +6 -0
  25. fast_agent/skills/__init__.py +9 -0
  26. fast_agent/skills/registry.py +200 -0
  27. fast_agent/tools/shell_runtime.py +404 -0
  28. fast_agent/ui/console_display.py +396 -129
  29. fast_agent/ui/elicitation_form.py +76 -24
  30. fast_agent/ui/elicitation_style.py +2 -2
  31. fast_agent/ui/enhanced_prompt.py +81 -25
  32. fast_agent/ui/history_display.py +20 -5
  33. fast_agent/ui/interactive_prompt.py +108 -3
  34. fast_agent/ui/markdown_truncator.py +1 -1
  35. {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/METADATA +8 -7
  36. {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/RECORD +39 -35
  37. {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/WHEEL +0 -0
  38. {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/entry_points.txt +0 -0
  39. {fast_agent_mcp-0.3.15.dist-info → fast_agent_mcp-0.3.16.dist-info}/licenses/LICENSE +0 -0
@@ -64,6 +64,14 @@ def convert_log_event(event: Event) -> "ProgressEvent | None":
         chat_turn = event_data.get("chat_turn")
         if chat_turn is not None:
             details = f"{model} turn {chat_turn}"
+
+        tool_name = event_data.get("tool_name")
+        tool_event = event_data.get("tool_event")
+        if tool_name:
+            tool_suffix = tool_name
+            if tool_event:
+                tool_suffix = f"{tool_suffix} ({tool_event})"
+            details = f"{details} • {tool_suffix}".strip()
     else:
         if not target:
             target = event_data.get("target", "unknown")
fast_agent/interfaces.py CHANGED
@@ -87,9 +87,15 @@ class FastAgentLLMProtocol(Protocol):

     def add_stream_listener(self, listener: Callable[[str], None]) -> Callable[[], None]: ...

+    def add_tool_stream_listener(
+        self, listener: Callable[[str, Dict[str, Any] | None], None]
+    ) -> Callable[[], None]: ...
+
     @property
     def message_history(self) -> List[PromptMessageExtended]: ...

+    def pop_last_message(self) -> PromptMessageExtended | None: ...
+
     @property
     def usage_accumulator(self) -> UsageAccumulator | None: ...

@@ -123,6 +129,8 @@ class LlmAgentProtocol(Protocol):

     def clear(self, *, clear_prompts: bool = False) -> None: ...

+    def pop_last_message(self) -> PromptMessageExtended | None: ...
+

 class AgentProtocol(LlmAgentProtocol, Protocol):
     """Standard agent interface with flexible input types."""
@@ -159,6 +159,7 @@ class FastAgentLLM(ContextDependent, FastAgentLLMProtocol, Generic[MessageParamT
         # Initialize usage tracking
         self._usage_accumulator = UsageAccumulator()
         self._stream_listeners: set[Callable[[str], None]] = set()
+        self._tool_stream_listeners: set[Callable[[str, Dict[str, Any] | None], None]] = set()

     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize default parameters for the LLM.
@@ -534,6 +535,37 @@ class FastAgentLLM(ContextDependent, FastAgentLLMProtocol, Generic[MessageParamT
             except Exception:
                 self.logger.exception("Stream listener raised an exception")

+    def add_tool_stream_listener(
+        self, listener: Callable[[str, Dict[str, Any] | None], None]
+    ) -> Callable[[], None]:
+        """Register a callback invoked with tool streaming events.
+
+        Args:
+            listener: Callable receiving event_type (str) and optional info dict.
+
+        Returns:
+            A function that removes the listener when called.
+        """
+
+        self._tool_stream_listeners.add(listener)
+
+        def remove() -> None:
+            self._tool_stream_listeners.discard(listener)
+
+        return remove
+
+    def _notify_tool_stream_listeners(
+        self, event_type: str, payload: Dict[str, Any] | None = None
+    ) -> None:
+        """Notify listeners about tool streaming lifecycle events."""
+
+        data = payload or {}
+        for listener in list(self._tool_stream_listeners):
+            try:
+                listener(event_type, data)
+            except Exception:
+                self.logger.exception("Tool stream listener raised an exception")
+
     def _log_chat_finished(self, model: Optional[str] = None) -> None:
         """Log a chat finished event"""
         data = {
@@ -643,6 +675,19 @@ class FastAgentLLM(ContextDependent, FastAgentLLMProtocol, Generic[MessageParamT
         """
         return self._message_history

+    def pop_last_message(self) -> PromptMessageExtended | None:
+        """Remove and return the most recent message from the conversation history."""
+        if not self._message_history:
+            return None
+
+        removed = self._message_history.pop()
+        try:
+            self.history.pop()
+        except Exception:
+            # If provider-specific memory isn't available, ignore to avoid crashing UX
+            pass
+        return removed
+
     def clear(self, *, clear_prompts: bool = False) -> None:
         """Reset stored message history while optionally retaining prompt templates."""

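For reference, a minimal sketch of how a caller might use the add_tool_stream_listener API shown above. The helper name and the print statements are illustrative, not part of the package; the unsubscribe callable is simply the value returned by add_tool_stream_listener.

from typing import Any, Callable, Dict

from fast_agent.llm.fastagent_llm import FastAgentLLM


def attach_tool_stream_logger(llm: FastAgentLLM) -> Callable[[], None]:
    """Attach a tool-stream listener to an LLM and return the unsubscribe callable."""

    def on_tool_event(event_type: str, info: Dict[str, Any] | None) -> None:
        data = info or {}
        if event_type == "start":
            print(f"tool {data.get('tool_name')} started (id={data.get('tool_use_id')})")
        elif event_type == "delta" and data.get("streams_arguments"):
            # Argument fragments only arrive from providers that stream them
            print(f"  args chunk: {data.get('chunk', '')}")
        elif event_type == "stop":
            print(f"tool call {data.get('tool_use_id')} finished")

    return llm.add_tool_stream_listener(on_tool_event)

Calling the returned function detaches the listener again.
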
fast_agent/llm/memory.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Generic, List, Protocol, TypeVar
+from typing import Generic, List, Optional, Protocol, TypeVar

 # Define our own type variable for implementation use
 MessageParamT = TypeVar("MessageParamT")
@@ -23,6 +23,8 @@ class Memory(Protocol, Generic[MessageParamT]):

     def clear(self, clear_prompts: bool = False) -> None: ...

+    def pop(self, *, from_prompts: bool = False) -> Optional[MessageParamT]: ...
+

 class SimpleMemory(Memory, Generic[MessageParamT]):
     """
@@ -108,6 +110,29 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
         if clear_prompts:
             self.prompt_messages = []

+    def pop(self, *, from_prompts: bool = False) -> Optional[MessageParamT]:
+        """
+        Remove and return the most recent message from history or prompt messages.
+
+        Args:
+            from_prompts: If True, pop from prompt_messages instead of history
+
+        Returns:
+            The removed message if available, otherwise None
+        """
+        if from_prompts:
+            if not self.prompt_messages:
+                return None
+            return self.prompt_messages.pop()
+
+        if not self.history:
+            return None
+
+        removed = self.history.pop()
+        # Recalculate cache positions now that the history shrank
+        self.conversation_cache_positions = self._calculate_cache_positions(len(self.history))
+        return removed
+
     def should_apply_conversation_cache(self) -> bool:
         """
         Determine if conversation caching should be applied based on walking algorithm.
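
A minimal sketch of the new pop semantics on SimpleMemory, assuming a default-constructed instance; appending straight to the history list here is purely for illustration:

from fast_agent.llm.memory import SimpleMemory

memory: SimpleMemory[str] = SimpleMemory()
memory.history.append("user: hello")
memory.history.append("assistant: hi there")

last = memory.pop()                      # "assistant: hi there"; cache positions are recalculated
nothing = memory.pop(from_prompts=True)  # None, since prompt_messages is empty

At the LLM layer, pop_last_message() above pairs this with removal from _message_history, ignoring errors if the provider-specific memory is unavailable.
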
@@ -244,10 +244,114 @@ class AnthropicLLM(FastAgentLLM[MessageParam, Message]):
         """Process the streaming response and display real-time token usage."""
         # Track estimated output tokens by counting text chunks
         estimated_tokens = 0
+        tool_streams: dict[int, dict[str, Any]] = {}

         try:
             # Process the raw event stream to get token counts
             async for event in stream:
+                if (
+                    event.type == "content_block_start"
+                    and hasattr(event, "content_block")
+                    and getattr(event.content_block, "type", None) == "tool_use"
+                ):
+                    content_block = event.content_block
+                    tool_streams[event.index] = {
+                        "name": content_block.name,
+                        "id": content_block.id,
+                        "buffer": [],
+                    }
+                    self._notify_tool_stream_listeners(
+                        "start",
+                        {
+                            "tool_name": content_block.name,
+                            "tool_use_id": content_block.id,
+                            "index": event.index,
+                            "streams_arguments": False,  # Anthropic doesn't stream arguments
+                        },
+                    )
+                    self.logger.info(
+                        "Model started streaming tool input",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_name": content_block.name,
+                            "tool_use_id": content_block.id,
+                            "tool_event": "start",
+                        },
+                    )
+                    continue
+
+                if (
+                    event.type == "content_block_delta"
+                    and hasattr(event, "delta")
+                    and event.delta.type == "input_json_delta"
+                ):
+                    info = tool_streams.get(event.index)
+                    if info is not None:
+                        chunk = event.delta.partial_json or ""
+                        info["buffer"].append(chunk)
+                        preview = chunk if len(chunk) <= 80 else chunk[:77] + "..."
+                        self._notify_tool_stream_listeners(
+                            "delta",
+                            {
+                                "tool_name": info.get("name"),
+                                "tool_use_id": info.get("id"),
+                                "index": event.index,
+                                "chunk": chunk,
+                                "streams_arguments": False,
+                            },
+                        )
+                        self.logger.debug(
+                            "Streaming tool input delta",
+                            data={
+                                "tool_name": info.get("name"),
+                                "tool_use_id": info.get("id"),
+                                "chunk": preview,
+                            },
+                        )
+                    continue
+
+                if (
+                    event.type == "content_block_stop"
+                    and event.index in tool_streams
+                ):
+                    info = tool_streams.pop(event.index)
+                    preview_raw = "".join(info.get("buffer", []))
+                    if preview_raw:
+                        preview = (
+                            preview_raw if len(preview_raw) <= 120 else preview_raw[:117] + "..."
+                        )
+                        self.logger.debug(
+                            "Completed tool input stream",
+                            data={
+                                "tool_name": info.get("name"),
+                                "tool_use_id": info.get("id"),
+                                "input_preview": preview,
+                            },
+                        )
+                    self._notify_tool_stream_listeners(
+                        "stop",
+                        {
+                            "tool_name": info.get("name"),
+                            "tool_use_id": info.get("id"),
+                            "index": event.index,
+                            "streams_arguments": False,
+                        },
+                    )
+                    self.logger.info(
+                        "Model finished streaming tool input",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_name": info.get("name"),
+                            "tool_use_id": info.get("id"),
+                            "tool_event": "stop",
+                        },
+                    )
+                    continue
+
                 # Count tokens in real-time from content_block_delta events
                 if (
                     event.type == "content_block_delta"
@@ -258,6 +362,14 @@ class AnthropicLLM(FastAgentLLM[MessageParam, Message]):
                     estimated_tokens = self._update_streaming_progress(
                         event.delta.text, model, estimated_tokens
                     )
+                    self._notify_tool_stream_listeners(
+                        "text",
+                        {
+                            "chunk": event.delta.text,
+                            "index": event.index,
+                            "streams_arguments": False,
+                        },
+                    )

                 # Also check for final message_delta events with actual usage info
                 elif (
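
Taken together, the Anthropic path above emits this listener event sequence for a single tool_use block; the tool name, id, and JSON fragment are hypothetical placeholders, not values from the package:

("start", {"tool_name": "search", "tool_use_id": "toolu_01", "index": 0, "streams_arguments": False})
("delta", {"tool_name": "search", "tool_use_id": "toolu_01", "index": 0, "chunk": '{"query": "fa', "streams_arguments": False})
("stop",  {"tool_name": "search", "tool_use_id": "toolu_01", "index": 0, "streams_arguments": False})

Ordinary text deltas are forwarded separately as ("text", {"chunk": ..., "index": ..., "streams_arguments": False}). The OpenAI paths below emit the same event types with streams_arguments set to True.
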
@@ -7,7 +7,7 @@ from mcp.types import (
     ContentBlock,
     TextContent,
 )
-from openai import APIError, AsyncOpenAI, AuthenticationError
+from openai import APIError, AsyncOpenAI, AuthenticationError, DefaultAioHttpClient
 from openai.lib.streaming.chat import ChatCompletionStreamState

 # from openai.types.beta.chat import
@@ -95,9 +95,19 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         return self.context.config.openai.base_url if self.context.config.openai else None

     def _openai_client(self) -> AsyncOpenAI:
-        try:
-            return AsyncOpenAI(api_key=self._api_key(), base_url=self._base_url())
+        """
+        Create an OpenAI client instance.
+        Subclasses can override this to provide different client types (e.g., AzureOpenAI).

+        Note: The returned client should be used within an async context manager
+        to ensure proper cleanup of aiohttp sessions.
+        """
+        try:
+            return AsyncOpenAI(
+                api_key=self._api_key(),
+                base_url=self._base_url(),
+                http_client=DefaultAioHttpClient(),
+            )
         except AuthenticationError as e:
             raise ProviderKeyError(
                 "Invalid OpenAI API key",
@@ -119,16 +129,93 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         # Use ChatCompletionStreamState helper for accumulation (OpenAI only)
         state = ChatCompletionStreamState()

+        # Track tool call state for stream events
+        tool_call_started = {}  # Maps index -> bool for tracking start events
+
         # Process the stream chunks
         async for chunk in stream:
             # Handle chunk accumulation
             state.handle_chunk(chunk)

-            # Count tokens in real-time from content deltas
-            if chunk.choices and chunk.choices[0].delta.content:
-                content = chunk.choices[0].delta.content
-                # Use base class method for token estimation and progress emission
-                estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+            # Process streaming events for tool calls
+            if chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta
+
+                # Handle tool call streaming
+                if delta.tool_calls:
+                    for tool_call in delta.tool_calls:
+                        index = tool_call.index
+
+                        # Fire "start" event on first chunk for this tool call
+                        if index not in tool_call_started and tool_call.id and tool_call.function and tool_call.function.name:
+                            tool_call_started[index] = True
+                            self._notify_tool_stream_listeners(
+                                "start",
+                                {
+                                    "tool_name": tool_call.function.name,
+                                    "tool_use_id": tool_call.id,
+                                    "index": index,
+                                    "streams_arguments": True,  # OpenAI streams arguments!
+                                },
+                            )
+                            self.logger.info(
+                                "Model started streaming tool call",
+                                data={
+                                    "progress_action": ProgressAction.CALLING_TOOL,
+                                    "agent_name": self.name,
+                                    "model": model,
+                                    "tool_name": tool_call.function.name,
+                                    "tool_use_id": tool_call.id,
+                                    "tool_event": "start",
+                                },
+                            )
+
+                        # Fire "delta" event for argument chunks
+                        if tool_call.function and tool_call.function.arguments:
+                            self._notify_tool_stream_listeners(
+                                "delta",
+                                {
+                                    "tool_name": tool_call.function.name if tool_call.function.name else None,
+                                    "tool_use_id": tool_call.id,
+                                    "index": index,
+                                    "chunk": tool_call.function.arguments,
+                                    "streams_arguments": True,
+                                },
+                            )
+
+                # Handle text content streaming
+                if delta.content:
+                    content = delta.content
+                    # Use base class method for token estimation and progress emission
+                    estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+                    self._notify_tool_stream_listeners(
+                        "text",
+                        {
+                            "chunk": content,
+                            "streams_arguments": True,
+                        },
+                    )
+
+                # Fire "stop" event when tool calls complete
+                if choice.finish_reason == "tool_calls":
+                    for index in tool_call_started.keys():
+                        self._notify_tool_stream_listeners(
+                            "stop",
+                            {
+                                "index": index,
+                                "streams_arguments": True,
+                            },
+                        )
+                    self.logger.info(
+                        "Model finished streaming tool call",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_event": "stop",
+                        },
+                    )

             # Check if we hit the length limit to avoid LengthFinishReasonError
             current_snapshot = state.current_completion_snapshot
@@ -176,14 +263,92 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         finish_reason = None
         usage_data = None

+        # Track tool call state for stream events
+        tool_call_started = {}  # Maps index -> bool for tracking start events
+
         # Process the stream chunks manually
         async for chunk in stream:
-            # Count tokens in real-time from content deltas
-            if chunk.choices and chunk.choices[0].delta.content:
-                content = chunk.choices[0].delta.content
-                accumulated_content += content
-                # Use base class method for token estimation and progress emission
-                estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+            # Process streaming events for tool calls
+            if chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta
+
+                # Handle tool call streaming
+                if delta.tool_calls:
+                    for tool_call in delta.tool_calls:
+                        if tool_call.index is not None:
+                            index = tool_call.index
+
+                            # Fire "start" event on first chunk for this tool call
+                            if index not in tool_call_started and tool_call.id and tool_call.function and tool_call.function.name:
+                                tool_call_started[index] = True
+                                self._notify_tool_stream_listeners(
+                                    "start",
+                                    {
+                                        "tool_name": tool_call.function.name,
+                                        "tool_use_id": tool_call.id,
+                                        "index": index,
+                                        "streams_arguments": True,  # OpenAI-compatible providers stream arguments
+                                    },
+                                )
+                                self.logger.info(
+                                    "Model started streaming tool call",
+                                    data={
+                                        "progress_action": ProgressAction.CALLING_TOOL,
+                                        "agent_name": self.name,
+                                        "model": model,
+                                        "tool_name": tool_call.function.name,
+                                        "tool_use_id": tool_call.id,
+                                        "tool_event": "start",
+                                    },
+                                )
+
+                            # Fire "delta" event for argument chunks
+                            if tool_call.function and tool_call.function.arguments:
+                                self._notify_tool_stream_listeners(
+                                    "delta",
+                                    {
+                                        "tool_name": tool_call.function.name if tool_call.function.name else None,
+                                        "tool_use_id": tool_call.id,
+                                        "index": index,
+                                        "chunk": tool_call.function.arguments,
+                                        "streams_arguments": True,
+                                    },
+                                )
+
+                # Handle text content streaming
+                if delta.content:
+                    content = delta.content
+                    accumulated_content += content
+                    # Use base class method for token estimation and progress emission
+                    estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+                    self._notify_tool_stream_listeners(
+                        "text",
+                        {
+                            "chunk": content,
+                            "streams_arguments": True,
+                        },
+                    )
+
+                # Fire "stop" event when tool calls complete
+                if choice.finish_reason == "tool_calls":
+                    for index in tool_call_started.keys():
+                        self._notify_tool_stream_listeners(
+                            "stop",
+                            {
+                                "index": index,
+                                "streams_arguments": True,
+                            },
+                        )
+                    self.logger.info(
+                        "Model finished streaming tool call",
+                        data={
+                            "progress_action": ProgressAction.CALLING_TOOL,
+                            "agent_name": self.name,
+                            "model": model,
+                            "tool_event": "stop",
+                        },
+                    )

             # Extract other fields from the chunk
             if chunk.choices:
@@ -343,11 +508,12 @@ class OpenAILLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage])
         self._log_chat_progress(self.chat_turn(), model=self.default_request_params.model)
         model_name = self.default_request_params.model or DEFAULT_OPENAI_MODEL

-        # Use basic streaming API
+        # Use basic streaming API with context manager to properly close aiohttp session
         try:
-            stream = await self._openai_client().chat.completions.create(**arguments)
-            # Process the stream
-            response = await self._process_stream(stream, model_name)
+            async with self._openai_client() as client:
+                stream = await client.chat.completions.create(**arguments)
+                # Process the stream
+                response = await self._process_stream(stream, model_name)
         except APIError as error:
             self.logger.error("APIError during OpenAI completion", exc_info=error)
             return self._stream_failure_response(error, model_name)
@@ -0,0 +1,133 @@
+# from openai.types.beta.chat import
+from typing import List
+
+from mcp import Tool
+from mcp.types import ContentBlock, TextContent
+from openai import AsyncOpenAI
+from openai.types.chat import (
+    ChatCompletionMessage,
+    ChatCompletionMessageParam,
+)
+from openai.types.responses import (
+    ResponseReasoningItem,
+    ResponseReasoningSummaryTextDeltaEvent,
+    ResponseTextDeltaEvent,
+)
+
+from fast_agent.constants import REASONING
+from fast_agent.core.logging.logger import get_logger
+from fast_agent.event_progress import ProgressAction
+from fast_agent.llm.fastagent_llm import FastAgentLLM
+from fast_agent.llm.provider_types import Provider
+from fast_agent.llm.request_params import RequestParams
+from fast_agent.mcp.prompt_message_extended import PromptMessageExtended
+from fast_agent.types.llm_stop_reason import LlmStopReason
+
+_logger = get_logger(__name__)
+
+DEFAULT_RESPONSES_MODEL = "gpt-5-mini"
+DEFAULT_REASONING_EFFORT = "medium"
+
+
+# model selection
+# system prompt
+# usage info
+# reasoning/thinking display and summary
+# encrypted tokens
+
+
+class ResponsesLLM(FastAgentLLM[ChatCompletionMessageParam, ChatCompletionMessage]):
+    """LLM implementation for OpenAI's Responses models."""
+
+    # OpenAI-specific parameter exclusions
+
+    def __init__(self, provider=Provider.RESPONSES, *args, **kwargs):
+        super().__init__(*args, provider=provider, **kwargs)
+
+    async def _responses_client(self) -> AsyncOpenAI:
+        return AsyncOpenAI(api_key=self._api_key())
+
+    async def _apply_prompt_provider_specific(
+        self,
+        multipart_messages: List[PromptMessageExtended],
+        request_params: RequestParams | None = None,
+        tools: List[Tool] | None = None,
+        is_template: bool = False,
+    ) -> PromptMessageExtended:
+        responses_client = await self._responses_client()
+
+        async with responses_client.responses.stream(
+            model="gpt-5-mini",
+            instructions="You are a helpful assistant.",
+            input=multipart_messages[-1].all_text(),
+            reasoning={"summary": "auto", "effort": DEFAULT_REASONING_EFFORT},
+        ) as stream:
+            reasoning_chars: int = 0
+            text_chars: int = 0
+
+            async for event in stream:
+                if isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
+                    reasoning_chars += len(event.delta)
+                    await self._emit_streaming_progress(
+                        model="gpt-5-mini (thinking)",
+                        new_total=reasoning_chars,
+                        type=ProgressAction.THINKING,
+                    )
+                if isinstance(event, ResponseTextDeltaEvent):
+                    # Notify stream listeners with the delta text
+                    self._notify_stream_listeners(event.delta)
+                    text_chars += len(event.delta)
+                    await self._emit_streaming_progress(
+                        model="gpt-5-mini",
+                        new_total=text_chars,
+                    )
+
+            final_response = await stream.get_final_response()
+            reasoning_content: List[ContentBlock] = []
+            for output_item in final_response.output:
+                if isinstance(output_item, ResponseReasoningItem):
+                    summary_text = "\n".join(part.text for part in output_item.summary if part.text)
+                    # reasoning text is not supplied by openai - leaving for future use with other providers
+                    reasoning_text = "".join(
+                        chunk.text
+                        for chunk in (output_item.content or [])
+                        if chunk.type == "reasoning_text"
+                    )
+                    if summary_text.strip():
+                        reasoning_content.append(TextContent(type="text", text=summary_text.strip()))
+                    if reasoning_text.strip():
+                        reasoning_content.append(
+                            TextContent(type="text", text=reasoning_text.strip())
+                        )
+            channels = {REASONING: reasoning_content} if reasoning_content else None
+
+            return PromptMessageExtended(
+                role="assistant",
+                channels=channels,
+                content=[TextContent(type="text", text=final_response.output_text)],
+                stop_reason=LlmStopReason.END_TURN,
+            )
+
+    async def _emit_streaming_progress(
+        self,
+        model: str,
+        new_total: int,
+        type: ProgressAction = ProgressAction.STREAMING,
+    ) -> None:
+        """Emit a streaming progress event.
+
+        Args:
+            model: The model being used.
+            new_total: The new total token count.
+        """
+        token_str = str(new_total).rjust(5)
+
+        # Emit progress event
+        data = {
+            "progress_action": type,
+            "model": model,
+            "agent_name": self.name,
+            "chat_turn": self.chat_turn(),
+            "details": token_str.strip(),  # Token count goes in details for STREAMING action
+        }
+        self.logger.info("Streaming progress", data=data)
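
A minimal sketch of consuming the message ResponsesLLM returns above; `result` is assumed to be the PromptMessageExtended produced by _apply_prompt_provider_specific, and attribute access mirrors the constructor call in the new file:

from fast_agent.constants import REASONING

# result: PromptMessageExtended returned above (assumed to be in scope)
for block in (result.channels or {}).get(REASONING, []):
    print("reasoning summary:", block.text)
print("answer:", result.content[0].text)
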
@@ -10,6 +10,8 @@ default_instruction = """You are a helpful AI Agent.

 {{serverInstructions}}

+{{agentSkills}}
+
 The current date is {{currentDate}}."""
