lite-agent 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lite-agent might be problematic.
- lite_agent/agent.py +177 -42
- lite_agent/chat_display.py +21 -13
- lite_agent/client.py +4 -0
- lite_agent/constants.py +30 -0
- lite_agent/message_transfers.py +3 -3
- lite_agent/processors/completion_event_processor.py +14 -20
- lite_agent/processors/response_event_processor.py +21 -15
- lite_agent/response_handlers/__init__.py +1 -0
- lite_agent/response_handlers/base.py +17 -9
- lite_agent/response_handlers/completion.py +35 -7
- lite_agent/response_handlers/responses.py +46 -12
- lite_agent/runner.py +302 -246
- lite_agent/types/__init__.py +2 -0
- lite_agent/types/messages.py +6 -5
- lite_agent/utils/__init__.py +0 -0
- lite_agent/utils/message_builder.py +211 -0
- lite_agent/utils/metrics.py +50 -0
- {lite_agent-0.6.0.dist-info → lite_agent-0.8.0.dist-info}/METADATA +2 -1
- lite_agent-0.8.0.dist-info/RECORD +31 -0
- lite_agent-0.6.0.dist-info/RECORD +0 -27
- {lite_agent-0.6.0.dist-info → lite_agent-0.8.0.dist-info}/WHEEL +0 -0
lite_agent/agent.py
CHANGED
@@ -7,9 +7,21 @@ from funcall import Funcall
 from jinja2 import Environment, FileSystemLoader
 
 from lite_agent.client import BaseLLMClient, LiteLLMClient, ReasoningConfig
+from lite_agent.constants import CompletionMode, ToolName
 from lite_agent.loggers import logger
 from lite_agent.response_handlers import CompletionResponseHandler, ResponsesAPIHandler
-from lite_agent.types import
+from lite_agent.types import (
+    AgentChunk,
+    AssistantTextContent,
+    AssistantToolCall,
+    AssistantToolCallResult,
+    FunctionCallEvent,
+    FunctionCallOutputEvent,
+    RunnerMessages,
+    ToolCall,
+    message_to_llm_dict,
+    system_message_to_llm_dict,
+)
 from lite_agent.types.messages import NewAssistantMessage, NewSystemMessage, NewUserMessage
 
 TEMPLATES_DIR = Path(__file__).parent / "templates"

@@ -32,10 +44,24 @@ class Agent:
         message_transfer: Callable[[RunnerMessages], RunnerMessages] | None = None,
         completion_condition: str = "stop",
         reasoning: ReasoningConfig = None,
+        stop_before_tools: list[str] | list[Callable] | None = None,
     ) -> None:
         self.name = name
         self.instructions = instructions
         self.reasoning = reasoning
+        # Convert stop_before_functions to function names
+        if stop_before_tools:
+            self.stop_before_functions = set()
+            for func in stop_before_tools:
+                if isinstance(func, str):
+                    self.stop_before_functions.add(func)
+                elif callable(func):
+                    self.stop_before_functions.add(func.__name__)
+                else:
+                    msg = f"stop_before_functions must contain strings or callables, got {type(func)}"
+                    raise TypeError(msg)
+        else:
+            self.stop_before_functions = set()
 
         if isinstance(model, BaseLLMClient):
             # If model is a BaseLLMClient instance, use it directly

@@ -54,7 +80,7 @@ class Agent:
         self.fc = Funcall(tools)
 
         # Add wait_for_user tool if completion condition is "call"
-        if completion_condition ==
+        if completion_condition == CompletionMode.CALL:
             self._add_wait_for_user_tool()
 
         # Set parent for handoff agents

@@ -99,7 +125,7 @@ class Agent:
 
         # Add single dynamic tool for all transfers
         self.fc.add_dynamic_tool(
-            name=
+            name=ToolName.TRANSFER_TO_AGENT,
             description="Transfer conversation to another agent.",
             parameters={
                 "name": {

@@ -129,7 +155,7 @@ class Agent:
 
         # Add dynamic tool for parent transfer
         self.fc.add_dynamic_tool(
-            name=
+            name=ToolName.TRANSFER_TO_PARENT,
             description="Transfer conversation back to parent agent when current task is completed or cannot be solved by current agent",
             parameters={},
             required=[],

@@ -160,7 +186,7 @@ class Agent:
         try:
             # Try to remove the existing transfer tool
             if hasattr(self.fc, "remove_dynamic_tool"):
-                self.fc.remove_dynamic_tool(
+                self.fc.remove_dynamic_tool(ToolName.TRANSFER_TO_AGENT)
         except Exception as e:
             # If removal fails, log and continue anyway
             logger.debug(f"Failed to remove existing transfer tool: {e}")

@@ -205,31 +231,30 @@ class Agent:
         for message in messages:
             if isinstance(message, NewAssistantMessage):
                 for item in message.content:
-                    )
+                    if isinstance(item, AssistantTextContent):
+                        res.append(
+                            {
+                                "role": "assistant",
+                                "content": item.text,
+                            },
+                        )
+                    elif isinstance(item, AssistantToolCall):
+                        res.append(
+                            {
+                                "type": "function_call",
+                                "call_id": item.call_id,
+                                "name": item.name,
+                                "arguments": item.arguments,
+                            },
+                        )
+                    elif isinstance(item, AssistantToolCallResult):
+                        res.append(
+                            {
+                                "type": "function_call_output",
+                                "call_id": item.call_id,
+                                "output": item.output,
+                            },
+                        )
             elif isinstance(message, NewSystemMessage):
                 res.append(
                     {

@@ -269,9 +294,6 @@ class Agent:
                         "content": contents,
                     },
                 )
-            # Handle dict messages (legacy format)
-            elif isinstance(message, dict):
-                res.append(message)
         return res
 
     async def completion(

@@ -279,6 +301,7 @@ class Agent:
         messages: RunnerMessages,
         record_to_file: Path | None = None,
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
     ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility

@@ -301,13 +324,14 @@ class Agent:
 
         # Use response handler for unified processing
         handler = CompletionResponseHandler()
-        return handler.handle(resp, streaming, record_to_file)
+        return handler.handle(resp, streaming=streaming, record_to=record_to_file)
 
     async def responses(
         self,
         messages: RunnerMessages,
         record_to_file: Path | None = None,
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
     ) -> AsyncGenerator[AgentChunk, None]:
         # Apply message transfer callback if provided - always use legacy format for LLM compatibility

@@ -328,20 +352,29 @@ class Agent:
         )
         # Use response handler for unified processing
         handler = ResponsesAPIHandler()
-        return handler.handle(resp, streaming, record_to_file)
+        return handler.handle(resp, streaming=streaming, record_to=record_to_file)
 
     async def list_require_confirm_tools(self, tool_calls: Sequence[ToolCall] | None) -> Sequence[ToolCall]:
         if not tool_calls:
             return []
         results = []
         for tool_call in tool_calls:
+            function_name = tool_call.function.name
+
+            # Check if function is in dynamic stop_before_functions list
+            if function_name in self.stop_before_functions:
+                logger.debug('Tool call "%s" requires confirmation (stop_before_functions)', tool_call.id)
+                results.append(tool_call)
+                continue
+
+            # Check decorator-based require_confirmation
+            tool_func = self.fc.function_registry.get(function_name)
             if not tool_func:
-                logger.warning("Tool function %s not found in registry",
+                logger.warning("Tool function %s not found in registry", function_name)
                 continue
-            tool_meta = self.fc.get_tool_meta(
+            tool_meta = self.fc.get_tool_meta(function_name)
             if tool_meta["require_confirm"]:
-                logger.debug('Tool call "%s" requires confirmation', tool_call.id)
+                logger.debug('Tool call "%s" requires confirmation (decorator)', tool_call.id)
                 results.append(tool_call)
         return results

@@ -396,10 +429,42 @@ class Agent:
         role = message_dict.get("role")
 
         if role == "assistant":
-            #
+            # Extract tool_calls from content if present
             tool_calls = []
+            content = message_dict.get("content", [])
+
+            # Handle both string and array content
+            if isinstance(content, list):
+                # Extract tool_calls from content array and filter out non-text content
+                filtered_content = []
+                for item in content:
+                    if isinstance(item, dict):
+                        if item.get("type") == "tool_call":
+                            tool_call = {
+                                "id": item.get("call_id", ""),
+                                "type": "function",
+                                "function": {
+                                    "name": item.get("name", ""),
+                                    "arguments": item.get("arguments", "{}"),
+                                },
+                                "index": len(tool_calls),
+                            }
+                            tool_calls.append(tool_call)
+                        elif item.get("type") == "text":
+                            filtered_content.append(item)
+                        # Skip tool_call_result - they should be handled by separate function_call_output messages
+
+                # Update content to only include text items
+                if filtered_content:
+                    message_dict = message_dict.copy()
+                    message_dict["content"] = filtered_content
+                elif tool_calls:
+                    # If we have tool_calls but no text content, set content to None per OpenAI API spec
+                    message_dict = message_dict.copy()
+                    message_dict["content"] = None
+
+            # Look ahead for function_call messages (legacy support)
             j = i + 1
-
             while j < len(messages):
                 next_message = messages[j]
                 next_dict = message_to_llm_dict(next_message) if isinstance(next_message, (NewUserMessage, NewSystemMessage, NewAssistantMessage)) else next_message

@@ -424,6 +489,13 @@ class Agent:
         if tool_calls:
             assistant_msg["tool_calls"] = tool_calls  # type: ignore
 
+        # Convert content format for OpenAI API compatibility
+        content = assistant_msg.get("content", [])
+        if isinstance(content, list):
+            # Extract text content and convert to string using list comprehension
+            text_parts = [item.get("text", "") for item in content if isinstance(item, dict) and item.get("type") == "text"]
+            assistant_msg["content"] = " ".join(text_parts) if text_parts else None
+
         converted_messages.append(assistant_msg)
         i = j  # Skip the function_call messages we've processed

@@ -536,10 +608,73 @@ class Agent:
 
         # Add dynamic tool for task completion
         self.fc.add_dynamic_tool(
-            name=
+            name=ToolName.WAIT_FOR_USER,
             description="Call this function when you have completed your assigned task or need more information from the user.",
             parameters={},
             required=[],
             handler=wait_for_user_handler,
         )
+
+    def set_stop_before_functions(self, functions: list[str] | list[Callable]) -> None:
+        """Set the list of functions that require confirmation before execution.
+
+        Args:
+            functions: List of function names (str) or callable objects
+        """
+        self.stop_before_functions = set()
+        for func in functions:
+            if isinstance(func, str):
+                self.stop_before_functions.add(func)
+            elif callable(func):
+                self.stop_before_functions.add(func.__name__)
+            else:
+                msg = f"stop_before_functions must contain strings or callables, got {type(func)}"
+                raise TypeError(msg)
+        logger.debug(f"Set stop_before_functions to: {self.stop_before_functions}")
+
+    def add_stop_before_function(self, function: str | Callable) -> None:
+        """Add a function to the stop_before_functions list.
+
+        Args:
+            function: Function name (str) or callable object to add
+        """
+        if isinstance(function, str):
+            function_name = function
+        elif callable(function):
+            function_name = function.__name__
+        else:
+            msg = f"function must be a string or callable, got {type(function)}"
+            raise TypeError(msg)
+
+        self.stop_before_functions.add(function_name)
+        logger.debug(f"Added '{function_name}' to stop_before_functions")
+
+    def remove_stop_before_function(self, function: str | Callable) -> None:
+        """Remove a function from the stop_before_functions list.
+
+        Args:
+            function: Function name (str) or callable object to remove
+        """
+        if isinstance(function, str):
+            function_name = function
+        elif callable(function):
+            function_name = function.__name__
+        else:
+            msg = f"function must be a string or callable, got {type(function)}"
+            raise TypeError(msg)
+
+        self.stop_before_functions.discard(function_name)
+        logger.debug(f"Removed '{function_name}' from stop_before_functions")
+
+    def clear_stop_before_functions(self) -> None:
+        """Clear all function names from the stop_before_functions list."""
+        self.stop_before_functions.clear()
+        logger.debug("Cleared all stop_before_functions")
+
+    def get_stop_before_functions(self) -> set[str]:
+        """Get the current set of function names that require confirmation.
+
+        Returns:
+            Set of function names
+        """
+        return self.stop_before_functions.copy()
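The stop_before_tools constructor parameter and the set/add/remove/clear/get helpers above form the new runtime confirmation API: entries are normalized to function names (callables via __name__), and list_require_confirm_tools checks this set before falling back to decorator metadata. A minimal usage sketch; the exact Agent keyword arguments and the delete_file tool are illustrative assumptions, not taken from the package's documentation:

from lite_agent.agent import Agent

def delete_file(path: str) -> str:
    """Hypothetical tool; any callable registered with the agent works."""
    return f"deleted {path}"

# Tools may be referenced by callable or by name string; both are
# normalized to function names in Agent.__init__.
agent = Agent(
    model="gpt-4o-mini",              # assumed model identifier
    name="file-bot",
    instructions="Manage the user's files.",
    tools=[delete_file],
    stop_before_tools=[delete_file],  # equivalent to ["delete_file"]
)

# The confirmation set can also be adjusted after construction:
agent.add_stop_before_function("delete_file")
agent.remove_stop_before_function(delete_file)
print(agent.get_stop_before_functions())  # returns a copy of the name set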
lite_agent/chat_display.py
CHANGED
@@ -26,6 +26,8 @@ from lite_agent.types import (
     AgentSystemMessage,
     AgentUserMessage,
     AssistantMessageMeta,
+    AssistantToolCall,
+    AssistantToolCallResult,
     BasicMessageMeta,
     FlexibleRunnerMessage,
     LLMResponseMeta,

@@ -228,9 +230,9 @@ def _update_message_counts(message: FlexibleRunnerMessage, counts: dict[str, int
         counts["Assistant"] += 1
         # Count tool calls and outputs within the assistant message
         for content_item in message.content:
-            if content_item
+            if isinstance(content_item, AssistantToolCall):
                 counts["Function Call"] += 1
-            elif content_item
+            elif isinstance(content_item, AssistantToolCallResult):
                 counts["Function Output"] += 1
     elif isinstance(message, NewSystemMessage):
         counts["System"] += 1

@@ -295,10 +297,18 @@ def _process_object_meta(meta: BasicMessageMeta | LLMResponseMeta | AssistantMes
     """Process object-form meta data."""
     # Both LLMResponseMeta and AssistantMessageMeta have these fields
     if isinstance(meta, (LLMResponseMeta, AssistantMessageMeta)):
+        # For AssistantMessageMeta, use the structured usage field
+        if isinstance(meta, AssistantMessageMeta) and meta.usage is not None:
+            if meta.usage.input_tokens is not None:
+                total_input += int(meta.usage.input_tokens)
+            if meta.usage.output_tokens is not None:
+                total_output += int(meta.usage.output_tokens)
+        # For LLMResponseMeta, use the flat fields
+        elif isinstance(meta, LLMResponseMeta):
+            if hasattr(meta, "input_tokens") and meta.input_tokens is not None:
+                total_input += int(meta.input_tokens)
+            if hasattr(meta, "output_tokens") and meta.output_tokens is not None:
+                total_output += int(meta.output_tokens)
         if hasattr(meta, "latency_ms") and meta.latency_ms is not None:
             total_latency += int(meta.latency_ms)
         if hasattr(meta, "output_time_ms") and meta.output_time_ms is not None:

@@ -363,11 +373,9 @@ def display_chat_summary(messages: RunnerMessages, *, console: Console | None =
        messages: the message list to summarize
        console: Rich Console instance; a new one is created if None
    """
-
-    console = Console()
-
+    active_console = console or Console()
     summary_table = build_chat_summary_table(messages)
+    active_console.print(summary_table)
 
 
 def display_messages(

@@ -577,9 +585,9 @@ def _display_assistant_message_compact_v2(message: AgentAssistantMessage, contex
             meta_parts.append(f"Latency:{message.meta.latency_ms}ms")
         if message.meta.output_time_ms is not None:
             meta_parts.append(f"Output:{message.meta.output_time_ms}ms")
-        if message.meta.input_tokens is not None and message.meta.output_tokens is not None:
-            total_tokens = message.meta.input_tokens + message.meta.output_tokens
-            meta_parts.append(f"Tokens:↑{message.meta.input_tokens}↓{message.meta.output_tokens}={total_tokens}")
+        if message.meta.usage and message.meta.usage.input_tokens is not None and message.meta.usage.output_tokens is not None:
+            total_tokens = message.meta.usage.input_tokens + message.meta.usage.output_tokens
+            meta_parts.append(f"Tokens:↑{message.meta.usage.input_tokens}↓{message.meta.usage.output_tokens}={total_tokens}")
 
         if meta_parts:
             meta_info = f" [dim]({' | '.join(meta_parts)})[/dim]"
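The chat_display changes move token accounting from flat meta fields onto the structured meta.usage object while keeping a fallback for the older shape. A small standalone sketch of that aggregation pattern; the function name is illustrative, only the usage/input_tokens/output_tokens field names come from the diff:

def aggregate_tokens(metas) -> tuple[int, int]:
    """Sum input/output tokens across message metas, preferring meta.usage."""
    total_input = total_output = 0
    for meta in metas:
        usage = getattr(meta, "usage", None)
        if usage is not None:
            total_input += int(usage.input_tokens or 0)
            total_output += int(usage.output_tokens or 0)
        else:
            # Older flat-field metas (LLMResponseMeta-style)
            total_input += int(getattr(meta, "input_tokens", None) or 0)
            total_output += int(getattr(meta, "output_tokens", None) or 0)
    return total_input, total_output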
lite_agent/client.py
CHANGED
@@ -100,6 +100,7 @@ class BaseLLMClient(abc.ABC):
         tools: list[ChatCompletionToolParam] | None = None,
         tool_choice: str = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # noqa: ANN401

@@ -112,6 +113,7 @@ class BaseLLMClient(abc.ABC):
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # noqa: ANN401

@@ -136,6 +138,7 @@ class LiteLLMClient(BaseLLMClient):
         tools: list[ChatCompletionToolParam] | None = None,
         tool_choice: str = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # noqa: ANN401

@@ -187,6 +190,7 @@ class LiteLLMClient(BaseLLMClient):
         tools: list[FunctionToolParam] | None = None,
         tool_choice: Literal["none", "auto", "required"] = "auto",
         reasoning: ReasoningConfig = None,
+        *,
         streaming: bool = True,
         **kwargs: Any,  # noqa: ANN401
     ) -> Any:  # type: ignore[return]  # noqa: ANN401
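Every client signature gains a bare `*` before streaming, making the flag keyword-only; a 0.6.0 call site that passed streaming positionally now raises TypeError. A toy stand-in (not the library's real signature) showing the behavioral difference:

def completion(messages, tools=None, tool_choice="auto", reasoning=None, *, streaming=True):
    """Toy function mirroring the new keyword-only shape."""
    return "streaming" if streaming else "blocking"

print(completion([], streaming=False))         # OK: "blocking"
try:
    completion([], None, "auto", None, False)  # streaming passed positionally
except TypeError as exc:
    print(exc)  # takes from 1 to 4 positional arguments but 5 were given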
lite_agent/constants.py
ADDED
@@ -0,0 +1,30 @@
+from typing import Literal
+
+
+class CompletionMode:
+    """Agent completion modes."""
+
+    STOP: Literal["stop"] = "stop"  # Traditional completion until model decides to stop
+    CALL: Literal["call"] = "call"  # Completion until specific tool is called
+
+
+class ToolName:
+    """System tool names."""
+
+    TRANSFER_TO_AGENT = "transfer_to_agent"
+    TRANSFER_TO_PARENT = "transfer_to_parent"
+    WAIT_FOR_USER = "wait_for_user"
+
+
+class StreamIncludes:
+    """Default stream includes configuration."""
+
+    DEFAULT_INCLUDES = (
+        "completion_raw",
+        "usage",
+        "function_call",
+        "function_call_output",
+        "content_delta",
+        "function_call_delta",
+        "assistant_message",
+    )
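Since CompletionMode and ToolName are plain classes whose attributes are ordinary strings rather than Enum members, existing string comparisons keep working. A self-contained check using the definitions above:

from typing import Literal

class CompletionMode:
    STOP: Literal["stop"] = "stop"
    CALL: Literal["call"] = "call"

class ToolName:
    TRANSFER_TO_AGENT = "transfer_to_agent"

# Callers passing completion_condition="call" still hit the
# CompletionMode.CALL branch in Agent.__init__.
assert CompletionMode.CALL == "call"
assert ToolName.TRANSFER_TO_AGENT == "transfer_to_agent"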
lite_agent/message_transfers.py
CHANGED
@@ -5,7 +5,7 @@ This module provides common message transfer functions that can be used
 with agents to preprocess messages before sending them to the API.
 """
 
-from lite_agent.types import RunnerMessages
+from lite_agent.types import NewUserMessage, RunnerMessages, UserTextContent
 
 
 def consolidate_history_transfer(messages: RunnerMessages) -> RunnerMessages:

@@ -43,8 +43,8 @@ def consolidate_history_transfer(messages: RunnerMessages) -> RunnerMessages:
     # Create the consolidated message
     consolidated_content = "以下是目前发生的所有交互:\n\n" + "\n".join(xml_content) + "\n\n接下来该做什么?"
 
-    # Return a single user message
-    return [
+    # Return a single user message using NewMessage format
+    return [NewUserMessage(content=[UserTextContent(text=consolidated_content)])]
 
 
 def _process_message_to_xml(message: dict | object) -> list[str]:
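consolidate_history_transfer now returns a typed NewUserMessage instead of a raw dict. A sketch of wiring it through the message_transfer hook that the agent.py diff shows on Agent.__init__; the model id and instructions here are illustrative:

from lite_agent.agent import Agent
from lite_agent.message_transfers import consolidate_history_transfer

agent = Agent(
    model="gpt-4o-mini",  # assumed model identifier
    name="summarizer",
    instructions="Answer using the consolidated history.",
    # The whole history is flattened into one user message before each LLM call.
    message_transfer=consolidate_history_transfer,
)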
lite_agent/processors/completion_event_processor.py
CHANGED

@@ -26,6 +26,7 @@ from lite_agent.types import (
     ToolCallFunction,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics
 
 
 class CompletionEventProcessor:

@@ -71,21 +72,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms =
-            output_time_ms =
-            # latency_ms: time from starting to prepare output until the LLM emits the first character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first output character until output completes
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
 
             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,

@@ -152,21 +150,18 @@ class CompletionEventProcessor:
         if not self.yielded_content:
             self.yielded_content = True
             end_time = datetime.now(timezone.utc)
-            latency_ms =
-            output_time_ms =
-            # latency_ms: time from starting to prepare output until the LLM emits the first character
-            if self._start_time and self._first_output_time:
-                latency_ms = int((self._first_output_time - self._start_time).total_seconds() * 1000)
-            # output_time_ms: time from the first output character until output completes
-            if self._first_output_time and self._output_complete_time:
-                output_time_ms = int((self._output_complete_time - self._first_output_time).total_seconds() * 1000)
+            latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+            output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
 
             usage = MessageUsage(
                 input_tokens=self._usage_data.get("input_tokens"),
                 output_tokens=self._usage_data.get("output_tokens"),
             )
+            # Extract model information from chunk
+            model_name = getattr(chunk, "model", None)
             meta = AssistantMessageMeta(
                 sent_at=end_time,
+                model=model_name,
                 latency_ms=latency_ms,
                 total_time_ms=output_time_ms,
                 usage=usage,

@@ -199,10 +194,9 @@ class CompletionEventProcessor:
             results.append(UsageEvent(usage=EventUsage(input_tokens=usage["prompt_tokens"], output_tokens=usage["completion_tokens"])))
 
         # Then yield timing event if we have timing data
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
lite_agent/processors/response_event_processor.py
CHANGED

@@ -22,12 +22,14 @@ from lite_agent.types import (
     ContentDeltaEvent,
     EventUsage,
     FunctionCallEvent,
+    MessageUsage,
     NewAssistantMessage,
     ResponseRawEvent,
     Timing,
     TimingEvent,
     UsageEvent,
 )
+from lite_agent.utils.metrics import TimingMetrics
 
 
 class ResponseEventProcessor:

@@ -111,21 +113,26 @@ class ResponseEventProcessor:
             content = item.get("content", [])
             if content and isinstance(content, list) and len(content) > 0:
                 end_time = datetime.now(timezone.utc)
-                latency_ms =
-                output_time_ms =
+                latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+                output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+
+                # Extract model information from event
+                model_name = getattr(event, "model", None)
+                # Debug: check if event has model info in different location
+                if hasattr(event, "response") and hasattr(event.response, "model"):
+                    model_name = getattr(event.response, "model", None)
+                # Create usage information
+                usage = MessageUsage(
+                    input_tokens=self._usage_data.get("input_tokens"),
+                    output_tokens=self._usage_data.get("output_tokens"),
+                    total_tokens=(self._usage_data.get("input_tokens") or 0) + (self._usage_data.get("output_tokens") or 0),
+                )
                 meta = AssistantMessageMeta(
                     sent_at=end_time,
+                    model=model_name,
                     latency_ms=latency_ms,
                     output_time_ms=output_time_ms,
-                    output_tokens=self._usage_data.get("output_tokens"),
+                    usage=usage,
                 )
                 return [
                     AssistantMessageEvent(

@@ -173,10 +180,9 @@ class ResponseEventProcessor:
                 )
 
         # Then yield timing event if we have timing data
+        latency_ms = TimingMetrics.calculate_latency_ms(self._start_time, self._first_output_time)
+        output_time_ms = TimingMetrics.calculate_output_time_ms(self._first_output_time, self._output_complete_time)
+        if latency_ms is not None and output_time_ms is not None:
             results.append(
                 TimingEvent(
                     timing=Timing(
lite_agent/response_handlers/__init__.py
CHANGED

@@ -1,4 +1,5 @@
 """Response handlers for unified streaming and non-streaming processing."""
+
 from lite_agent.response_handlers.base import ResponseHandler
 from lite_agent.response_handlers.completion import CompletionResponseHandler
 from lite_agent.response_handlers.responses import ResponsesAPIHandler
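The new lite_agent/utils/metrics.py (+50 lines) is not expanded in this diff, but the inline code it replaces pins down the contract of the two helpers both processors now call: millisecond deltas that are None when either timestamp is missing. A plausible reconstruction under those assumptions; the shipped implementation may differ:

from datetime import datetime

class TimingMetrics:
    """Sketch of the timing helpers referenced by both event processors."""

    @staticmethod
    def calculate_latency_ms(start: datetime | None, first_output: datetime | None) -> int | None:
        # latency_ms: time from starting the request until the first output character
        if start and first_output:
            return int((first_output - start).total_seconds() * 1000)
        return None

    @staticmethod
    def calculate_output_time_ms(first_output: datetime | None, complete: datetime | None) -> int | None:
        # output_time_ms: time from the first output character until output completes
        if first_output and complete:
            return int((complete - first_output).total_seconds() * 1000)
        return None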