grasp_agents 0.5.8__tar.gz → 0.5.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/PKG-INFO +12 -13
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/README.md +11 -12
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/pyproject.toml +1 -1
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/cloud_llm.py +88 -109
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/litellm/converters.py +4 -2
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/litellm/lite_llm.py +90 -80
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/llm.py +52 -97
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/llm_agent.py +32 -36
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/llm_agent_memory.py +3 -2
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/llm_policy_executor.py +63 -33
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/converters.py +4 -2
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/openai_llm.py +66 -85
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/tool_converters.py +6 -4
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/processors/base_processor.py +18 -10
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/processors/parallel_processor.py +8 -6
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/processors/processor.py +10 -6
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/prompt_builder.py +22 -28
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/run_context.py +1 -1
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/runner.py +1 -1
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/converters.py +3 -1
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/tool.py +13 -5
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/workflow/workflow_processor.py +4 -4
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/.gitignore +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/LICENSE.md +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/__init__.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/costs_dict.yaml +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/errors.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/generics_utils.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/grasp_logging.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/http_client.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/litellm/__init__.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/litellm/completion_chunk_converters.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/litellm/completion_converters.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/litellm/message_converters.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/memory.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/__init__.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/completion_chunk_converters.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/completion_converters.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/content_converters.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/openai/message_converters.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/packet.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/packet_pool.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/printer.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/rate_limiting/__init__.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/rate_limiting/rate_limiter_chunked.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/rate_limiting/types.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/rate_limiting/utils.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/__init__.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/completion.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/completion_chunk.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/content.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/events.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/io.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/typing/message.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/usage_tracker.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/utils.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/workflow/__init__.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/workflow/looped_workflow.py +0 -0
- {grasp_agents-0.5.8 → grasp_agents-0.5.10}/src/grasp_agents/workflow/sequential_workflow.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: grasp_agents
-Version: 0.5.8
+Version: 0.5.10
 Summary: Grasp Agents Library
 License-File: LICENSE.md
 Requires-Python: <4,>=3.11.4
@@ -37,31 +37,30 @@ Description-Content-Type: text/markdown
 
 ## Features
 
-- Clean formulation of agents as generic entities over
-  - I/O schemas
-  - Memory
-  - Shared context
+- Clean formulation of agents as generic entities over I/O schemas and shared context.
 - Transparent implementation of common agentic patterns:
-  - Single-agent loops
+  - Single-agent loops
   - Workflows (static communication topology), including loops
   - Agents-as-tools for task delegation
   - Freeform A2A communication via the in-process actor model
--
--
+- Built-in parallel processing with flexible retries and rate limiting.
+- Support for all popular API providers via LiteLLM.
+- Granular event streaming with separate events for standard outputs, thinking, and tool calls.
+- Callbacks via decorators or subclassing for straightforward customisation of agentic loops and context management.
 
 ## Project Structure
 
-- `
-- `packet.py`, `packet_pool.py`: Communication management.
+- `processors/`, `llm_agent.py`: Core processor and agent class implementations.
+- `packet.py`, `packet_pool.py`, `runner.py`: Communication management.
 - `llm_policy_executor.py`: LLM actions and tool call loops.
 - `prompt_builder.py`: Tools for constructing prompts.
 - `workflow/`: Modules for defining and managing static agent workflows.
 - `llm.py`, `cloud_llm.py`: LLM integration and base LLM functionalities.
 - `openai/`: Modules specific to OpenAI API integration.
-- `
+- `litellm/`: Modules specific to LiteLLM integration.
+- `memory.py`, `llm_agent_memory.py`: Basic agent memory management.
 - `run_context.py`: Shared context management for agent runs.
 - `usage_tracker.py`: Tracking of API usage and costs.
-- `costs_dict.yaml`: Dictionary for cost tracking (update if needed).
 - `rate_limiting/`: Basic rate limiting tools.
 
 ## Quickstart & Installation Variants (UV Package manager)
@@ -190,7 +189,7 @@ teacher = LLMAgent[None, Problem, None](
 )
 
 async def main():
-    ctx = RunContext[None](
+    ctx = RunContext[None](log_messages=True)
     out = await teacher.run("start", ctx=ctx)
     print(out.payloads[0])
     print(ctx.usage_tracker.total_usage)
README.md

@@ -20,31 +20,30 @@
 
 ## Features
 
-- Clean formulation of agents as generic entities over
-  - I/O schemas
-  - Memory
-  - Shared context
+- Clean formulation of agents as generic entities over I/O schemas and shared context.
 - Transparent implementation of common agentic patterns:
-  - Single-agent loops
+  - Single-agent loops
   - Workflows (static communication topology), including loops
   - Agents-as-tools for task delegation
   - Freeform A2A communication via the in-process actor model
--
--
+- Built-in parallel processing with flexible retries and rate limiting.
+- Support for all popular API providers via LiteLLM.
+- Granular event streaming with separate events for standard outputs, thinking, and tool calls.
+- Callbacks via decorators or subclassing for straightforward customisation of agentic loops and context management.
 
 ## Project Structure
 
-- `
-- `packet.py`, `packet_pool.py`: Communication management.
+- `processors/`, `llm_agent.py`: Core processor and agent class implementations.
+- `packet.py`, `packet_pool.py`, `runner.py`: Communication management.
 - `llm_policy_executor.py`: LLM actions and tool call loops.
 - `prompt_builder.py`: Tools for constructing prompts.
 - `workflow/`: Modules for defining and managing static agent workflows.
 - `llm.py`, `cloud_llm.py`: LLM integration and base LLM functionalities.
 - `openai/`: Modules specific to OpenAI API integration.
-- `
+- `litellm/`: Modules specific to LiteLLM integration.
+- `memory.py`, `llm_agent_memory.py`: Basic agent memory management.
 - `run_context.py`: Shared context management for agent runs.
 - `usage_tracker.py`: Tracking of API usage and costs.
-- `costs_dict.yaml`: Dictionary for cost tracking (update if needed).
 - `rate_limiting/`: Basic rate limiting tools.
 
 ## Quickstart & Installation Variants (UV Package manager)
@@ -173,7 +172,7 @@ teacher = LLMAgent[None, Problem, None](
 )
 
 async def main():
-    ctx = RunContext[None](
+    ctx = RunContext[None](log_messages=True)
    out = await teacher.run("start", ctx=ctx)
     print(out.payloads[0])
     print(ctx.usage_tracker.total_usage)
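In both copies of the quickstart, the run context is now constructed with message logging enabled. A minimal sketch of the updated usage, assuming `RunContext` is importable from `grasp_agents.run_context` and that `teacher` is the `LLMAgent[None, Problem, None]` defined earlier in the README example (the removed line is truncated in the diff, so any arguments it previously passed are not recoverable here):

```python
import asyncio

from grasp_agents.run_context import RunContext

async def main() -> None:
    # New in 0.5.10 quickstart: enable message logging on the shared run context.
    ctx = RunContext[None](log_messages=True)
    # `teacher` is the agent built earlier in the quickstart snippet.
    out = await teacher.run("start", ctx=ctx)
    print(out.payloads[0])
    print(ctx.usage_tracker.total_usage)

asyncio.run(main())
```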
src/grasp_agents/cloud_llm.py

@@ -1,7 +1,8 @@
 import logging
 from abc import abstractmethod
-from collections.abc import AsyncIterator, Mapping
+from collections.abc import AsyncIterator, Mapping
 from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Generic, Required, cast
 
 import httpx
@@ -58,111 +59,53 @@ LLMRateLimiter = RateLimiterC[
 ]
 
 
+@dataclass(frozen=True)
 class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co]):
-
-
-
-
-
-
-
-
-
-
-
-        model_id: str | None = None,
-        # Connection settings
-        async_http_client: httpx.AsyncClient | None = None,
-        async_http_client_params: (
-            dict[str, Any] | AsyncHTTPClientParams | None
-        ) = None,
-        max_client_retries: int = 2,
-        # Rate limiting
-        rate_limiter: LLMRateLimiter | None = None,
-        # LLM response retries: try to regenerate to pass validation
-        max_response_retries: int = 0,
-        **kwargs: Any,
-    ) -> None:
-        self.llm_settings: CloudLLMSettings | None
-
-        super().__init__(
-            model_name=model_name,
-            llm_settings=llm_settings,
-            converters=converters,
-            model_id=model_id,
-            tools=tools,
-            response_schema=response_schema,
-            response_schema_by_xml_tag=response_schema_by_xml_tag,
-            **kwargs,
-        )
-
-        self._model_name = model_name
-        self._api_provider = api_provider
-        self._apply_response_schema_via_provider = apply_response_schema_via_provider
-
-        if (
-            apply_response_schema_via_provider
-            and response_schema_by_xml_tag is not None
-        ):
-            raise ValueError(
-                "Response schema by XML tag is not supported "
-                "when apply_response_schema_via_provider is True."
-            )
+    # Make this field keyword-only to avoid ordering issues with inherited defaulted fields
+    api_provider: APIProvider | None = None
+    llm_settings: SettingsT_co | None = None
+    rate_limiter: LLMRateLimiter | None = None
+    max_client_retries: int = 2  # HTTP client retries for network errors
+    max_response_retries: int = (
+        0  # LLM response retries: try to regenerate to pass validation
+    )
+    apply_response_schema_via_provider: bool = False
+    async_http_client: httpx.AsyncClient | None = None
+    async_http_client_params: dict[str, Any] | AsyncHTTPClientParams | None = None
 
-
-        if rate_limiter is not None:
-            self._rate_limiter = rate_limiter
+    def __post_init__(self) -> None:
+        if self.rate_limiter is not None:
             logger.info(
-                f"[{self.__class__.__name__}] Set rate limit to
+                f"[{self.__class__.__name__}] Set rate limit to "
+                f"{self.rate_limiter.rpm} RPM"
             )
 
-        self.
-
-
-
-
-        async_http_client_params
+        if self.async_http_client is None and self.async_http_client_params is not None:
+            object.__setattr__(
+                self,
+                "async_http_client",
+                create_simple_async_httpx_client(self.async_http_client_params),
             )
 
-        self.max_client_retries = max_client_retries
-        self.max_response_retries = max_response_retries
-
-    @property
-    def api_provider(self) -> APIProvider:
-        return self._api_provider
-
-    @property
-    def rate_limiter(self) -> LLMRateLimiter | None:
-        return self._rate_limiter
-
-    @property
-    def tools(self) -> dict[str, BaseTool[BaseModel, Any, Any]] | None:
-        return self._tools
-
-    @tools.setter
-    def tools(self, tools: Sequence[BaseTool[BaseModel, Any, Any]] | None) -> None:
-        if not tools:
-            self._tools = None
-            return
-        strict_value = True if self._apply_response_schema_via_provider else None
-        for t in tools:
-            t.strict = strict_value
-        self._tools = {t.name: t for t in tools}
-
     def _make_completion_kwargs(
         self,
         conversation: Messages,
+        response_schema: Any | None = None,
+        tools: Mapping[str, BaseTool[BaseModel, Any, Any]] | None = None,
         tool_choice: ToolChoice | None = None,
         n_choices: int | None = None,
     ) -> dict[str, Any]:
-        api_messages = [self.
+        api_messages = [self.converters.to_message(m) for m in conversation]
 
         api_tools = None
         api_tool_choice = None
-        if
-
+        if tools:
+            strict = True if self.apply_response_schema_via_provider else None
+            api_tools = [
+                self.converters.to_tool(t, strict=strict) for t in tools.values()
+            ]
         if tool_choice is not None:
-            api_tool_choice = self.
+            api_tool_choice = self.converters.to_tool_choice(tool_choice)
 
         api_llm_settings = deepcopy(self.llm_settings or {})
 
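The hunk above replaces CloudLLM's hand-written `__init__`, property accessors, and tools setter with declarative `@dataclass(frozen=True)` fields; derived state (the HTTP client built from `async_http_client_params`) is installed in `__post_init__` via `object.__setattr__`, the standard escape hatch since frozen instances reject ordinary attribute assignment. A minimal, library-independent sketch of the same pattern (names here are illustrative, not grasp_agents APIs):

```python
from dataclasses import dataclass
from typing import Any

@dataclass(frozen=True)
class ClientConfig:
    # Defaulted fields replace the old __init__ parameters one-for-one.
    max_retries: int = 2
    client_params: dict[str, Any] | None = None
    client: Any | None = None  # either supplied directly, or derived below

    def __post_init__(self) -> None:
        # `self.client = ...` would raise FrozenInstanceError here, so derived
        # fields go through object.__setattr__, as cloud_llm.py now does.
        if self.client is None and self.client_params is not None:
            object.__setattr__(self, "client", dict(self.client_params))

cfg = ClientConfig(client_params={"timeout": 30})
assert cfg.client == {"timeout": 30}
```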
@@ -170,7 +113,7 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
             api_messages=api_messages,
             api_tools=api_tools,
             api_tool_choice=api_tool_choice,
-            api_response_schema=
+            api_response_schema=response_schema,
             n_choices=n_choices,
             **api_llm_settings,
         )
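`_make_completion_kwargs` now forwards the per-call `response_schema` instead of reading it off the instance; the callers below then strip `api_response_schema` from the kwargs whenever the provider is not supposed to enforce it. A small sketch of that build-then-conditionally-pop flow, with hypothetical names:

```python
from typing import Any

def make_kwargs(response_schema: Any | None, apply_via_provider: bool) -> dict[str, Any]:
    kwargs: dict[str, Any] = {"api_response_schema": response_schema, "n_choices": 1}
    if not apply_via_provider:
        # The schema is forwarded to the provider only when it should enforce it;
        # otherwise it is dropped here and validated client-side after the call.
        kwargs.pop("api_response_schema", None)
    return kwargs

schema = {"type": "object"}
assert "api_response_schema" not in make_kwargs(schema, apply_via_provider=False)
assert make_kwargs(schema, apply_via_provider=True)["api_response_schema"] is schema
```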
@@ -206,24 +149,34 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         self,
         conversation: Messages,
         *,
+        response_schema: Any | None = None,
+        response_schema_by_xml_tag: Mapping[str, Any] | None = None,
+        tools: Mapping[str, BaseTool[BaseModel, Any, Any]] | None = None,
         tool_choice: ToolChoice | None = None,
         n_choices: int | None = None,
     ) -> Completion:
         completion_kwargs = self._make_completion_kwargs(
-            conversation=conversation,
+            conversation=conversation,
+            response_schema=response_schema,
+            tools=tools,
+            tool_choice=tool_choice,
+            n_choices=n_choices,
         )
 
-        if not self.
+        if not self.apply_response_schema_via_provider:
             completion_kwargs.pop("api_response_schema", None)
         api_completion = await self._get_completion(**completion_kwargs)
 
-        completion = self.
-            api_completion, name=self.model_id
-        )
+        completion = self.converters.from_completion(api_completion, name=self.model_id)
 
-        if not self.
-            self._validate_response(
-
+        if not self.apply_response_schema_via_provider:
+            self._validate_response(
+                completion,
+                response_schema=response_schema,
+                response_schema_by_xml_tag=response_schema_by_xml_tag,
+            )
+        if tools is not None:
+            self._validate_tool_calls(completion, tools=tools)
 
         return completion
 
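Validation now runs against the per-call `response_schema` (and, separately, the per-call `tools`) rather than instance-level configuration. A rough stand-in for the client-side check, assuming a Pydantic model as the schema, in line with how grasp_agents types its tools and schemas (`BaseTool[BaseModel, ...]`); the real `_validate_response` raises `LLMResponseValidationError` rather than `ValueError`:

```python
import json

from pydantic import BaseModel, ValidationError

class Problem(BaseModel):
    question: str

def validate_response(raw_text: str, response_schema: type[BaseModel] | None) -> None:
    # Mirrors the post-hoc check: only needed when the provider was not asked
    # to enforce the schema itself (apply_response_schema_via_provider=False).
    if response_schema is None:
        return
    try:
        response_schema.model_validate(json.loads(raw_text))
    except (ValidationError, json.JSONDecodeError) as err:
        raise ValueError(f"completion failed schema validation: {err}") from err

validate_response('{"question": "What is 2 + 2?"}', Problem)  # passes
```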
@@ -231,6 +184,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         self,
         conversation: Messages,
         *,
+        response_schema: Any | None = None,
+        response_schema_by_xml_tag: Mapping[str, Any] | None = None,
+        tools: Mapping[str, BaseTool[BaseModel, Any, Any]] | None = None,
         tool_choice: ToolChoice | None = None,
         n_choices: int | None = None,
         proc_name: str | None = None,
@@ -241,6 +197,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         try:
             return await self._generate_completion_once(
                 conversation, # type: ignore[return]
+                response_schema=response_schema,
+                response_schema_by_xml_tag=response_schema_by_xml_tag,
+                tools=tools,
                 tool_choice=tool_choice,
                 n_choices=n_choices,
             )
@@ -263,7 +222,7 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         )
 
         return make_refusal_completion(
-            self.
+            self.model_name,
             Exception("Unexpected error: retry loop exited without returning"),
         )
 
@@ -272,15 +231,22 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         self,
         conversation: Messages,
         *,
+        response_schema: Any | None = None,
+        response_schema_by_xml_tag: Mapping[str, Any] | None = None,
+        tools: Mapping[str, BaseTool[BaseModel, Any, Any]] | None = None,
         tool_choice: ToolChoice | None = None,
         n_choices: int | None = None,
         proc_name: str | None = None,
         call_id: str | None = None,
     ) -> AsyncIterator[CompletionChunkEvent[CompletionChunk] | CompletionEvent]:
         completion_kwargs = self._make_completion_kwargs(
-            conversation=conversation,
+            conversation=conversation,
+            response_schema=response_schema,
+            tools=tools,
+            tool_choice=tool_choice,
+            n_choices=n_choices,
         )
-        if not self.
+        if not self.apply_response_schema_via_provider:
             completion_kwargs.pop("api_response_schema", None)
 
         api_stream = self._get_completion_stream(**completion_kwargs)
@@ -293,7 +259,7 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
 
             async for api_completion_chunk in api_stream:
                 api_completion_chunks.append(api_completion_chunk)
-                completion_chunk = self.
+                completion_chunk = self.converters.from_completion_chunk(
                     api_completion_chunk, name=self.model_id
                 )
 
@@ -301,16 +267,23 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
                     data=completion_chunk, proc_name=proc_name, call_id=call_id
                 )
 
-            api_completion = self.combine_completion_chunks(
-
+            api_completion = self.combine_completion_chunks(
+                api_completion_chunks, response_schema=response_schema, tools=tools
+            )
+            completion = self.converters.from_completion(
                 api_completion, name=self.model_id
             )
 
             yield CompletionEvent(data=completion, proc_name=proc_name, call_id=call_id)
 
-            if not self.
-                self._validate_response(
-
+            if not self.apply_response_schema_via_provider:
+                self._validate_response(
+                    completion,
+                    response_schema=response_schema,
+                    response_schema_by_xml_tag=response_schema_by_xml_tag,
+                )
+            if tools is not None:
+                self._validate_tool_calls(completion, tools=tools)
 
         return iterator()
 
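The streaming path keeps its shape: every chunk is forwarded as an event, the buffered chunks are combined into one completion (now with the per-call `response_schema` and `tools` passed to `combine_completion_chunks`), a final `CompletionEvent` is yielded, and only then is the combined result validated. A toy async-generator sketch of that buffer-combine-validate ordering, with stand-in types:

```python
import asyncio
import json
from collections.abc import AsyncIterator

async def provider_chunks() -> AsyncIterator[str]:
    # Stand-in for the provider's chunk stream.
    for piece in ('{"answer": ', '"4"}'):
        yield piece

async def stream_events() -> AsyncIterator[str]:
    chunks: list[str] = []
    async for chunk in provider_chunks():
        chunks.append(chunk)
        yield f"chunk event: {chunk!r}"      # per-chunk events come first
    combined = "".join(chunks)               # combine_completion_chunks analogue
    yield f"completion event: {combined!r}"  # then one final completion event
    json.loads(combined)                     # validation last; raises if malformed

async def main() -> None:
    async for event in stream_events():
        print(event)

asyncio.run(main())
```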
@@ -318,6 +291,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         self,
         conversation: Messages,
         *,
+        response_schema: Any | None = None,
+        response_schema_by_xml_tag: Mapping[str, Any] | None = None,
+        tools: Mapping[str, BaseTool[BaseModel, Any, Any]] | None = None,
         tool_choice: ToolChoice | None = None,
         n_choices: int | None = None,
         proc_name: str | None = None,
@@ -330,6 +306,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         try:
             async for event in await self._generate_completion_stream_once( # type: ignore[return]
                 conversation, # type: ignore[arg-type]
+                response_schema=response_schema,
+                response_schema_by_xml_tag=response_schema_by_xml_tag,
+                tools=tools,
                 tool_choice=tool_choice,
                 n_choices=n_choices,
                 proc_name=proc_name,
@@ -339,7 +318,7 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
             return
         except (LLMResponseValidationError, LLMToolCallValidationError) as err:
             err_data = LLMStreamingErrorData(
-                error=err, model_name=self.
+                error=err, model_name=self.model_name, model_id=self.model_id
             )
             yield LLMStreamingErrorEvent(
                 data=err_data, proc_name=proc_name, call_id=call_id
@@ -355,7 +334,7 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
                 f"retrying:\n{err}"
             )
             refusal_completion = make_refusal_completion(
-                self.
+                self.model_name, err
             )
             yield CompletionEvent(
                 data=refusal_completion,
src/grasp_agents/litellm/converters.py

@@ -118,8 +118,10 @@ class LiteLLMConverters(Converters):
         return from_api_tool_message(raw_message, name=name, **kwargs)
 
     @staticmethod
-    def to_tool(
-
+    def to_tool(
+        tool: BaseTool[BaseModel, Any, Any], strict: bool | None = None, **kwargs: Any
+    ) -> OpenAIToolParam:
+        return to_api_tool(tool, strict=strict, **kwargs)
 
     @staticmethod
     def to_tool_choice(