grasp_agents 0.5.6__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/PKG-INFO +12 -13
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/README.md +11 -12
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/pyproject.toml +1 -1
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/cloud_llm.py +11 -5
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/lite_llm.py +22 -1
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm.py +134 -1
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm_policy_executor.py +9 -3
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/openai_llm.py +11 -3
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/packet_pool.py +23 -43
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/printer.py +75 -77
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/processors/parallel_processor.py +15 -13
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/runner.py +27 -24
- grasp_agents-0.5.9/src/grasp_agents/typing/completion_chunk.py +506 -0
- grasp_agents-0.5.9/src/grasp_agents/typing/events.py +376 -0
- grasp_agents-0.5.6/src/grasp_agents/typing/completion_chunk.py +0 -207
- grasp_agents-0.5.6/src/grasp_agents/typing/events.py +0 -170
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/.gitignore +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/LICENSE.md +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/__init__.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/costs_dict.yaml +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/errors.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/generics_utils.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/grasp_logging.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/http_client.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/__init__.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/completion_chunk_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/completion_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/message_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm_agent.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm_agent_memory.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/memory.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/__init__.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/completion_chunk_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/completion_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/content_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/message_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/tool_converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/packet.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/processors/base_processor.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/processors/processor.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/prompt_builder.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/__init__.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/rate_limiter_chunked.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/types.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/utils.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/run_context.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/__init__.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/completion.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/content.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/converters.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/io.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/message.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/tool.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/usage_tracker.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/utils.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/__init__.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/looped_workflow.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/sequential_workflow.py +0 -0
- {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/workflow_processor.py +0 -0
```diff
--- grasp_agents-0.5.6/PKG-INFO
+++ grasp_agents-0.5.9/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: grasp_agents
-Version: 0.5.6
+Version: 0.5.9
 Summary: Grasp Agents Library
 License-File: LICENSE.md
 Requires-Python: <4,>=3.11.4
@@ -37,31 +37,30 @@ Description-Content-Type: text/markdown
 
 ## Features
 
-- Clean formulation of agents as generic entities over
-  - I/O schemas
-  - Memory
-  - Shared context
+- Clean formulation of agents as generic entities over I/O schemas and shared context.
 - Transparent implementation of common agentic patterns:
-  - Single-agent loops
+  - Single-agent loops
   - Workflows (static communication topology), including loops
   - Agents-as-tools for task delegation
   - Freeform A2A communication via the in-process actor model
-
-
+- Built-in parallel processing with flexible retries and rate limiting.
+- Support for all popular API providers via LiteLLM.
+- Granular event streaming with separate events for standard outputs, thinking, and tool calls.
+- Callbacks via decorators or subclassing for straightforward customisation of agentic loops and context management.
 
 ## Project Structure
 
-- `
-- `packet.py`, `packet_pool.py`: Communication management.
+- `processors/`, `llm_agent.py`: Core processor and agent class implementations.
+- `packet.py`, `packet_pool.py`, `runner.py`: Communication management.
 - `llm_policy_executor.py`: LLM actions and tool call loops.
 - `prompt_builder.py`: Tools for constructing prompts.
 - `workflow/`: Modules for defining and managing static agent workflows.
 - `llm.py`, `cloud_llm.py`: LLM integration and base LLM functionalities.
 - `openai/`: Modules specific to OpenAI API integration.
-- `
+- `litellm/`: Modules specific to LiteLLM integration.
+- `memory.py`, `llm_agent_memory.py`: Basic agent memory management.
 - `run_context.py`: Shared context management for agent runs.
 - `usage_tracker.py`: Tracking of API usage and costs.
-- `costs_dict.yaml`: Dictionary for cost tracking (update if needed).
 - `rate_limiting/`: Basic rate limiting tools.
 
 ## Quickstart & Installation Variants (UV Package manager)
@@ -190,7 +189,7 @@ teacher = LLMAgent[None, Problem, None](
 )
 
 async def main():
-    ctx = RunContext[None](
+    ctx = RunContext[None](log_messages=True)
     out = await teacher.run("start", ctx=ctx)
     print(out.payloads[0])
     print(ctx.usage_tracker.total_usage)
```
```diff
--- grasp_agents-0.5.6/README.md
+++ grasp_agents-0.5.9/README.md
@@ -20,31 +20,30 @@
 
 ## Features
 
-- Clean formulation of agents as generic entities over
-  - I/O schemas
-  - Memory
-  - Shared context
+- Clean formulation of agents as generic entities over I/O schemas and shared context.
 - Transparent implementation of common agentic patterns:
-  - Single-agent loops
+  - Single-agent loops
   - Workflows (static communication topology), including loops
   - Agents-as-tools for task delegation
   - Freeform A2A communication via the in-process actor model
-
-
+- Built-in parallel processing with flexible retries and rate limiting.
+- Support for all popular API providers via LiteLLM.
+- Granular event streaming with separate events for standard outputs, thinking, and tool calls.
+- Callbacks via decorators or subclassing for straightforward customisation of agentic loops and context management.
 
 ## Project Structure
 
-- `
-- `packet.py`, `packet_pool.py`: Communication management.
+- `processors/`, `llm_agent.py`: Core processor and agent class implementations.
+- `packet.py`, `packet_pool.py`, `runner.py`: Communication management.
 - `llm_policy_executor.py`: LLM actions and tool call loops.
 - `prompt_builder.py`: Tools for constructing prompts.
 - `workflow/`: Modules for defining and managing static agent workflows.
 - `llm.py`, `cloud_llm.py`: LLM integration and base LLM functionalities.
 - `openai/`: Modules specific to OpenAI API integration.
-- `
+- `litellm/`: Modules specific to LiteLLM integration.
+- `memory.py`, `llm_agent_memory.py`: Basic agent memory management.
 - `run_context.py`: Shared context management for agent runs.
 - `usage_tracker.py`: Tracking of API usage and costs.
-- `costs_dict.yaml`: Dictionary for cost tracking (update if needed).
 - `rate_limiting/`: Basic rate limiting tools.
 
 ## Quickstart & Installation Variants (UV Package manager)
@@ -173,7 +172,7 @@ teacher = LLMAgent[None, Problem, None](
 )
 
 async def main():
-    ctx = RunContext[None](
+    ctx = RunContext[None](log_messages=True)
     out = await teacher.run("start", ctx=ctx)
     print(out.payloads[0])
     print(ctx.usage_tracker.total_usage)
```
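Both quickstarts now enable message logging through the run context. A minimal sketch of the updated usage, assuming the `teacher` agent from the README quickstart above is already defined:

```python
import asyncio

from grasp_agents.run_context import RunContext


async def main() -> None:
    # log_messages=True is the new constructor flag shown in the diff above
    ctx = RunContext[None](log_messages=True)
    out = await teacher.run("start", ctx=ctx)  # noqa: F821 - `teacher` comes from the quickstart
    print(out.payloads[0])
    print(ctx.usage_tracker.total_usage)


asyncio.run(main())
```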
```diff
--- grasp_agents-0.5.6/src/grasp_agents/cloud_llm.py
+++ grasp_agents-0.5.9/src/grasp_agents/cloud_llm.py
@@ -13,7 +13,7 @@ from .http_client import AsyncHTTPClientParams, create_simple_async_httpx_client
 from .llm import LLM, ConvertT_co, LLMSettings, SettingsT_co
 from .rate_limiting.rate_limiter_chunked import RateLimiterC, limit_rate
 from .typing.completion import Completion
-from .typing.completion_chunk import CompletionChoice
+from .typing.completion_chunk import CompletionChoice, CompletionChunk
 from .typing.events import (
     CompletionChunkEvent,
     CompletionEvent,
@@ -52,7 +52,9 @@ class CloudLLMSettings(LLMSettings, total=False):
 LLMRateLimiter = RateLimiterC[
     Messages,
     AssistantMessage
-    | AsyncIterator[
+    | AsyncIterator[
+        CompletionChunkEvent[CompletionChunk] | CompletionEvent | LLMStreamingErrorEvent
+    ],
 ]
 
 
@@ -274,7 +276,7 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         n_choices: int | None = None,
         proc_name: str | None = None,
         call_id: str | None = None,
-    ) -> AsyncIterator[CompletionChunkEvent | CompletionEvent]:
+    ) -> AsyncIterator[CompletionChunkEvent[CompletionChunk] | CompletionEvent]:
         completion_kwargs = self._make_completion_kwargs(
             conversation=conversation, tool_choice=tool_choice, n_choices=n_choices
         )
@@ -284,7 +286,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         api_stream = self._get_completion_stream(**completion_kwargs)
         api_stream = cast("AsyncIterator[Any]", api_stream)
 
-        async def iterator() -> AsyncIterator[
+        async def iterator() -> AsyncIterator[
+            CompletionChunkEvent[CompletionChunk] | CompletionEvent
+        ]:
             api_completion_chunks: list[Any] = []
 
             async for api_completion_chunk in api_stream:
@@ -318,7 +322,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
         n_choices: int | None = None,
         proc_name: str | None = None,
         call_id: str | None = None,
-    ) -> AsyncIterator[
+    ) -> AsyncIterator[
+        CompletionChunkEvent[CompletionChunk] | CompletionEvent | LLMStreamingErrorEvent
+    ]:
         n_attempt = 0
         while n_attempt <= self.max_response_retries:
             try:
```
```diff
--- grasp_agents-0.5.6/src/grasp_agents/litellm/lite_llm.py
+++ grasp_agents-0.5.9/src/grasp_agents/litellm/lite_llm.py
@@ -1,5 +1,7 @@
 import logging
+from collections import defaultdict
 from collections.abc import AsyncIterator, Mapping
+from copy import deepcopy
 from typing import Any, cast
 
 import litellm
@@ -90,10 +92,19 @@ class LiteLLM(CloudLLM[LiteLLMSettings, LiteLLMConverters]):
                 "was specified. Please provide a valid API provider or use a different "
                 "model."
             )
+
+        if llm_settings is not None:
+            stream_options = llm_settings.get("stream_options") or {}
+            stream_options["include_usage"] = True
+            _llm_settings = deepcopy(llm_settings)
+            _llm_settings["stream_options"] = stream_options
+        else:
+            _llm_settings = LiteLLMSettings(stream_options={"include_usage": True})
+
         super().__init__(
             model_name=model_name,
             model_id=model_id,
-            llm_settings=
+            llm_settings=_llm_settings,
             converters=LiteLLMConverters(),
             tools=tools,
             response_schema=response_schema,
@@ -192,7 +203,17 @@ class LiteLLM(CloudLLM[LiteLLMSettings, LiteLLMConverters]):
         )
         stream = cast("CustomStreamWrapper", stream)
 
+        tc_indices: dict[int, set[int]] = defaultdict(set)
+
         async for completion_chunk in stream:
+            # Fix tool call indices to be unique within each choice
+            for n, choice in enumerate(completion_chunk.choices):
+                for tc in choice.delta.tool_calls or []:
+                    # Tool call ID is not None only when it is a new tool call
+                    if tc.id and tc.index in tc_indices[n]:
+                        tc.index = max(tc_indices[n]) + 1
+                    tc_indices[n].add(tc.index)
+
             yield completion_chunk
 
     def combine_completion_chunks(
```
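The streaming change above works around providers that reuse the same tool call `index` for distinct calls within a choice: a chunk that starts a new tool call (its `id` is set) but collides with an already-seen index is bumped past the current maximum. A self-contained sketch of that bookkeeping, with a simplified stand-in for litellm's streamed tool call deltas:

```python
from collections import defaultdict
from dataclasses import dataclass


@dataclass
class ToolCallDelta:
    # Simplified stand-in for a litellm streamed tool call delta
    id: str | None  # non-None only on the first chunk of a tool call
    index: int


tc_indices: dict[int, set[int]] = defaultdict(set)  # choice index -> seen tool call indices


def fix_index(choice_idx: int, tc: ToolCallDelta) -> None:
    # A new tool call (id is set) that collides with an index already seen
    # in this choice is moved past the current maximum index
    if tc.id and tc.index in tc_indices[choice_idx]:
        tc.index = max(tc_indices[choice_idx]) + 1
    tc_indices[choice_idx].add(tc.index)


# Two distinct tool calls that a provider streamed with the same index 0:
first = ToolCallDelta(id="call_a", index=0)
second = ToolCallDelta(id="call_b", index=0)
fix_index(0, first)
fix_index(0, second)
assert (first.index, second.index) == (0, 1)
```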
```diff
--- grasp_agents-0.5.6/src/grasp_agents/llm.py
+++ grasp_agents-0.5.9/src/grasp_agents/llm.py
@@ -7,6 +7,7 @@ from uuid import uuid4
 from pydantic import BaseModel
 from typing_extensions import TypedDict
 
+from grasp_agents.typing.completion_chunk import CompletionChunk
 from grasp_agents.utils import (
     validate_obj_from_json_or_py_string,
     validate_tagged_objs_from_json_or_py_string,
@@ -20,9 +21,25 @@ from .errors import (
 from .typing.completion import Completion
 from .typing.converters import Converters
 from .typing.events import (
+    AnnotationsChunkEvent,
+    AnnotationsEndEvent,
+    AnnotationsStartEvent,
     CompletionChunkEvent,
+    # CompletionEndEvent,
     CompletionEvent,
+    CompletionStartEvent,
+    LLMStateChangeEvent,
     LLMStreamingErrorEvent,
+    # RefusalChunkEvent,
+    ResponseChunkEvent,
+    ResponseEndEvent,
+    ResponseStartEvent,
+    ThinkingChunkEvent,
+    ThinkingEndEvent,
+    ThinkingStartEvent,
+    ToolCallChunkEvent,
+    ToolCallEndEvent,
+    ToolCallStartEvent,
 )
 from .typing.message import Messages
 from .typing.tool import BaseTool, ToolChoice
@@ -30,6 +47,14 @@ from .typing.tool import BaseTool, ToolChoice
 logger = logging.getLogger(__name__)
 
 
+LLMStreamGenerator = AsyncIterator[
+    CompletionChunkEvent[CompletionChunk]
+    | CompletionEvent
+    | LLMStateChangeEvent[Any]
+    | LLMStreamingErrorEvent
+]
+
+
 class LLMSettings(TypedDict, total=False):
     max_completion_tokens: int | None
     temperature: float | None
@@ -160,6 +185,112 @@ class LLM(ABC, Generic[SettingsT_co, ConvertT_co]):
                 tool_name, tool_arguments
             ) from exc
 
+    @staticmethod
+    async def postprocess_event_stream(
+        stream: LLMStreamGenerator,
+    ) -> LLMStreamGenerator:
+        prev_completion_id: str | None = None
+        chunk_op_evt: CompletionChunkEvent[CompletionChunk] | None = None
+        response_op_evt: ResponseChunkEvent | None = None
+        thinking_op_evt: ThinkingChunkEvent | None = None
+        annotations_op_evt: AnnotationsChunkEvent | None = None
+        tool_calls_op_evt: ToolCallChunkEvent | None = None
+
+        def _close_open_events(
+            _event: CompletionChunkEvent[CompletionChunk] | None = None,
+        ) -> list[LLMStateChangeEvent[Any]]:
+            nonlocal \
+                chunk_op_evt, \
+                thinking_op_evt, \
+                tool_calls_op_evt, \
+                response_op_evt, \
+                annotations_op_evt
+
+            events: list[LLMStateChangeEvent[Any]] = []
+
+            if not isinstance(_event, ThinkingChunkEvent) and thinking_op_evt:
+                events.append(ThinkingEndEvent.from_chunk_event(thinking_op_evt))
+                thinking_op_evt = None
+
+            if not isinstance(_event, ToolCallChunkEvent) and tool_calls_op_evt:
+                events.append(ToolCallEndEvent.from_chunk_event(tool_calls_op_evt))
+                tool_calls_op_evt = None
+
+            if not isinstance(_event, ResponseChunkEvent) and response_op_evt:
+                events.append(ResponseEndEvent.from_chunk_event(response_op_evt))
+                response_op_evt = None
+
+            if not isinstance(_event, AnnotationsChunkEvent) and annotations_op_evt:
+                events.append(AnnotationsEndEvent.from_chunk_event(annotations_op_evt))
+                annotations_op_evt = None
+
+            return events
+
+        async for event in stream:
+            if isinstance(event, CompletionChunkEvent) and not isinstance(
+                event, LLMStateChangeEvent
+            ):
+                chunk = event.data
+                if len(chunk.choices) != 1:
+                    raise ValueError(
+                        "Expected exactly one choice in completion chunk, "
+                        f"got {len(chunk.choices)}"
+                    )
+
+                new_completion = chunk.id != prev_completion_id
+
+                if new_completion:
+                    for close_event in _close_open_events():
+                        yield close_event
+
+                    chunk_op_evt = event
+                    yield CompletionStartEvent.from_chunk_event(event)
+
+                sub_events = event.split_into_specialized()
+
+                for sub_event in sub_events:
+                    for close_event in _close_open_events(sub_event):
+                        yield close_event
+
+                    if isinstance(sub_event, ThinkingChunkEvent):
+                        if not thinking_op_evt:
+                            thinking_op_evt = sub_event
+                            yield ThinkingStartEvent.from_chunk_event(sub_event)
+                        yield sub_event
+
+                    if isinstance(sub_event, ToolCallChunkEvent):
+                        tc = sub_event.data.tool_call
+                        if tc.id:
+                            # Tool call ID is not None only for the first chunk of a tool call
+                            if tool_calls_op_evt:
+                                yield ToolCallEndEvent.from_chunk_event(
+                                    tool_calls_op_evt
+                                )
+                                tool_calls_op_evt = None
+                            tool_calls_op_evt = sub_event
+                            yield ToolCallStartEvent.from_chunk_event(sub_event)
+                        yield sub_event
+
+                    if isinstance(sub_event, ResponseChunkEvent):
+                        if not response_op_evt:
+                            response_op_evt = sub_event
+                            yield ResponseStartEvent.from_chunk_event(sub_event)
+                        yield sub_event
+
+                    if isinstance(sub_event, AnnotationsChunkEvent):
+                        if not annotations_op_evt:
+                            annotations_op_evt = sub_event
+                            yield AnnotationsStartEvent.from_chunk_event(sub_event)
+                        yield sub_event
+
+                prev_completion_id = chunk.id
+
+            else:
+                for close_event in _close_open_events():
+                    yield close_event
+
+                yield event
+
     @abstractmethod
     async def generate_completion(
         self,
@@ -181,7 +312,9 @@ class LLM(ABC, Generic[SettingsT_co, ConvertT_co]):
         n_choices: int | None = None,
         proc_name: str | None = None,
         call_id: str | None = None,
-    ) -> AsyncIterator[
+    ) -> AsyncIterator[
+        CompletionChunkEvent[CompletionChunk] | CompletionEvent | LLMStreamingErrorEvent
+    ]:
         pass
 
     @abstractmethod
```
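The new `LLM.postprocess_event_stream` static method is a small state machine: it splits each raw chunk event into specialized sub-events and brackets every thinking, tool call, response, and annotations span with explicit Start/End events, closing any still-open span when the stream moves on or a new completion id appears. A hedged sketch of consuming the postprocessed stream (the `llm` instance and `conversation` are assumed to exist):

```python
from grasp_agents.typing.events import (
    ThinkingEndEvent,
    ThinkingStartEvent,
    ToolCallEndEvent,
    ToolCallStartEvent,
)


async def render_stream(llm, conversation) -> None:
    # postprocess_event_stream is a @staticmethod, so any LLM subclass works here
    raw_stream = llm.generate_completion_stream(conversation)
    async for event in llm.postprocess_event_stream(raw_stream):
        # Start/End events arrive in matched pairs, so a renderer can open and
        # close sections without tracking chunk ids itself
        if isinstance(event, ThinkingStartEvent):
            print("[thinking...]")
        elif isinstance(event, ThinkingEndEvent):
            print("[/thinking]")
        elif isinstance(event, ToolCallStartEvent):
            print("[tool call...]")
        elif isinstance(event, ToolCallEndEvent):
            print("[/tool call]")
```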
```diff
--- grasp_agents-0.5.6/src/grasp_agents/llm_policy_executor.py
+++ grasp_agents-0.5.9/src/grasp_agents/llm_policy_executor.py
@@ -7,6 +7,8 @@ from typing import Any, Generic, Protocol, final
 
 from pydantic import BaseModel
 
+from grasp_agents.typing.completion_chunk import CompletionChunk
+
 from .errors import AgentFinalAnswerError
 from .llm import LLM, LLMSettings
 from .llm_agent_memory import LLMAgentMemory
@@ -149,19 +151,23 @@ class LLMPolicyExecutor(Generic[CtxT]):
         tool_choice: ToolChoice | None = None,
         ctx: RunContext[CtxT] | None = None,
     ) -> AsyncIterator[
-        CompletionChunkEvent
+        CompletionChunkEvent[CompletionChunk]
         | CompletionEvent
         | GenMessageEvent
        | LLMStreamingErrorEvent
     ]:
         completion: Completion | None = None
-
+
+        llm_event_stream = self.llm.generate_completion_stream(
             memory.message_history,
             tool_choice=tool_choice,
             n_choices=1,
             proc_name=self.agent_name,
             call_id=call_id,
-        )
+        )
+        llm_event_stream_post = self.llm.postprocess_event_stream(llm_event_stream)  # type: ignore[assignment]
+
+        async for event in llm_event_stream_post:
             if isinstance(event, CompletionEvent):
                 completion = event.data
             yield event
```
```diff
--- grasp_agents-0.5.6/src/grasp_agents/openai/openai_llm.py
+++ grasp_agents-0.5.9/src/grasp_agents/openai/openai_llm.py
@@ -127,8 +127,8 @@ class OpenAILLM(CloudLLM[OpenAILLMSettings, OpenAIConverters]):
             provider_name, provider_model_name = model_name_parts
             if provider_name not in compat_providers_map:
                 raise ValueError(
-                    f"
-                    "
+                    f"API provider '{provider_name}' is not a supported OpenAI "
+                    f"compatible provider. Supported providers are: "
                     f"{', '.join(compat_providers_map.keys())}"
                 )
             api_provider = compat_providers_map[provider_name]
@@ -138,10 +138,18 @@ class OpenAILLM(CloudLLM[OpenAILLMSettings, OpenAIConverters]):
                 "you must provide an 'api_provider' argument."
             )
 
+        if llm_settings is not None:
+            stream_options = llm_settings.get("stream_options") or {}
+            stream_options["include_usage"] = True
+            _llm_settings = deepcopy(llm_settings)
+            _llm_settings["stream_options"] = stream_options
+        else:
+            _llm_settings = OpenAILLMSettings(stream_options={"include_usage": True})
+
         super().__init__(
             model_name=provider_model_name,
             model_id=model_id,
-            llm_settings=
+            llm_settings=_llm_settings,
             converters=OpenAIConverters(),
             tools=tools,
             response_schema=response_schema,
```
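Both the `LiteLLM` and `OpenAILLM` constructors now force `stream_options["include_usage"] = True` so that streamed completions report token usage. A standalone sketch of the shared merge logic, using a plain dict as a stand-in for the provider settings TypedDicts:

```python
from copy import deepcopy
from typing import Any

Settings = dict[str, Any]  # stand-in for LiteLLMSettings / OpenAILLMSettings


def with_usage_tracking(llm_settings: Settings | None) -> Settings:
    if llm_settings is not None:
        # Merge into a copy of the caller's settings rather than replacing them
        stream_options = llm_settings.get("stream_options") or {}
        stream_options["include_usage"] = True
        merged = deepcopy(llm_settings)
        merged["stream_options"] = stream_options
        return merged
    return {"stream_options": {"include_usage": True}}


assert with_usage_tracking(None) == {"stream_options": {"include_usage": True}}
assert with_usage_tracking({"temperature": 0.0})["stream_options"] == {"include_usage": True}
```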
```diff
--- grasp_agents-0.5.6/src/grasp_agents/packet_pool.py
+++ grasp_agents-0.5.9/src/grasp_agents/packet_pool.py
@@ -2,10 +2,9 @@ import asyncio
 import logging
 from collections.abc import AsyncIterator
 from types import TracebackType
-from typing import Any,
+from typing import Any, Literal, Protocol, TypeVar
 
 from .packet import Packet
-from .run_context import CtxT, RunContext
 from .typing.events import Event
 from .typing.io import ProcName
 
@@ -18,24 +17,21 @@ END_PROC_NAME: Literal["*END*"] = "*END*"
 _PayloadT_contra = TypeVar("_PayloadT_contra", contravariant=True)
 
 
-class PacketHandler(Protocol[_PayloadT_contra
+class PacketHandler(Protocol[_PayloadT_contra]):
     async def __call__(
-        self,
-        packet: Packet[_PayloadT_contra],
-        ctx: RunContext[CtxT],
-        **kwargs: Any,
+        self, packet: Packet[_PayloadT_contra], **kwargs: Any
     ) -> None: ...
 
 
-class PacketPool
+class PacketPool:
     def __init__(self) -> None:
         self._packet_queues: dict[ProcName, asyncio.Queue[Packet[Any] | None]] = {}
-        self._packet_handlers: dict[ProcName, PacketHandler[Any
+        self._packet_handlers: dict[ProcName, PacketHandler[Any]] = {}
         self._task_group: asyncio.TaskGroup | None = None
 
         self._event_queue: asyncio.Queue[Event[Any] | None] = asyncio.Queue()
 
-        self._final_result_fut: asyncio.Future[Packet[Any]]
+        self._final_result_fut: asyncio.Future[Packet[Any]]
 
         self._stopping = False
         self._stopped_evt = asyncio.Event()
```
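Under the slimmed-down `PacketHandler` protocol above, handlers no longer receive an explicit `ctx: RunContext[CtxT]` argument, so any state a handler needs must come from somewhere else, such as a closure. A minimal conforming handler sketch (only `Packet.recipients` is taken from this diff; the rest is illustrative):

```python
from typing import Any

from grasp_agents.packet import Packet


def make_handler(label: str):
    # `label` stands in for whatever state the handler closes over now that
    # the pool no longer passes a RunContext to it
    async def handle(packet: Packet[Any], **kwargs: Any) -> None:
        print(f"{label}: received packet for {packet.recipients}")

    return handle


# Registration matches the new two-argument signature:
#     pool.register_packet_handler(proc_name, make_handler("worker"))
```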
```diff
--- grasp_agents-0.5.6/src/grasp_agents/packet_pool.py
+++ grasp_agents-0.5.9/src/grasp_agents/packet_pool.py
@@ -44,9 +40,8 @@ class PacketPool(Generic[CtxT]):
 
     async def post(self, packet: Packet[Any]) -> None:
         if packet.recipients == [END_PROC_NAME]:
-
-
-            fut.set_result(packet)
+            if not self._final_result_fut.done():
+                self._final_result_fut.set_result(packet)
             await self.shutdown()
             return
 
@@ -54,31 +49,14 @@
             queue = self._packet_queues.setdefault(recipient_id, asyncio.Queue())
             await queue.put(packet)
 
-    def _ensure_final_future(self) -> asyncio.Future[Packet[Any]]:
-        fut = self._final_result_fut
-        if fut is None:
-            fut = asyncio.get_running_loop().create_future()
-            self._final_result_fut = fut
-        return fut
-
     async def final_result(self) -> Packet[Any]:
-        fut = self._ensure_final_future()
         try:
-            return await
+            return await self._final_result_fut
         finally:
             await self.shutdown()
 
-    @property
-    def final_result_ready(self) -> bool:
-        fut = self._final_result_fut
-        return fut is not None and fut.done()
-
     def register_packet_handler(
-        self,
-        proc_name: ProcName,
-        handler: PacketHandler[Any, CtxT],
-        ctx: RunContext[CtxT],
-        **run_kwargs: Any,
+        self, proc_name: ProcName, handler: PacketHandler[Any]
     ) -> None:
         if self._stopping:
             raise RuntimeError("PacketPool is stopping/stopped")
@@ -88,17 +66,19 @@ class PacketPool(Generic[CtxT]):
 
         if self._task_group is not None:
             self._task_group.create_task(
-                self._handle_packets(proc_name
+                self._handle_packets(proc_name),
                 name=f"packet-handler:{proc_name}",
             )
 
     async def push_event(self, event: Event[Any]) -> None:
         await self._event_queue.put(event)
 
-    async def __aenter__(self) -> "PacketPool
+    async def __aenter__(self) -> "PacketPool":
         self._task_group = asyncio.TaskGroup()
         await self._task_group.__aenter__()
 
+        self._final_result_fut = asyncio.get_running_loop().create_future()
+
         return self
 
     async def __aexit__(
@@ -120,26 +100,27 @@ class PacketPool(Generic[CtxT]):
 
         return False
 
-    async def _handle_packets(
-        self, proc_name: ProcName, ctx: RunContext[CtxT], **run_kwargs: Any
-    ) -> None:
+    async def _handle_packets(self, proc_name: ProcName) -> None:
         queue = self._packet_queues[proc_name]
         handler = self._packet_handlers[proc_name]
 
-        while
+        while True:
             packet = await queue.get()
             if packet is None:
                 break
+
+            if self._final_result_fut.done():
+                continue
+
             try:
-                await handler(packet
+                await handler(packet)
             except asyncio.CancelledError:
                 raise
             except Exception as err:
                 logger.exception("Error handling packet for %s", proc_name)
                 self._errors.append(err)
-
-
-                fut.set_exception(err)
+                if not self._final_result_fut.done():
+                    self._final_result_fut.set_exception(err)
                 await self.shutdown()
                 raise
 
@@ -159,6 +140,5 @@ class PacketPool(Generic[CtxT]):
             await self._event_queue.put(None)
             for queue in self._packet_queues.values():
                 await queue.put(None)
-
         finally:
             self._stopped_evt.set()
```
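Taken together, the `PacketPool` changes replace the lazily-created final-result future with one created in `__aenter__`, so `post()`, `final_result()`, and `_handle_packets()` can all assume it exists and simply check `done()`. A hedged lifecycle sketch; the `Packet` construction is schematic, since the class's full field set is not shown in this diff:

```python
import asyncio
from typing import Any

from grasp_agents.packet import Packet
from grasp_agents.packet_pool import END_PROC_NAME, PacketPool


async def main() -> None:
    async with PacketPool() as pool:  # __aenter__ now creates the final-result future
        # A packet addressed to the *END* sentinel resolves the future and
        # shuts the pool down (field names here are schematic)
        await pool.post(Packet(payloads=["done"], recipients=[END_PROC_NAME]))

        # final_result() just awaits the future created in __aenter__
        result: Packet[Any] = await pool.final_result()
        print(result.payloads)


asyncio.run(main())
```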