grasp_agents 0.5.6__tar.gz → 0.5.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/PKG-INFO +12 -13
  2. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/README.md +11 -12
  3. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/pyproject.toml +1 -1
  4. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/cloud_llm.py +11 -5
  5. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/lite_llm.py +22 -1
  6. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm.py +134 -1
  7. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm_policy_executor.py +9 -3
  8. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/openai_llm.py +11 -3
  9. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/packet_pool.py +23 -43
  10. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/printer.py +75 -77
  11. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/processors/parallel_processor.py +15 -13
  12. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/runner.py +27 -24
  13. grasp_agents-0.5.9/src/grasp_agents/typing/completion_chunk.py +506 -0
  14. grasp_agents-0.5.9/src/grasp_agents/typing/events.py +376 -0
  15. grasp_agents-0.5.6/src/grasp_agents/typing/completion_chunk.py +0 -207
  16. grasp_agents-0.5.6/src/grasp_agents/typing/events.py +0 -170
  17. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/.gitignore +0 -0
  18. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/LICENSE.md +0 -0
  19. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/__init__.py +0 -0
  20. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/costs_dict.yaml +0 -0
  21. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/errors.py +0 -0
  22. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/generics_utils.py +0 -0
  23. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/grasp_logging.py +0 -0
  24. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/http_client.py +0 -0
  25. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/__init__.py +0 -0
  26. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/completion_chunk_converters.py +0 -0
  27. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/completion_converters.py +0 -0
  28. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/converters.py +0 -0
  29. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/litellm/message_converters.py +0 -0
  30. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm_agent.py +0 -0
  31. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/llm_agent_memory.py +0 -0
  32. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/memory.py +0 -0
  33. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/__init__.py +0 -0
  34. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/completion_chunk_converters.py +0 -0
  35. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/completion_converters.py +0 -0
  36. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/content_converters.py +0 -0
  37. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/converters.py +0 -0
  38. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/message_converters.py +0 -0
  39. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/openai/tool_converters.py +0 -0
  40. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/packet.py +0 -0
  41. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/processors/base_processor.py +0 -0
  42. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/processors/processor.py +0 -0
  43. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/prompt_builder.py +0 -0
  44. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/__init__.py +0 -0
  45. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/rate_limiter_chunked.py +0 -0
  46. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/types.py +0 -0
  47. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/rate_limiting/utils.py +0 -0
  48. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/run_context.py +0 -0
  49. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/__init__.py +0 -0
  50. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/completion.py +0 -0
  51. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/content.py +0 -0
  52. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/converters.py +0 -0
  53. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/io.py +0 -0
  54. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/message.py +0 -0
  55. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/typing/tool.py +0 -0
  56. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/usage_tracker.py +0 -0
  57. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/utils.py +0 -0
  58. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/__init__.py +0 -0
  59. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/looped_workflow.py +0 -0
  60. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/sequential_workflow.py +0 -0
  61. {grasp_agents-0.5.6 → grasp_agents-0.5.9}/src/grasp_agents/workflow/workflow_processor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: grasp_agents
3
- Version: 0.5.6
3
+ Version: 0.5.9
4
4
  Summary: Grasp Agents Library
5
5
  License-File: LICENSE.md
6
6
  Requires-Python: <4,>=3.11.4
@@ -37,31 +37,30 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  ## Features
39
39
 
40
- - Clean formulation of agents as generic entities over:
41
- - I/O schemas
42
- - Memory
43
- - Shared context
40
+ - Clean formulation of agents as generic entities over I/O schemas and shared context.
44
41
  - Transparent implementation of common agentic patterns:
45
- - Single-agent loops with an optional "ReAct mode" to enforce reasoning between the tool calls
42
+ - Single-agent loops
46
43
  - Workflows (static communication topology), including loops
47
44
  - Agents-as-tools for task delegation
48
45
  - Freeform A2A communication via the in-process actor model
49
- - Parallel processing with flexible retries and rate limiting
50
- - Simple logging and usage/cost tracking
46
+ - Built-in parallel processing with flexible retries and rate limiting.
47
+ - Support for all popular API providers via LiteLLM.
48
+ - Granular event streaming with separate events for standard outputs, thinking, and tool calls.
49
+ - Callbacks via decorators or subclassing for straightforward customisation of agentic loops and context management.
51
50
 
52
51
  ## Project Structure
53
52
 
54
- - `processor.py`, `comm_processor.py`, `llm_agent.py`: Core processor and agent class implementations.
55
- - `packet.py`, `packet_pool.py`: Communication management.
53
+ - `processors/`, `llm_agent.py`: Core processor and agent class implementations.
54
+ - `packet.py`, `packet_pool.py`, `runner.py`: Communication management.
56
55
  - `llm_policy_executor.py`: LLM actions and tool call loops.
57
56
  - `prompt_builder.py`: Tools for constructing prompts.
58
57
  - `workflow/`: Modules for defining and managing static agent workflows.
59
58
  - `llm.py`, `cloud_llm.py`: LLM integration and base LLM functionalities.
60
59
  - `openai/`: Modules specific to OpenAI API integration.
61
- - `memory.py`, `llm_agent_memory.py`: Memory management.
60
+ - `litellm/`: Modules specific to LiteLLM integration.
61
+ - `memory.py`, `llm_agent_memory.py`: Basic agent memory management.
62
62
  - `run_context.py`: Shared context management for agent runs.
63
63
  - `usage_tracker.py`: Tracking of API usage and costs.
64
- - `costs_dict.yaml`: Dictionary for cost tracking (update if needed).
65
64
  - `rate_limiting/`: Basic rate limiting tools.
66
65
 
67
66
  ## Quickstart & Installation Variants (UV Package manager)
@@ -190,7 +189,7 @@ teacher = LLMAgent[None, Problem, None](
190
189
  )
191
190
 
192
191
  async def main():
193
- ctx = RunContext[None](print_messages=True)
192
+ ctx = RunContext[None](log_messages=True)
194
193
  out = await teacher.run("start", ctx=ctx)
195
194
  print(out.payloads[0])
196
195
  print(ctx.usage_tracker.total_usage)
@@ -20,31 +20,30 @@
20
20
 
21
21
  ## Features
22
22
 
23
- - Clean formulation of agents as generic entities over:
24
- - I/O schemas
25
- - Memory
26
- - Shared context
23
+ - Clean formulation of agents as generic entities over I/O schemas and shared context.
27
24
  - Transparent implementation of common agentic patterns:
28
- - Single-agent loops with an optional "ReAct mode" to enforce reasoning between the tool calls
25
+ - Single-agent loops
29
26
  - Workflows (static communication topology), including loops
30
27
  - Agents-as-tools for task delegation
31
28
  - Freeform A2A communication via the in-process actor model
32
- - Parallel processing with flexible retries and rate limiting
33
- - Simple logging and usage/cost tracking
29
+ - Built-in parallel processing with flexible retries and rate limiting.
30
+ - Support for all popular API providers via LiteLLM.
31
+ - Granular event streaming with separate events for standard outputs, thinking, and tool calls.
32
+ - Callbacks via decorators or subclassing for straightforward customisation of agentic loops and context management.
34
33
 
35
34
  ## Project Structure
36
35
 
37
- - `processor.py`, `comm_processor.py`, `llm_agent.py`: Core processor and agent class implementations.
38
- - `packet.py`, `packet_pool.py`: Communication management.
36
+ - `processors/`, `llm_agent.py`: Core processor and agent class implementations.
37
+ - `packet.py`, `packet_pool.py`, `runner.py`: Communication management.
39
38
  - `llm_policy_executor.py`: LLM actions and tool call loops.
40
39
  - `prompt_builder.py`: Tools for constructing prompts.
41
40
  - `workflow/`: Modules for defining and managing static agent workflows.
42
41
  - `llm.py`, `cloud_llm.py`: LLM integration and base LLM functionalities.
43
42
  - `openai/`: Modules specific to OpenAI API integration.
44
- - `memory.py`, `llm_agent_memory.py`: Memory management.
43
+ - `litellm/`: Modules specific to LiteLLM integration.
44
+ - `memory.py`, `llm_agent_memory.py`: Basic agent memory management.
45
45
  - `run_context.py`: Shared context management for agent runs.
46
46
  - `usage_tracker.py`: Tracking of API usage and costs.
47
- - `costs_dict.yaml`: Dictionary for cost tracking (update if needed).
48
47
  - `rate_limiting/`: Basic rate limiting tools.
49
48
 
50
49
  ## Quickstart & Installation Variants (UV Package manager)
@@ -173,7 +172,7 @@ teacher = LLMAgent[None, Problem, None](
173
172
  )
174
173
 
175
174
  async def main():
176
- ctx = RunContext[None](print_messages=True)
175
+ ctx = RunContext[None](log_messages=True)
177
176
  out = await teacher.run("start", ctx=ctx)
178
177
  print(out.payloads[0])
179
178
  print(ctx.usage_tracker.total_usage)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "grasp_agents"
3
- version = "0.5.6"
3
+ version = "0.5.9"
4
4
  description = "Grasp Agents Library"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11.4,<4"
@@ -13,7 +13,7 @@ from .http_client import AsyncHTTPClientParams, create_simple_async_httpx_client
13
13
  from .llm import LLM, ConvertT_co, LLMSettings, SettingsT_co
14
14
  from .rate_limiting.rate_limiter_chunked import RateLimiterC, limit_rate
15
15
  from .typing.completion import Completion
16
- from .typing.completion_chunk import CompletionChoice
16
+ from .typing.completion_chunk import CompletionChoice, CompletionChunk
17
17
  from .typing.events import (
18
18
  CompletionChunkEvent,
19
19
  CompletionEvent,
@@ -52,7 +52,9 @@ class CloudLLMSettings(LLMSettings, total=False):
52
52
  LLMRateLimiter = RateLimiterC[
53
53
  Messages,
54
54
  AssistantMessage
55
- | AsyncIterator[CompletionChunkEvent | CompletionEvent | LLMStreamingErrorEvent],
55
+ | AsyncIterator[
56
+ CompletionChunkEvent[CompletionChunk] | CompletionEvent | LLMStreamingErrorEvent
57
+ ],
56
58
  ]
57
59
 
58
60
 
@@ -274,7 +276,7 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
274
276
  n_choices: int | None = None,
275
277
  proc_name: str | None = None,
276
278
  call_id: str | None = None,
277
- ) -> AsyncIterator[CompletionChunkEvent | CompletionEvent]:
279
+ ) -> AsyncIterator[CompletionChunkEvent[CompletionChunk] | CompletionEvent]:
278
280
  completion_kwargs = self._make_completion_kwargs(
279
281
  conversation=conversation, tool_choice=tool_choice, n_choices=n_choices
280
282
  )
@@ -284,7 +286,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
284
286
  api_stream = self._get_completion_stream(**completion_kwargs)
285
287
  api_stream = cast("AsyncIterator[Any]", api_stream)
286
288
 
287
- async def iterator() -> AsyncIterator[CompletionChunkEvent | CompletionEvent]:
289
+ async def iterator() -> AsyncIterator[
290
+ CompletionChunkEvent[CompletionChunk] | CompletionEvent
291
+ ]:
288
292
  api_completion_chunks: list[Any] = []
289
293
 
290
294
  async for api_completion_chunk in api_stream:
@@ -318,7 +322,9 @@ class CloudLLM(LLM[SettingsT_co, ConvertT_co], Generic[SettingsT_co, ConvertT_co
318
322
  n_choices: int | None = None,
319
323
  proc_name: str | None = None,
320
324
  call_id: str | None = None,
321
- ) -> AsyncIterator[CompletionChunkEvent | CompletionEvent | LLMStreamingErrorEvent]:
325
+ ) -> AsyncIterator[
326
+ CompletionChunkEvent[CompletionChunk] | CompletionEvent | LLMStreamingErrorEvent
327
+ ]:
322
328
  n_attempt = 0
323
329
  while n_attempt <= self.max_response_retries:
324
330
  try:
@@ -1,5 +1,7 @@
1
1
  import logging
2
+ from collections import defaultdict
2
3
  from collections.abc import AsyncIterator, Mapping
4
+ from copy import deepcopy
3
5
  from typing import Any, cast
4
6
 
5
7
  import litellm
@@ -90,10 +92,19 @@ class LiteLLM(CloudLLM[LiteLLMSettings, LiteLLMConverters]):
90
92
  "was specified. Please provide a valid API provider or use a different "
91
93
  "model."
92
94
  )
95
+
96
+ if llm_settings is not None:
97
+ stream_options = llm_settings.get("stream_options") or {}
98
+ stream_options["include_usage"] = True
99
+ _llm_settings = deepcopy(llm_settings)
100
+ _llm_settings["stream_options"] = stream_options
101
+ else:
102
+ _llm_settings = LiteLLMSettings(stream_options={"include_usage": True})
103
+
93
104
  super().__init__(
94
105
  model_name=model_name,
95
106
  model_id=model_id,
96
- llm_settings=llm_settings,
107
+ llm_settings=_llm_settings,
97
108
  converters=LiteLLMConverters(),
98
109
  tools=tools,
99
110
  response_schema=response_schema,
@@ -192,7 +203,17 @@ class LiteLLM(CloudLLM[LiteLLMSettings, LiteLLMConverters]):
192
203
  )
193
204
  stream = cast("CustomStreamWrapper", stream)
194
205
 
206
+ tc_indices: dict[int, set[int]] = defaultdict(set)
207
+
195
208
  async for completion_chunk in stream:
209
+ # Fix tool call indices to be unique within each choice
210
+ for n, choice in enumerate(completion_chunk.choices):
211
+ for tc in choice.delta.tool_calls or []:
212
+ # Tool call ID is not None only when it is a new tool call
213
+ if tc.id and tc.index in tc_indices[n]:
214
+ tc.index = max(tc_indices[n]) + 1
215
+ tc_indices[n].add(tc.index)
216
+
196
217
  yield completion_chunk
197
218
 
198
219
  def combine_completion_chunks(
@@ -7,6 +7,7 @@ from uuid import uuid4
7
7
  from pydantic import BaseModel
8
8
  from typing_extensions import TypedDict
9
9
 
10
+ from grasp_agents.typing.completion_chunk import CompletionChunk
10
11
  from grasp_agents.utils import (
11
12
  validate_obj_from_json_or_py_string,
12
13
  validate_tagged_objs_from_json_or_py_string,
@@ -20,9 +21,25 @@ from .errors import (
20
21
  from .typing.completion import Completion
21
22
  from .typing.converters import Converters
22
23
  from .typing.events import (
24
+ AnnotationsChunkEvent,
25
+ AnnotationsEndEvent,
26
+ AnnotationsStartEvent,
23
27
  CompletionChunkEvent,
28
+ # CompletionEndEvent,
24
29
  CompletionEvent,
30
+ CompletionStartEvent,
31
+ LLMStateChangeEvent,
25
32
  LLMStreamingErrorEvent,
33
+ # RefusalChunkEvent,
34
+ ResponseChunkEvent,
35
+ ResponseEndEvent,
36
+ ResponseStartEvent,
37
+ ThinkingChunkEvent,
38
+ ThinkingEndEvent,
39
+ ThinkingStartEvent,
40
+ ToolCallChunkEvent,
41
+ ToolCallEndEvent,
42
+ ToolCallStartEvent,
26
43
  )
27
44
  from .typing.message import Messages
28
45
  from .typing.tool import BaseTool, ToolChoice
@@ -30,6 +47,14 @@ from .typing.tool import BaseTool, ToolChoice
30
47
  logger = logging.getLogger(__name__)
31
48
 
32
49
 
50
+ LLMStreamGenerator = AsyncIterator[
51
+ CompletionChunkEvent[CompletionChunk]
52
+ | CompletionEvent
53
+ | LLMStateChangeEvent[Any]
54
+ | LLMStreamingErrorEvent
55
+ ]
56
+
57
+
33
58
  class LLMSettings(TypedDict, total=False):
34
59
  max_completion_tokens: int | None
35
60
  temperature: float | None
@@ -160,6 +185,112 @@ class LLM(ABC, Generic[SettingsT_co, ConvertT_co]):
160
185
  tool_name, tool_arguments
161
186
  ) from exc
162
187
 
188
+ @staticmethod
189
+ async def postprocess_event_stream(
190
+ stream: LLMStreamGenerator,
191
+ ) -> LLMStreamGenerator:
192
+ prev_completion_id: str | None = None
193
+ chunk_op_evt: CompletionChunkEvent[CompletionChunk] | None = None
194
+ response_op_evt: ResponseChunkEvent | None = None
195
+ thinking_op_evt: ThinkingChunkEvent | None = None
196
+ annotations_op_evt: AnnotationsChunkEvent | None = None
197
+ tool_calls_op_evt: ToolCallChunkEvent | None = None
198
+
199
+ def _close_open_events(
200
+ _event: CompletionChunkEvent[CompletionChunk] | None = None,
201
+ ) -> list[LLMStateChangeEvent[Any]]:
202
+ nonlocal \
203
+ chunk_op_evt, \
204
+ thinking_op_evt, \
205
+ tool_calls_op_evt, \
206
+ response_op_evt, \
207
+ annotations_op_evt
208
+
209
+ events: list[LLMStateChangeEvent[Any]] = []
210
+
211
+ if not isinstance(_event, ThinkingChunkEvent) and thinking_op_evt:
212
+ events.append(ThinkingEndEvent.from_chunk_event(thinking_op_evt))
213
+ thinking_op_evt = None
214
+
215
+ if not isinstance(_event, ToolCallChunkEvent) and tool_calls_op_evt:
216
+ events.append(ToolCallEndEvent.from_chunk_event(tool_calls_op_evt))
217
+ tool_calls_op_evt = None
218
+
219
+ if not isinstance(_event, ResponseChunkEvent) and response_op_evt:
220
+ events.append(ResponseEndEvent.from_chunk_event(response_op_evt))
221
+ response_op_evt = None
222
+
223
+ if not isinstance(_event, AnnotationsChunkEvent) and annotations_op_evt:
224
+ events.append(AnnotationsEndEvent.from_chunk_event(annotations_op_evt))
225
+ annotations_op_evt = None
226
+
227
+ return events
228
+
229
+ async for event in stream:
230
+ if isinstance(event, CompletionChunkEvent) and not isinstance(
231
+ event, LLMStateChangeEvent
232
+ ):
233
+ chunk = event.data
234
+ if len(chunk.choices) != 1:
235
+ raise ValueError(
236
+ "Expected exactly one choice in completion chunk, "
237
+ f"got {len(chunk.choices)}"
238
+ )
239
+
240
+ new_completion = chunk.id != prev_completion_id
241
+
242
+ if new_completion:
243
+ for close_event in _close_open_events():
244
+ yield close_event
245
+
246
+ chunk_op_evt = event
247
+ yield CompletionStartEvent.from_chunk_event(event)
248
+
249
+ sub_events = event.split_into_specialized()
250
+
251
+ for sub_event in sub_events:
252
+ for close_event in _close_open_events(sub_event):
253
+ yield close_event
254
+
255
+ if isinstance(sub_event, ThinkingChunkEvent):
256
+ if not thinking_op_evt:
257
+ thinking_op_evt = sub_event
258
+ yield ThinkingStartEvent.from_chunk_event(sub_event)
259
+ yield sub_event
260
+
261
+ if isinstance(sub_event, ToolCallChunkEvent):
262
+ tc = sub_event.data.tool_call
263
+ if tc.id:
264
+ # Tool call ID is not None only for the first chunk of a tool call
265
+ if tool_calls_op_evt:
266
+ yield ToolCallEndEvent.from_chunk_event(
267
+ tool_calls_op_evt
268
+ )
269
+ tool_calls_op_evt = None
270
+ tool_calls_op_evt = sub_event
271
+ yield ToolCallStartEvent.from_chunk_event(sub_event)
272
+ yield sub_event
273
+
274
+ if isinstance(sub_event, ResponseChunkEvent):
275
+ if not response_op_evt:
276
+ response_op_evt = sub_event
277
+ yield ResponseStartEvent.from_chunk_event(sub_event)
278
+ yield sub_event
279
+
280
+ if isinstance(sub_event, AnnotationsChunkEvent):
281
+ if not annotations_op_evt:
282
+ annotations_op_evt = sub_event
283
+ yield AnnotationsStartEvent.from_chunk_event(sub_event)
284
+ yield sub_event
285
+
286
+ prev_completion_id = chunk.id
287
+
288
+ else:
289
+ for close_event in _close_open_events():
290
+ yield close_event
291
+
292
+ yield event
293
+
163
294
  @abstractmethod
164
295
  async def generate_completion(
165
296
  self,
@@ -181,7 +312,9 @@ class LLM(ABC, Generic[SettingsT_co, ConvertT_co]):
181
312
  n_choices: int | None = None,
182
313
  proc_name: str | None = None,
183
314
  call_id: str | None = None,
184
- ) -> AsyncIterator[CompletionChunkEvent | CompletionEvent | LLMStreamingErrorEvent]:
315
+ ) -> AsyncIterator[
316
+ CompletionChunkEvent[CompletionChunk] | CompletionEvent | LLMStreamingErrorEvent
317
+ ]:
185
318
  pass
186
319
 
187
320
  @abstractmethod
@@ -7,6 +7,8 @@ from typing import Any, Generic, Protocol, final
7
7
 
8
8
  from pydantic import BaseModel
9
9
 
10
+ from grasp_agents.typing.completion_chunk import CompletionChunk
11
+
10
12
  from .errors import AgentFinalAnswerError
11
13
  from .llm import LLM, LLMSettings
12
14
  from .llm_agent_memory import LLMAgentMemory
@@ -149,19 +151,23 @@ class LLMPolicyExecutor(Generic[CtxT]):
149
151
  tool_choice: ToolChoice | None = None,
150
152
  ctx: RunContext[CtxT] | None = None,
151
153
  ) -> AsyncIterator[
152
- CompletionChunkEvent
154
+ CompletionChunkEvent[CompletionChunk]
153
155
  | CompletionEvent
154
156
  | GenMessageEvent
155
157
  | LLMStreamingErrorEvent
156
158
  ]:
157
159
  completion: Completion | None = None
158
- async for event in self.llm.generate_completion_stream( # type: ignore[no-untyped-call]
160
+
161
+ llm_event_stream = self.llm.generate_completion_stream(
159
162
  memory.message_history,
160
163
  tool_choice=tool_choice,
161
164
  n_choices=1,
162
165
  proc_name=self.agent_name,
163
166
  call_id=call_id,
164
- ):
167
+ )
168
+ llm_event_stream_post = self.llm.postprocess_event_stream(llm_event_stream) # type: ignore[assignment]
169
+
170
+ async for event in llm_event_stream_post:
165
171
  if isinstance(event, CompletionEvent):
166
172
  completion = event.data
167
173
  yield event
@@ -127,8 +127,8 @@ class OpenAILLM(CloudLLM[OpenAILLMSettings, OpenAIConverters]):
127
127
  provider_name, provider_model_name = model_name_parts
128
128
  if provider_name not in compat_providers_map:
129
129
  raise ValueError(
130
- f"OpenAI compatible API provider '{provider_name}' "
131
- "is not supported. Supported providers are: "
130
+ f"API provider '{provider_name}' is not a supported OpenAI "
131
+ f"compatible provider. Supported providers are: "
132
132
  f"{', '.join(compat_providers_map.keys())}"
133
133
  )
134
134
  api_provider = compat_providers_map[provider_name]
@@ -138,10 +138,18 @@ class OpenAILLM(CloudLLM[OpenAILLMSettings, OpenAIConverters]):
138
138
  "you must provide an 'api_provider' argument."
139
139
  )
140
140
 
141
+ if llm_settings is not None:
142
+ stream_options = llm_settings.get("stream_options") or {}
143
+ stream_options["include_usage"] = True
144
+ _llm_settings = deepcopy(llm_settings)
145
+ _llm_settings["stream_options"] = stream_options
146
+ else:
147
+ _llm_settings = OpenAILLMSettings(stream_options={"include_usage": True})
148
+
141
149
  super().__init__(
142
150
  model_name=provider_model_name,
143
151
  model_id=model_id,
144
- llm_settings=llm_settings,
152
+ llm_settings=_llm_settings,
145
153
  converters=OpenAIConverters(),
146
154
  tools=tools,
147
155
  response_schema=response_schema,
@@ -2,10 +2,9 @@ import asyncio
2
2
  import logging
3
3
  from collections.abc import AsyncIterator
4
4
  from types import TracebackType
5
- from typing import Any, Generic, Literal, Protocol, TypeVar
5
+ from typing import Any, Literal, Protocol, TypeVar
6
6
 
7
7
  from .packet import Packet
8
- from .run_context import CtxT, RunContext
9
8
  from .typing.events import Event
10
9
  from .typing.io import ProcName
11
10
 
@@ -18,24 +17,21 @@ END_PROC_NAME: Literal["*END*"] = "*END*"
18
17
  _PayloadT_contra = TypeVar("_PayloadT_contra", contravariant=True)
19
18
 
20
19
 
21
- class PacketHandler(Protocol[_PayloadT_contra, CtxT]):
20
+ class PacketHandler(Protocol[_PayloadT_contra]):
22
21
  async def __call__(
23
- self,
24
- packet: Packet[_PayloadT_contra],
25
- ctx: RunContext[CtxT],
26
- **kwargs: Any,
22
+ self, packet: Packet[_PayloadT_contra], **kwargs: Any
27
23
  ) -> None: ...
28
24
 
29
25
 
30
- class PacketPool(Generic[CtxT]):
26
+ class PacketPool:
31
27
  def __init__(self) -> None:
32
28
  self._packet_queues: dict[ProcName, asyncio.Queue[Packet[Any] | None]] = {}
33
- self._packet_handlers: dict[ProcName, PacketHandler[Any, CtxT]] = {}
29
+ self._packet_handlers: dict[ProcName, PacketHandler[Any]] = {}
34
30
  self._task_group: asyncio.TaskGroup | None = None
35
31
 
36
32
  self._event_queue: asyncio.Queue[Event[Any] | None] = asyncio.Queue()
37
33
 
38
- self._final_result_fut: asyncio.Future[Packet[Any]] | None = None
34
+ self._final_result_fut: asyncio.Future[Packet[Any]]
39
35
 
40
36
  self._stopping = False
41
37
  self._stopped_evt = asyncio.Event()
@@ -44,9 +40,8 @@ class PacketPool(Generic[CtxT]):
44
40
 
45
41
  async def post(self, packet: Packet[Any]) -> None:
46
42
  if packet.recipients == [END_PROC_NAME]:
47
- fut = self._ensure_final_future()
48
- if not fut.done():
49
- fut.set_result(packet)
43
+ if not self._final_result_fut.done():
44
+ self._final_result_fut.set_result(packet)
50
45
  await self.shutdown()
51
46
  return
52
47
 
@@ -54,31 +49,14 @@ class PacketPool(Generic[CtxT]):
54
49
  queue = self._packet_queues.setdefault(recipient_id, asyncio.Queue())
55
50
  await queue.put(packet)
56
51
 
57
- def _ensure_final_future(self) -> asyncio.Future[Packet[Any]]:
58
- fut = self._final_result_fut
59
- if fut is None:
60
- fut = asyncio.get_running_loop().create_future()
61
- self._final_result_fut = fut
62
- return fut
63
-
64
52
  async def final_result(self) -> Packet[Any]:
65
- fut = self._ensure_final_future()
66
53
  try:
67
- return await fut
54
+ return await self._final_result_fut
68
55
  finally:
69
56
  await self.shutdown()
70
57
 
71
- @property
72
- def final_result_ready(self) -> bool:
73
- fut = self._final_result_fut
74
- return fut is not None and fut.done()
75
-
76
58
  def register_packet_handler(
77
- self,
78
- proc_name: ProcName,
79
- handler: PacketHandler[Any, CtxT],
80
- ctx: RunContext[CtxT],
81
- **run_kwargs: Any,
59
+ self, proc_name: ProcName, handler: PacketHandler[Any]
82
60
  ) -> None:
83
61
  if self._stopping:
84
62
  raise RuntimeError("PacketPool is stopping/stopped")
@@ -88,17 +66,19 @@ class PacketPool(Generic[CtxT]):
88
66
 
89
67
  if self._task_group is not None:
90
68
  self._task_group.create_task(
91
- self._handle_packets(proc_name, ctx=ctx, **run_kwargs),
69
+ self._handle_packets(proc_name),
92
70
  name=f"packet-handler:{proc_name}",
93
71
  )
94
72
 
95
73
  async def push_event(self, event: Event[Any]) -> None:
96
74
  await self._event_queue.put(event)
97
75
 
98
- async def __aenter__(self) -> "PacketPool[CtxT]":
76
+ async def __aenter__(self) -> "PacketPool":
99
77
  self._task_group = asyncio.TaskGroup()
100
78
  await self._task_group.__aenter__()
101
79
 
80
+ self._final_result_fut = asyncio.get_running_loop().create_future()
81
+
102
82
  return self
103
83
 
104
84
  async def __aexit__(
@@ -120,26 +100,27 @@ class PacketPool(Generic[CtxT]):
120
100
 
121
101
  return False
122
102
 
123
- async def _handle_packets(
124
- self, proc_name: ProcName, ctx: RunContext[CtxT], **run_kwargs: Any
125
- ) -> None:
103
+ async def _handle_packets(self, proc_name: ProcName) -> None:
126
104
  queue = self._packet_queues[proc_name]
127
105
  handler = self._packet_handlers[proc_name]
128
106
 
129
- while not self.final_result_ready:
107
+ while True:
130
108
  packet = await queue.get()
131
109
  if packet is None:
132
110
  break
111
+
112
+ if self._final_result_fut.done():
113
+ continue
114
+
133
115
  try:
134
- await handler(packet, ctx=ctx, **run_kwargs)
116
+ await handler(packet)
135
117
  except asyncio.CancelledError:
136
118
  raise
137
119
  except Exception as err:
138
120
  logger.exception("Error handling packet for %s", proc_name)
139
121
  self._errors.append(err)
140
- fut = self._final_result_fut
141
- if fut and not fut.done():
142
- fut.set_exception(err)
122
+ if not self._final_result_fut.done():
123
+ self._final_result_fut.set_exception(err)
143
124
  await self.shutdown()
144
125
  raise
145
126
 
@@ -159,6 +140,5 @@ class PacketPool(Generic[CtxT]):
159
140
  await self._event_queue.put(None)
160
141
  for queue in self._packet_queues.values():
161
142
  await queue.put(None)
162
-
163
143
  finally:
164
144
  self._stopped_evt.set()