grasp_agents 0.4.7__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. grasp_agents/cloud_llm.py +191 -224
  2. grasp_agents/comm_processor.py +101 -100
  3. grasp_agents/errors.py +69 -9
  4. grasp_agents/litellm/__init__.py +106 -0
  5. grasp_agents/litellm/completion_chunk_converters.py +68 -0
  6. grasp_agents/litellm/completion_converters.py +72 -0
  7. grasp_agents/litellm/converters.py +138 -0
  8. grasp_agents/litellm/lite_llm.py +210 -0
  9. grasp_agents/litellm/message_converters.py +66 -0
  10. grasp_agents/llm.py +84 -49
  11. grasp_agents/llm_agent.py +136 -120
  12. grasp_agents/llm_agent_memory.py +3 -3
  13. grasp_agents/llm_policy_executor.py +167 -174
  14. grasp_agents/memory.py +4 -0
  15. grasp_agents/openai/__init__.py +24 -9
  16. grasp_agents/openai/completion_chunk_converters.py +6 -6
  17. grasp_agents/openai/completion_converters.py +12 -14
  18. grasp_agents/openai/content_converters.py +1 -3
  19. grasp_agents/openai/converters.py +6 -8
  20. grasp_agents/openai/message_converters.py +21 -3
  21. grasp_agents/openai/openai_llm.py +155 -103
  22. grasp_agents/openai/tool_converters.py +4 -6
  23. grasp_agents/packet.py +5 -2
  24. grasp_agents/packet_pool.py +14 -13
  25. grasp_agents/printer.py +234 -72
  26. grasp_agents/processor.py +228 -88
  27. grasp_agents/prompt_builder.py +2 -2
  28. grasp_agents/run_context.py +11 -20
  29. grasp_agents/runner.py +42 -0
  30. grasp_agents/typing/completion.py +16 -9
  31. grasp_agents/typing/completion_chunk.py +51 -22
  32. grasp_agents/typing/events.py +95 -19
  33. grasp_agents/typing/message.py +25 -1
  34. grasp_agents/typing/tool.py +2 -0
  35. grasp_agents/usage_tracker.py +31 -37
  36. grasp_agents/utils.py +95 -84
  37. grasp_agents/workflow/looped_workflow.py +60 -11
  38. grasp_agents/workflow/sequential_workflow.py +43 -11
  39. grasp_agents/workflow/workflow_processor.py +25 -24
  40. {grasp_agents-0.4.7.dist-info → grasp_agents-0.5.0.dist-info}/METADATA +7 -6
  41. grasp_agents-0.5.0.dist-info/RECORD +57 -0
  42. grasp_agents-0.4.7.dist-info/RECORD +0 -50
  43. {grasp_agents-0.4.7.dist-info → grasp_agents-0.5.0.dist-info}/WHEEL +0 -0
  44. {grasp_agents-0.4.7.dist-info → grasp_agents-0.5.0.dist-info}/licenses/LICENSE.md +0 -0
grasp_agents/typing/completion_chunk.py

@@ -1,25 +1,31 @@
 import time
 from collections import defaultdict
 from collections.abc import Sequence
+from typing import Any
 from uuid import uuid4
 
+from litellm import ChatCompletionAnnotation as LiteLLMAnnotation
+from litellm.types.utils import ChoiceLogprobs as LiteLLMChoiceLogprobs
+from openai.types.chat.chat_completion import (
+    ChoiceLogprobs as OpenAIChoiceLogprobs,
+)
 from openai.types.chat.chat_completion_chunk import (
-    ChoiceLogprobs as CompletionChunkChoiceLogprobs,
+    ChoiceLogprobs as OpenAIChunkChoiceLogprobs,
 )
 from openai.types.chat.chat_completion_token_logprob import (
-    ChatCompletionTokenLogprob as CompletionTokenLogprob,
+    ChatCompletionTokenLogprob as OpenAITokenLogprob,
 )
 from pydantic import BaseModel, Field
 
 from ..errors import CombineCompletionChunksError
-from .completion import (
-    Completion,
-    CompletionChoice,
-    CompletionChoiceLogprobs,
-    FinishReason,
-    Usage,
+from .completion import Completion, CompletionChoice, FinishReason, Usage
+from .message import (
+    AssistantMessage,
+    RedactedThinkingBlock,
+    Role,
+    ThinkingBlock,
+    ToolCall,
 )
-from .message import AssistantMessage, ToolCall
 
 
 class CompletionChunkDeltaToolCall(BaseModel):
@@ -31,26 +37,34 @@ class CompletionChunkDeltaToolCall(BaseModel):
 
 class CompletionChunkChoiceDelta(BaseModel):
     content: str | None = None
-    refusal: str | None
-    role: str | None
+    refusal: str | None = None
+    role: Role | None
     tool_calls: list[CompletionChunkDeltaToolCall] | None
+    reasoning_content: str | None = None
+    thinking_blocks: list[ThinkingBlock | RedactedThinkingBlock] | None = None
+    annotations: list[LiteLLMAnnotation] | None = None
+    provider_specific_fields: dict[str, Any] | None = None
 
 
 class CompletionChunkChoice(BaseModel):
     delta: CompletionChunkChoiceDelta
     finish_reason: FinishReason | None
     index: int
-    logprobs: CompletionChunkChoiceLogprobs | None = None
+    logprobs: OpenAIChunkChoiceLogprobs | LiteLLMChoiceLogprobs | Any | None = None
 
 
 class CompletionChunk(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4())[:8])
     created: int = Field(default_factory=lambda: int(time.time()))
-    model: str
+    model: str | None
     name: str | None = None
     system_fingerprint: str | None = None
     choices: list[CompletionChunkChoice]
     usage: Usage | None = None
+    # LiteLLM-specific fields
+    provider_specific_fields: dict[str, Any] | None = None
+    response_ms: float | None = None
+    hidden_params: dict[str, Any] | None = None
 
 
 def combine_completion_chunks(chunks: list[CompletionChunk]) -> Completion:
@@ -82,14 +96,20 @@ def combine_completion_chunks(chunks: list[CompletionChunk]) -> Completion:
     # Usage is found in the last completion chunk if requested
     usage = chunks[-1].usage
 
-    logp_contents_per_choice: defaultdict[int, list[CompletionTokenLogprob]] = (
-        defaultdict(list)
+    logp_contents_per_choice: defaultdict[int, list[OpenAITokenLogprob]] = defaultdict(
+        list
+    )
+    logp_refusals_per_choice: defaultdict[int, list[OpenAITokenLogprob]] = defaultdict(
+        list
     )
-    logp_refusals_per_choice: defaultdict[int, list[CompletionTokenLogprob]] = (
-        defaultdict(list)
+    logprobs_per_choice: defaultdict[int, OpenAIChoiceLogprobs | None] = defaultdict(
+        lambda: None
     )
-    logprobs_per_choice: defaultdict[int, CompletionChoiceLogprobs | None] = (
-        defaultdict(lambda: None)
+    thinking_blocks_per_choice: defaultdict[
+        int, list[ThinkingBlock | RedactedThinkingBlock]
+    ] = defaultdict(list)
+    annotations_per_choice: defaultdict[int, list[LiteLLMAnnotation]] = defaultdict(
+        list
     )
 
     finish_reasons_per_choice: defaultdict[int, FinishReason | None] = defaultdict(
@@ -97,6 +117,7 @@ def combine_completion_chunks(chunks: list[CompletionChunk]) -> Completion:
     )
 
     contents_per_choice: defaultdict[int, str] = defaultdict(lambda: "")
+    reasoning_contents_per_choice: defaultdict[int, str] = defaultdict(lambda: "")
    refusals_per_choice: defaultdict[int, str] = defaultdict(lambda: "")
 
     tool_calls_per_choice: defaultdict[
@@ -111,12 +132,17 @@ def combine_completion_chunks(chunks: list[CompletionChunk]) -> Completion:
 
         # Concatenate content and refusal tokens for each choice
         contents_per_choice[index] += choice.delta.content or ""
+        reasoning_contents_per_choice[index] += choice.delta.reasoning_content or ""
         refusals_per_choice[index] += choice.delta.refusal or ""
 
         # Concatenate logprobs for content and refusal tokens for each choice
         if choice.logprobs is not None:
-            logp_contents_per_choice[index].extend(choice.logprobs.content or [])
-            logp_refusals_per_choice[index].extend(choice.logprobs.refusal or [])
+            logp_contents_per_choice[index].extend(choice.logprobs.content or [])  # type: ignore
+            logp_refusals_per_choice[index].extend(choice.logprobs.refusal or [])  # type: ignore
+        thinking_blocks_per_choice[index].extend(
+            choice.delta.thinking_blocks or []
+        )
+        annotations_per_choice[index].extend(choice.delta.annotations or [])
 
         # Take the last finish reason for each choice
         finish_reasons_per_choice[index] = choice.finish_reason
@@ -148,12 +174,15 @@ def combine_completion_chunks(chunks: list[CompletionChunk]) -> Completion:
         messages_per_choice[index] = AssistantMessage(
             name=name,
             content=contents_per_choice[index] or "<empty>",
+            reasoning_content=(reasoning_contents_per_choice[index] or None),
+            thinking_blocks=(thinking_blocks_per_choice[index] or None),
+            annotations=(annotations_per_choice[index] or None),
             refusal=(refusals_per_choice[index] or None),
             tool_calls=(tool_calls or None),
         )
 
         if logp_contents_per_choice[index] or logp_refusals_per_choice[index]:
-            logprobs_per_choice[index] = CompletionChoiceLogprobs(
+            logprobs_per_choice[index] = OpenAIChoiceLogprobs(
                 content=logp_contents_per_choice[index],
                 refusal=logp_refusals_per_choice[index],
             )
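The public entry point is unchanged — `combine_completion_chunks(chunks: list[CompletionChunk]) -> Completion` — but the merged `AssistantMessage` now also accumulates `reasoning_content`, `thinking_blocks`, and `annotations` across deltas. A minimal usage sketch, assuming chunks in one stream must share an `id` (the `CombineCompletionChunksError` import suggests such validation) and that `FinishReason` accepts the OpenAI-style `"stop"` literal:

from grasp_agents.typing.completion_chunk import (
    CompletionChunk,
    CompletionChunkChoice,
    CompletionChunkChoiceDelta,
    combine_completion_chunks,
)


def make_chunk(piece: str, finish: str | None) -> CompletionChunk:
    # Hypothetical helper: one single-choice delta of a streamed reply.
    return CompletionChunk(
        id="chatcmpl-demo",  # shared across the stream (assumed requirement)
        model="gpt-4o",
        choices=[
            CompletionChunkChoice(
                delta=CompletionChunkChoiceDelta(
                    content=piece, role=None, tool_calls=None
                ),
                finish_reason=finish,  # assumed to validate as FinishReason
                index=0,
            )
        ],
    )


completion = combine_completion_chunks([make_chunk("Hel", None), make_chunk("lo!", "stop")])
# Assumed CompletionChoice shape: each choice carries the merged AssistantMessage.
print(completion.choices[0].message.content)  # -> "Hello!"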
grasp_agents/typing/events.py

@@ -1,8 +1,9 @@
 import time
 from enum import StrEnum
 from typing import Any, Generic, Literal, TypeVar
+from uuid import uuid4
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from ..packet import Packet
 from .completion import Completion
@@ -15,7 +16,9 @@ class EventSourceType(StrEnum):
     AGENT = "agent"
     USER = "user"
     TOOL = "tool"
-    PROCESSOR = "processor"
+    PROC = "processor"
+    WORKFLOW = "workflow"
+    RUN = "run"
 
 
 class EventType(StrEnum):
@@ -24,10 +27,22 @@ class EventType(StrEnum):
     TOOL_MSG = "tool_message"
     TOOL_CALL = "tool_call"
     GEN_MSG = "gen_message"
+
     COMP = "completion"
     COMP_CHUNK = "completion_chunk"
-    PACKET = "packet"
-    PROC_OUT = "processor_output"
+    LLM_ERR = "llm_error"
+
+    PROC_START = "processor_start"
+    PACKET_OUT = "packet_output"
+    PAYLOAD_OUT = "payload_output"
+    PROC_FINISH = "processor_finish"
+    PROC_ERR = "processor_error"
+
+    WORKFLOW_RES = "workflow_result"
+    RUN_RES = "run_result"
+
+    # COMP_THINK_CHUNK = "completion_thinking_chunk"
+    # COMP_RESP_CHUNK = "completion_response_chunk"
 
 
 _T = TypeVar("_T")
@@ -36,8 +51,10 @@ _T = TypeVar("_T")
 class Event(BaseModel, Generic[_T], frozen=True):
     type: EventType
     source: EventSourceType
+    id: str = Field(default_factory=lambda: str(uuid4()))
     created: int = Field(default_factory=lambda: int(time.time()))
-    name: str | None = None
+    proc_name: str | None = None
+    call_id: str | None = None
     data: _T
 
 
@@ -51,36 +68,95 @@ class CompletionChunkEvent(Event[CompletionChunk], frozen=True):
     source: Literal[EventSourceType.LLM] = EventSourceType.LLM
 
 
-class GenMessageEvent(Event[AssistantMessage], frozen=True):
-    type: Literal[EventType.GEN_MSG] = EventType.GEN_MSG
+class LLMStreamingErrorData(BaseModel):
+    error: Exception
+    model_name: str | None = None
+    model_id: str | None = None
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
+class LLMStreamingErrorEvent(Event[LLMStreamingErrorData], frozen=True):
+    type: Literal[EventType.LLM_ERR] = EventType.LLM_ERR
     source: Literal[EventSourceType.LLM] = EventSourceType.LLM
 
 
-class ToolCallEvent(Event[ToolCall], frozen=True):
-    type: Literal[EventType.TOOL_CALL] = EventType.TOOL_CALL
-    source: Literal[EventSourceType.AGENT] = EventSourceType.AGENT
+# class CompletionThinkingChunkEvent(Event[CompletionChunk], frozen=True):
+#     type: Literal[EventType.COMP_THINK_CHUNK] = EventType.COMP_THINK_CHUNK
+#     source: Literal[EventSourceType.LLM] = EventSourceType.LLM
+
+
+# class CompletionResponseChunkEvent(Event[CompletionChunk], frozen=True):
+#     type: Literal[EventType.COMP_RESP_CHUNK] = EventType.COMP_RESP_CHUNK
+#     source: Literal[EventSourceType.LLM] = EventSourceType.LLM
+
+
+class MessageEvent(Event[_T], Generic[_T], frozen=True):
+    pass
+
+
+class GenMessageEvent(MessageEvent[AssistantMessage], frozen=True):
+    type: Literal[EventType.GEN_MSG] = EventType.GEN_MSG
+    source: Literal[EventSourceType.LLM] = EventSourceType.LLM
 
 
-class ToolMessageEvent(Event[ToolMessage], frozen=True):
+class ToolMessageEvent(MessageEvent[ToolMessage], frozen=True):
     type: Literal[EventType.TOOL_MSG] = EventType.TOOL_MSG
     source: Literal[EventSourceType.TOOL] = EventSourceType.TOOL
 
 
-class UserMessageEvent(Event[UserMessage], frozen=True):
+class UserMessageEvent(MessageEvent[UserMessage], frozen=True):
     type: Literal[EventType.USR_MSG] = EventType.USR_MSG
     source: Literal[EventSourceType.USER] = EventSourceType.USER
 
 
-class SystemMessageEvent(Event[SystemMessage], frozen=True):
+class SystemMessageEvent(MessageEvent[SystemMessage], frozen=True):
     type: Literal[EventType.SYS_MSG] = EventType.SYS_MSG
     source: Literal[EventSourceType.AGENT] = EventSourceType.AGENT
 
 
-class PacketEvent(Event[Packet[Any]], frozen=True):
-    type: Literal[EventType.PACKET] = EventType.PACKET
-    source: Literal[EventSourceType.PROCESSOR] = EventSourceType.PROCESSOR
+class ToolCallEvent(Event[ToolCall], frozen=True):
+    type: Literal[EventType.TOOL_CALL] = EventType.TOOL_CALL
+    source: Literal[EventSourceType.AGENT] = EventSourceType.AGENT
+
+
+class ProcStartEvent(Event[None], frozen=True):
+    type: Literal[EventType.PROC_START] = EventType.PROC_START
+    source: Literal[EventSourceType.PROC] = EventSourceType.PROC
+
+
+class ProcFinishEvent(Event[None], frozen=True):
+    type: Literal[EventType.PROC_FINISH] = EventType.PROC_FINISH
+    source: Literal[EventSourceType.PROC] = EventSourceType.PROC
+
+
+class ProcPayloadOutputEvent(Event[Any], frozen=True):
+    type: Literal[EventType.PAYLOAD_OUT] = EventType.PAYLOAD_OUT
+    source: Literal[EventSourceType.PROC] = EventSourceType.PROC
+
+
+class ProcPacketOutputEvent(Event[Packet[Any]], frozen=True):
+    type: Literal[EventType.PACKET_OUT] = EventType.PACKET_OUT
+    source: Literal[EventSourceType.PROC] = EventSourceType.PROC
+
+
+class WorkflowResultEvent(Event[Packet[Any]], frozen=True):
+    type: Literal[EventType.WORKFLOW_RES] = EventType.WORKFLOW_RES
+    source: Literal[EventSourceType.WORKFLOW] = EventSourceType.WORKFLOW
+
+
+class RunResultEvent(Event[Packet[Any]], frozen=True):
+    type: Literal[EventType.RUN_RES] = EventType.RUN_RES
+    source: Literal[EventSourceType.RUN] = EventSourceType.RUN
+
+
+class ProcStreamingErrorData(BaseModel):
+    error: Exception
+    call_id: str | None = None
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
-class ProcOutputEvent(Event[Any], frozen=True):
-    type: Literal[EventType.PROC_OUT] = EventType.PROC_OUT
-    source: Literal[EventSourceType.PROCESSOR] = EventSourceType.PROCESSOR
+class ProcStreamingErrorEvent(Event[ProcStreamingErrorData], frozen=True):
+    type: Literal[EventType.PROC_ERR] = EventType.PROC_ERR
+    source: Literal[EventSourceType.PROC] = EventSourceType.PROC
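With sources split into processor, workflow, and run scopes and explicit error events added, consumers can dispatch on concrete event classes instead of matching raw `type` strings. A minimal consumer sketch, assuming the stream is an `AsyncIterator[Event[Any]]` (the producer side is not shown in this hunk):

from collections.abc import AsyncIterator
from typing import Any

from grasp_agents.typing.events import (
    CompletionChunkEvent,
    Event,
    ProcStreamingErrorEvent,
    RunResultEvent,
)


async def consume(stream: AsyncIterator[Event[Any]]) -> None:
    async for event in stream:
        if isinstance(event, CompletionChunkEvent):
            # event.data is a CompletionChunk; proc_name and call_id identify the producer
            print(event.proc_name, event.call_id, event.data)
        elif isinstance(event, ProcStreamingErrorEvent):
            # ProcStreamingErrorData wraps the original exception
            raise event.data.error
        elif isinstance(event, RunResultEvent):
            # event.data is the final Packet[Any] of the run
            return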
grasp_agents/typing/message.py

@@ -1,11 +1,13 @@
 import json
 from collections.abc import Hashable, Mapping, Sequence
 from enum import StrEnum
-from typing import Annotated, Any, Literal, TypeAlias
+from typing import Annotated, Any, Literal, Required, TypeAlias
 from uuid import uuid4
 
+from litellm.types.llms.openai import ChatCompletionAnnotation as LiteLLMAnnotation
 from pydantic import BaseModel, Field
 from pydantic.json import pydantic_encoder
+from typing_extensions import TypedDict
 
 from .content import Content, ImageData
 from .tool import ToolCall
@@ -14,6 +16,7 @@ from .tool import ToolCall
 class Role(StrEnum):
     USER = "user"
     SYSTEM = "system"
+    DEVELOPER = "developer"
     ASSISTANT = "assistant"
     TOOL = "tool"
@@ -23,11 +26,32 @@ class MessageBase(BaseModel):
     name: str | None = None
 
 
+class ChatCompletionCachedContent(TypedDict):
+    type: Literal["ephemeral"]
+
+
+class ThinkingBlock(TypedDict, total=False):
+    type: Required[Literal["thinking"]]
+    thinking: str
+    signature: str | None
+    cache_control: dict[str, Any] | ChatCompletionCachedContent | None
+
+
+class RedactedThinkingBlock(TypedDict, total=False):
+    type: Required[Literal["redacted_thinking"]]
+    data: str
+    cache_control: dict[str, Any] | ChatCompletionCachedContent | None
+
+
 class AssistantMessage(MessageBase):
     role: Literal[Role.ASSISTANT] = Role.ASSISTANT
     content: str | None
     tool_calls: Sequence[ToolCall] | None = None
     refusal: str | None = None
+    reasoning_content: str | None = None
+    thinking_blocks: Sequence[ThinkingBlock | RedactedThinkingBlock] | None = None
+    annotations: Sequence[LiteLLMAnnotation] | None = None
+    provider_specific_fields: dict[str, Any] | None = None
 
 
 class UserMessage(MessageBase):
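`ThinkingBlock` and `RedactedThinkingBlock` are TypedDicts (mirroring LiteLLM's Anthropic-style thinking content), so they are constructed like plain dicts and validated by pydantic inside `AssistantMessage`. A sketch with illustrative values:

from grasp_agents.typing.message import AssistantMessage, ThinkingBlock

msg = AssistantMessage(
    content="The capital of France is Paris.",
    reasoning_content="User asks for a capital; France -> Paris.",
    thinking_blocks=[
        # total=False: only the Required "type" key is mandatory
        ThinkingBlock(type="thinking", thinking="France -> Paris"),
    ],
)
print(msg.role)  # Role.ASSISTANT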
grasp_agents/typing/tool.py

@@ -48,6 +48,8 @@ class BaseTool(
     name: str
     description: str
 
+    strict: bool | None = None
+
     _in_type: type[_InT] = PrivateAttr()
     _out_type: type[_OutT_co] = PrivateAttr()
grasp_agents/usage_tracker.py

@@ -20,7 +20,6 @@ CostsDict: TypeAlias = dict[str, ModelCostsDict]
 
 
 class UsageTracker(BaseModel):
-    # TODO: specify different costs per provider:model, not just per model
     costs_dict_path: str | Path = COSTS_DICT_PATH
     costs_dict: CostsDict | None = None
     usages: dict[str, Usage] = Field(default_factory=dict)
@@ -29,34 +28,6 @@ class UsageTracker(BaseModel):
         super().__init__(**kwargs)
         self.costs_dict = self.load_costs_dict()
 
-    def load_costs_dict(self) -> CostsDict | None:
-        try:
-            with Path(self.costs_dict_path).open() as f:
-                return yaml.safe_load(f)["costs"]
-        except Exception:
-            logger.info(f"Failed to load cost dictionary from {self.costs_dict_path}")
-            return None
-
-    def _add_cost_to_usage(
-        self, usage: Usage, model_costs_dict: ModelCostsDict
-    ) -> None:
-        in_rate = model_costs_dict["input"]
-        out_rate = model_costs_dict["output"]
-        cached_discount = model_costs_dict.get("cached_discount")
-        input_cost = in_rate * usage.input_tokens
-        output_cost = out_rate * usage.output_tokens
-        reasoning_cost = (
-            out_rate * usage.reasoning_tokens
-            if usage.reasoning_tokens is not None
-            else 0.0
-        )
-        cached_cost: float = (
-            cached_discount * in_rate * usage.cached_tokens
-            if (usage.cached_tokens is not None) and (cached_discount is not None)
-            else 0.0
-        )
-        usage.cost = (input_cost + output_cost + reasoning_cost + cached_cost) / 1e6
-
     def update(
         self,
         agent_name: str,
@@ -64,13 +35,13 @@ class UsageTracker(BaseModel):
         model_name: str | None = None,
     ) -> None:
         if model_name is not None and self.costs_dict is not None:
-            model_costs_dict = self.costs_dict.get(model_name.split(":", 1)[-1])
+            model_costs_dict = self.costs_dict.get(model_name.split("/", 1)[-1])
         else:
             model_costs_dict = None
 
         for completion in completions:
             if completion.usage is not None:
-                if model_costs_dict is not None:
+                if completion.usage.cost is None and model_costs_dict is not None:
                     self._add_cost_to_usage(
                         usage=completion.usage, model_costs_dict=model_costs_dict
                     )
@@ -100,9 +71,32 @@ class UsageTracker(BaseModel):
             logger.debug(colored(token_usage_str, "light_grey"))
 
         if usage.cost is not None:
-            logger.debug(
-                colored(
-                    f"Total cost: ${usage.cost:.4f}",
-                    "light_grey",
-                )
-            )
+            logger.debug(colored(f"Total cost: ${usage.cost:.4f}", "light_grey"))
+
+    def load_costs_dict(self) -> CostsDict | None:
+        try:
+            with Path(self.costs_dict_path).open() as f:
+                return yaml.safe_load(f)["costs"]
+        except Exception:
+            logger.info(f"Failed to load cost dictionary from {self.costs_dict_path}")
+            return None
+
+    def _add_cost_to_usage(
+        self, usage: Usage, model_costs_dict: ModelCostsDict
+    ) -> None:
+        in_rate = model_costs_dict["input"]
+        out_rate = model_costs_dict["output"]
+        cached_discount = model_costs_dict.get("cached_discount")
+        input_cost = in_rate * usage.input_tokens
+        output_cost = out_rate * usage.output_tokens
+        reasoning_cost = (
+            out_rate * usage.reasoning_tokens
+            if usage.reasoning_tokens is not None
+            else 0.0
+        )
+        cached_cost: float = (
+            cached_discount * in_rate * usage.cached_tokens
+            if (usage.cached_tokens is not None) and (cached_discount is not None)
+            else 0.0
+        )
+        usage.cost = (input_cost + output_cost + reasoning_cost + cached_cost) / 1e6
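`_add_cost_to_usage` reads the configured rates as prices per million tokens, bills reasoning tokens at the output rate, and bills cached tokens at a discounted input rate; `update` now skips recomputation when the provider already reported a cost, and the cost-dict lookup strips a LiteLLM-style `provider/model` prefix rather than the old `provider:model` form. A worked example with hypothetical rates:

# Hypothetical per-1M-token rates, mirroring _add_cost_to_usage above.
in_rate, out_rate, cached_discount = 2.5, 10.0, 0.5

input_tokens, output_tokens, reasoning_tokens, cached_tokens = 1_000, 200, 0, 400

cost = (
    in_rate * input_tokens                       # 2500.0
    + out_rate * output_tokens                   # 2000.0
    + out_rate * reasoning_tokens                # 0.0
    + cached_discount * in_rate * cached_tokens  # 500.0
) / 1e6
print(f"${cost:.4f}")  # -> $0.0050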