PyPI - flowent - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

flowent 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

{flowent-0.2.0 → flowent-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,15 +1,25 @@
 Metadata-Version: 2.4
 Name: flowent
-Version: 0.2.0
-Summary: A workflow orchestration platform for multi-agent collaboration.
+Version: 0.2.2
+Summary: A workflow orchestration platform for multi-agent collaboration
+Keywords: agent,agents,ai,ai-agents,assistant,automation,code-generation,llm,mcp,orchestration,sandbox,web-application,workflow
 Author: ImFeH2
 Author-email: ImFeH2 <i@feh2.im>
 License-Expression: Apache-2.0
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development
 Requires-Dist: fastapi[standard]>=0.136.1
 Requires-Dist: litellm>=1.84.0
 Requires-Dist: mcp>=1.24.0
 Requires-Dist: uvicorn>=0.46.0
-Requires-Python: >=3.12, <3.14
+Requires-Python: >=3.11
 Project-URL: Homepage, https://github.com/ImFeH2/flowent
 Project-URL: Issues, https://github.com/ImFeH2/flowent/issues
 Project-URL: Repository, https://github.com/ImFeH2/flowent

{flowent-0.2.0 → flowent-0.2.2}/pyproject.toml RENAMED Viewed

@@ -1,13 +1,39 @@
 [project]
 name = "flowent"
-version = "0.2.0"
-description = "A workflow orchestration platform for multi-agent collaboration."
+version = "0.2.2"
+description = "A workflow orchestration platform for multi-agent collaboration"
 readme = "README.md"
 authors = [
     { name = "ImFeH2", email = "i@feh2.im" }
 ]
-requires-python = ">=3.12,<3.14"
+requires-python = ">=3.11"
 license = "Apache-2.0"
+keywords = [
+    "agent",
+    "agents",
+    "ai",
+    "ai-agents",
+    "assistant",
+    "automation",
+    "code-generation",
+    "llm",
+    "mcp",
+    "orchestration",
+    "sandbox",
+    "web-application",
+    "workflow",
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Software Development",
+]
 dependencies = [
     "fastapi[standard]>=0.136.1",
     "litellm>=1.84.0",
@@ -37,14 +63,14 @@ requires = ["uv_build>=0.8.14,<0.9.0"]
 build-backend = "uv_build"
 [tool.ruff]
-target-version = "py312"
+target-version = "py311"
 [tool.ruff.lint]
 select = ["E", "W", "F", "I", "UP", "B", "SIM", "N", "RUF"]
 ignore = ["E501"]
 [tool.mypy]
-python_version = "3.12"
+python_version = "3.11"
 [tool.pytest.ini_options]
 testpaths = ["tests"]

{flowent-0.2.0 → flowent-0.2.2}/src/flowent/agent.py RENAMED Viewed

@@ -15,6 +15,7 @@ from flowent.llm import (
     chunk_delta_content,
     chunk_delta_reasoning,
     chunk_delta_tool_calls,
+    chunk_token_usage,
     stream_chat_chunks,
 )
 from flowent.logging import TRACE_LEVEL
@@ -168,6 +169,12 @@ async def run_agent_stream(
                 tools=[*tool_specs(), *list(extra_tool_specs or [])],
             ):
                 chunk_count += 1
+                usage = chunk_token_usage(chunk)
+                if usage is not None:
+                    yield AgentStreamEvent(
+                        event="usage",
+                        data={"usage": usage.model_dump()},
+                    )
                 reasoning = chunk_delta_reasoning(chunk)
                 if reasoning:
                     reasoning_delta_count += 1
@@ -366,7 +373,9 @@ async def run_agent_stream(
                     len(compaction.conversation),
                 )
                 conversation = [dict(message) for message in compaction.conversation]
-                yield AgentStreamEvent(
-                    event="context_optimized",
-                    data={"message": dict(compaction.message)},
-                )
+                compaction_message = dict(compaction.message)
+                usage_info = compaction_message.pop("usage_info", None)
+                event_data: dict[str, object] = {"message": compaction_message}
+                if isinstance(usage_info, dict):
+                    event_data["usage_info"] = usage_info
+                yield AgentStreamEvent(event="context_optimized", data=event_data)

{flowent-0.2.0 → flowent-0.2.2}/src/flowent/approval.py RENAMED Viewed

@@ -12,7 +12,7 @@ from flowent.llm import (
     ChatMessage,
     CompletionCallable,
     ProviderConnection,
-    complete_chat,
+    stream_chat,
 )
 logger = logging.getLogger("flowent.approval")
@@ -128,7 +128,8 @@ async def review_approval_request(
     completion: CompletionCallable | None = None,
 ) -> ApprovalReviewDecision:
     try:
-        message = await complete_chat(
+        content = ""
+        async for delta in stream_chat(
             connection,
             [
                 ChatMessage(role="system", content=APPROVAL_REVIEWER_PROMPT),
@@ -138,8 +139,9 @@ async def review_approval_request(
                 ),
             ],
             completion=completion,
-        )
-        return parse_review_decision(message.content)
+        ):
+            content += delta
+        return parse_review_decision(content)
     except Exception as error:
         logger.warning("Approval reviewer denied request after failure: %s", error)
         return ApprovalReviewDecision(

{flowent-0.2.0 → flowent-0.2.2}/src/flowent/compact.py RENAMED Viewed

@@ -8,8 +8,9 @@ from flowent.llm import (
     ChatMessage,
     CompletionCallable,
     ProviderConnection,
-    complete_chat,
+    complete_chat_with_usage,
 )
+from flowent.usage import TokenUsage
 if TYPE_CHECKING:
     from flowent.storage import StoredMessage
@@ -44,6 +45,7 @@ class CompactResult:
     method: CompactMethod
     replacement_history: list[ChatMessage]
     summary: str
+    summary_usage: TokenUsage | None
     token_after: int
     token_before: int
@@ -66,12 +68,12 @@ class LocalSummaryCompactProvider:
         *,
         completion: CompletionCallable | None = None,
     ) -> CompactResult:
-        summary_message = await complete_chat(
+        summary_result = await complete_chat_with_usage(
             connection,
             compact_prompt_messages(compact_input.model_history),
             completion=completion,
         )
-        summary = summary_message.content.strip()
+        summary = summary_result.message.content.strip()
         replacement_history = build_replacement_history(
             summary,
             compact_input.messages,
@@ -81,6 +83,7 @@ class LocalSummaryCompactProvider:
             method="local_summary",
             replacement_history=replacement_history,
             summary=summary,
+            summary_usage=summary_result.usage,
             token_after=approximate_tokens_for_messages(replacement_history),
             token_before=approximate_tokens_for_messages(compact_input.model_history),
         )
@@ -127,15 +130,15 @@ def build_replacement_history(
     token_budget: int = DEFAULT_RETAINED_MESSAGE_TOKEN_BUDGET,
 ) -> list[ChatMessage]:
     return [
-        ChatMessage(role="user", content=f"{COMPACT_SUMMARY_PREFIX}{summary}"),
-        *retained_recent_chat_messages(
+        *retained_recent_user_messages(
             recent_messages,
             token_budget=token_budget,
         ),
+        ChatMessage(role="user", content=f"{COMPACT_SUMMARY_PREFIX}{summary}"),
     ]
-def retained_recent_chat_messages(
+def retained_recent_user_messages(
     messages: Sequence[StoredMessage],
     *,
     token_budget: int = DEFAULT_RETAINED_MESSAGE_TOKEN_BUDGET,
@@ -143,17 +146,22 @@ def retained_recent_chat_messages(
     retained: list[ChatMessage] = []
     remaining_tokens = max(token_budget, 0)
     for message in reversed(messages):
-        if message.author not in {"user", "assistant"}:
+        if message.author != "user":
             continue
         token_count = approximate_token_count(message.content)
-        if retained and token_count > remaining_tokens:
+        if token_count > remaining_tokens:
+            if remaining_tokens > 0:
+                retained.append(
+                    ChatMessage(
+                        role="user",
+                        content=truncate_text_to_token_budget(
+                            message.content,
+                            remaining_tokens,
+                        ),
+                    )
+                )
             break
-        if token_count > token_budget:
-            continue
-        role: Literal["user", "assistant"] = (
-            "user" if message.author == "user" else "assistant"
-        )
-        retained.append(ChatMessage(role=role, content=message.content))
+        retained.append(ChatMessage(role="user", content=message.content))
         remaining_tokens -= token_count
         if remaining_tokens <= 0:
             break
@@ -161,6 +169,19 @@ def retained_recent_chat_messages(
     return retained
+def truncate_text_to_token_budget(content: str, token_budget: int) -> str:
+    if token_budget <= 0 or not content:
+        return ""
+    character_budget = max(token_budget * 4, 1)
+    if len(content) <= character_budget:
+        return content
+    left_budget = character_budget // 2
+    right_budget = character_budget - left_budget
+    removed_tokens = approximate_token_count(content[left_budget:-right_budget])
+    marker = f"…{removed_tokens} tokens truncated…"
+    return f"{content[:left_budget]}{marker}{content[-right_budget:]}"
 def transcript_messages_after(
     messages: Sequence[StoredMessage],
     message_id: str | None,

{flowent-0.2.0 → flowent-0.2.2}/src/flowent/llm.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import asyncio
 import logging
 import re
 from collections.abc import AsyncIterator, Awaitable, Mapping, Sequence
@@ -12,6 +13,7 @@ from flowent.logging import (
     configure_litellm_logging,
     write_llm_request_diagnostic,
 )
+from flowent.usage import TokenUsage, token_usage_from_response
 class ProviderFormat(StrEnum):
@@ -57,6 +59,13 @@ class ToolCallDelta(BaseModel):
     type: str = "function"
+class ChatCompletionResult(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    message: ChatMessage
+    usage: TokenUsage | None = None
 class CompletionCallable(Protocol):
     def __call__(self, **kwargs: Any) -> Awaitable[Any]: ...
@@ -67,11 +76,32 @@ class ModelListCallable(Protocol):
 logger = logging.getLogger("flowent.llm")
+LLM_RETRY_LIMIT = 5
+LLM_RETRY_BASE_DELAY_SECONDS = 0.5
 class LLMStreamError(RuntimeError):
     pass
+async def wait_before_llm_retry(attempt_number: int) -> None:
+    await asyncio.sleep(LLM_RETRY_BASE_DELAY_SECONDS * attempt_number)
+async def request_litellm_completion(
+    completion: CompletionCallable,
+    request: Mapping[str, Any],
+) -> Any:
+    for attempt_number in range(LLM_RETRY_LIMIT + 1):
+        try:
+            return await completion(**request)
+        except Exception:
+            if attempt_number >= LLM_RETRY_LIMIT:
+                raise
+            await wait_before_llm_retry(attempt_number + 1)
+    raise RuntimeError("LLM request failed")
 MODEL_PREFIXES: dict[ProviderFormat, str] = {
     ProviderFormat.OPENAI: "openai",
     ProviderFormat.OPENAI_RESPONSES: "openai",
@@ -276,6 +306,7 @@ def build_litellm_request(
         request["tools"] = list(tools)
     if stream:
         request["stream"] = True
+        request["stream_options"] = {"include_usage": True}
     normalized_base_url = normalize_provider_base_url(
         connection.provider, connection.base_url
     )
@@ -322,6 +353,23 @@ async def complete_chat(
     completion: CompletionCallable | None = None,
     tools: Sequence[Mapping[str, Any]] | None = None,
 ) -> ChatMessage:
+    return (
+        await complete_chat_with_usage(
+            connection,
+            messages,
+            completion=completion,
+            tools=tools,
+        )
+    ).message
+async def complete_chat_with_usage(
+    connection: ProviderConnection,
+    messages: Sequence[ChatMessage | Mapping[str, Any]],
+    *,
+    completion: CompletionCallable | None = None,
+    tools: Sequence[Mapping[str, Any]] | None = None,
+) -> ChatCompletionResult:
     if completion is None:
         from litellm import acompletion
@@ -335,10 +383,15 @@ async def complete_chat(
     )
     request = build_litellm_request(connection, messages, tools=tools)
     record_litellm_request_diagnostic(connection, request)
-    response = await completion(**request)
+    response = await request_litellm_completion(completion, request)
     logger.log(TRACE_LEVEL, "LLM completion response=%r", response)
     choice = response["choices"][0]["message"]
-    return ChatMessage(role=choice.get("role", "assistant"), content=choice["content"])
+    return ChatCompletionResult(
+        message=ChatMessage(
+            role=choice.get("role", "assistant"), content=choice["content"]
+        ),
+        usage=token_usage_from_response(response),
+    )
 def value_at(value: Any, key: str, default: Any = None) -> Any:
@@ -424,6 +477,10 @@ def chunk_delta_tool_calls(chunk: Any) -> list[ToolCallDelta]:
     return tool_call_deltas
+def chunk_token_usage(chunk: Any) -> TokenUsage | None:
+    return token_usage_from_response(chunk)
 async def stream_chat_chunks(
     connection: ProviderConnection,
     messages: Sequence[ChatMessage | Mapping[str, Any]],
@@ -445,11 +502,20 @@ async def stream_chat_chunks(
     )
     request = build_litellm_request(connection, messages, stream=True, tools=tools)
     record_litellm_request_diagnostic(connection, request)
-    response = await completion(**request)
-    async for chunk in response:
-        raise_for_stream_failure(chunk)
-        logger.log(TRACE_LEVEL, "LLM stream chunk=%r", chunk)
-        yield chunk
+    for attempt_number in range(LLM_RETRY_LIMIT + 1):
+        yielded_chunk = False
+        try:
+            response = await completion(**request)
+            async for chunk in response:
+                raise_for_stream_failure(chunk)
+                logger.log(TRACE_LEVEL, "LLM stream chunk=%r", chunk)
+                yielded_chunk = True
+                yield chunk
+            return
+        except Exception:
+            if yielded_chunk or attempt_number >= LLM_RETRY_LIMIT:
+                raise
+            await wait_before_llm_retry(attempt_number + 1)
 async def stream_chat(

flowent 0.2.0__tar.gz → 0.2.2__tar.gz

flowent 0.2.0__tar.gz → 0.2.2__tar.gz