flowent 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowent-0.2.0 → flowent-0.2.2}/PKG-INFO +13 -3
- {flowent-0.2.0 → flowent-0.2.2}/pyproject.toml +31 -5
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/agent.py +13 -4
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/approval.py +6 -4
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/compact.py +35 -14
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/llm.py +73 -7
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/main.py +441 -85
- flowent-0.2.2/src/flowent/static/assets/index-Bz76A4EJ.js +82 -0
- flowent-0.2.2/src/flowent/static/assets/index-DufpDl8x.css +2 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/static/index.html +2 -2
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/storage.py +151 -7
- flowent-0.2.2/src/flowent/usage.py +315 -0
- flowent-0.2.0/src/flowent/static/assets/index-BlaCigkZ.js +0 -82
- flowent-0.2.0/src/flowent/static/assets/index-CRvbsH4K.css +0 -2
- {flowent-0.2.0 → flowent-0.2.2}/README.md +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/__init__.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/_version.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/channels.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/cli.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/context.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/logging.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/mcp.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/mcp_import.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/patch.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/paths.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/permissions.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/sandbox.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/skills.py +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/static/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/static/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/static/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/static/flowent.png +0 -0
- {flowent-0.2.0 → flowent-0.2.2}/src/flowent/tools.py +0 -0
|
@@ -1,15 +1,25 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flowent
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: A workflow orchestration platform for multi-agent collaboration
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: A workflow orchestration platform for multi-agent collaboration
|
|
5
|
+
Keywords: agent,agents,ai,ai-agents,assistant,automation,code-generation,llm,mcp,orchestration,sandbox,web-application,workflow
|
|
5
6
|
Author: ImFeH2
|
|
6
7
|
Author-email: ImFeH2 <i@feh2.im>
|
|
7
8
|
License-Expression: Apache-2.0
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development
|
|
8
18
|
Requires-Dist: fastapi[standard]>=0.136.1
|
|
9
19
|
Requires-Dist: litellm>=1.84.0
|
|
10
20
|
Requires-Dist: mcp>=1.24.0
|
|
11
21
|
Requires-Dist: uvicorn>=0.46.0
|
|
12
|
-
Requires-Python: >=3.
|
|
22
|
+
Requires-Python: >=3.11
|
|
13
23
|
Project-URL: Homepage, https://github.com/ImFeH2/flowent
|
|
14
24
|
Project-URL: Issues, https://github.com/ImFeH2/flowent/issues
|
|
15
25
|
Project-URL: Repository, https://github.com/ImFeH2/flowent
|
|
@@ -1,13 +1,39 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "flowent"
|
|
3
|
-
version = "0.2.0"
|
|
4
|
-
description = "A workflow orchestration platform for multi-agent collaboration"
|
|
3
|
+
version = "0.2.2"
|
|
4
|
+
description = "A workflow orchestration platform for multi-agent collaboration"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "ImFeH2", email = "i@feh2.im" }
|
|
8
8
|
]
|
|
9
|
-
requires-python = ">=3.
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
10
|
license = "Apache-2.0"
|
|
11
|
+
keywords = [
|
|
12
|
+
"agent",
|
|
13
|
+
"agents",
|
|
14
|
+
"ai",
|
|
15
|
+
"ai-agents",
|
|
16
|
+
"assistant",
|
|
17
|
+
"automation",
|
|
18
|
+
"code-generation",
|
|
19
|
+
"llm",
|
|
20
|
+
"mcp",
|
|
21
|
+
"orchestration",
|
|
22
|
+
"sandbox",
|
|
23
|
+
"web-application",
|
|
24
|
+
"workflow",
|
|
25
|
+
]
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Development Status :: 3 - Alpha",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"License :: OSI Approved :: Apache Software License",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
"Programming Language :: Python :: 3",
|
|
32
|
+
"Programming Language :: Python :: 3.11",
|
|
33
|
+
"Programming Language :: Python :: 3.12",
|
|
34
|
+
"Programming Language :: Python :: 3.13",
|
|
35
|
+
"Topic :: Software Development",
|
|
36
|
+
]
|
|
11
37
|
dependencies = [
|
|
12
38
|
"fastapi[standard]>=0.136.1",
|
|
13
39
|
"litellm>=1.84.0",
|
|
@@ -37,14 +63,14 @@ requires = ["uv_build>=0.8.14,<0.9.0"]
|
|
|
37
63
|
build-backend = "uv_build"
|
|
38
64
|
|
|
39
65
|
[tool.ruff]
|
|
40
|
-
target-version = "
|
|
66
|
+
target-version = "py311"
|
|
41
67
|
|
|
42
68
|
[tool.ruff.lint]
|
|
43
69
|
select = ["E", "W", "F", "I", "UP", "B", "SIM", "N", "RUF"]
|
|
44
70
|
ignore = ["E501"]
|
|
45
71
|
|
|
46
72
|
[tool.mypy]
|
|
47
|
-
python_version = "3.
|
|
73
|
+
python_version = "3.11"
|
|
48
74
|
|
|
49
75
|
[tool.pytest.ini_options]
|
|
50
76
|
testpaths = ["tests"]
|
|
@@ -15,6 +15,7 @@ from flowent.llm import (
|
|
|
15
15
|
chunk_delta_content,
|
|
16
16
|
chunk_delta_reasoning,
|
|
17
17
|
chunk_delta_tool_calls,
|
|
18
|
+
chunk_token_usage,
|
|
18
19
|
stream_chat_chunks,
|
|
19
20
|
)
|
|
20
21
|
from flowent.logging import TRACE_LEVEL
|
|
@@ -168,6 +169,12 @@ async def run_agent_stream(
|
|
|
168
169
|
tools=[*tool_specs(), *list(extra_tool_specs or [])],
|
|
169
170
|
):
|
|
170
171
|
chunk_count += 1
|
|
172
|
+
usage = chunk_token_usage(chunk)
|
|
173
|
+
if usage is not None:
|
|
174
|
+
yield AgentStreamEvent(
|
|
175
|
+
event="usage",
|
|
176
|
+
data={"usage": usage.model_dump()},
|
|
177
|
+
)
|
|
171
178
|
reasoning = chunk_delta_reasoning(chunk)
|
|
172
179
|
if reasoning:
|
|
173
180
|
reasoning_delta_count += 1
|
|
@@ -366,7 +373,9 @@ async def run_agent_stream(
|
|
|
366
373
|
len(compaction.conversation),
|
|
367
374
|
)
|
|
368
375
|
conversation = [dict(message) for message in compaction.conversation]
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
)
|
|
376
|
+
compaction_message = dict(compaction.message)
|
|
377
|
+
usage_info = compaction_message.pop("usage_info", None)
|
|
378
|
+
event_data: dict[str, object] = {"message": compaction_message}
|
|
379
|
+
if isinstance(usage_info, dict):
|
|
380
|
+
event_data["usage_info"] = usage_info
|
|
381
|
+
yield AgentStreamEvent(event="context_optimized", data=event_data)
|
|
@@ -12,7 +12,7 @@ from flowent.llm import (
|
|
|
12
12
|
ChatMessage,
|
|
13
13
|
CompletionCallable,
|
|
14
14
|
ProviderConnection,
|
|
15
|
-
|
|
15
|
+
stream_chat,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger("flowent.approval")
|
|
@@ -128,7 +128,8 @@ async def review_approval_request(
|
|
|
128
128
|
completion: CompletionCallable | None = None,
|
|
129
129
|
) -> ApprovalReviewDecision:
|
|
130
130
|
try:
|
|
131
|
-
|
|
131
|
+
content = ""
|
|
132
|
+
async for delta in stream_chat(
|
|
132
133
|
connection,
|
|
133
134
|
[
|
|
134
135
|
ChatMessage(role="system", content=APPROVAL_REVIEWER_PROMPT),
|
|
@@ -138,8 +139,9 @@ async def review_approval_request(
|
|
|
138
139
|
),
|
|
139
140
|
],
|
|
140
141
|
completion=completion,
|
|
141
|
-
)
|
|
142
|
-
|
|
142
|
+
):
|
|
143
|
+
content += delta
|
|
144
|
+
return parse_review_decision(content)
|
|
143
145
|
except Exception as error:
|
|
144
146
|
logger.warning("Approval reviewer denied request after failure: %s", error)
|
|
145
147
|
return ApprovalReviewDecision(
|
|
@@ -8,8 +8,9 @@ from flowent.llm import (
|
|
|
8
8
|
ChatMessage,
|
|
9
9
|
CompletionCallable,
|
|
10
10
|
ProviderConnection,
|
|
11
|
-
|
|
11
|
+
complete_chat_with_usage,
|
|
12
12
|
)
|
|
13
|
+
from flowent.usage import TokenUsage
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
15
16
|
from flowent.storage import StoredMessage
|
|
@@ -44,6 +45,7 @@ class CompactResult:
|
|
|
44
45
|
method: CompactMethod
|
|
45
46
|
replacement_history: list[ChatMessage]
|
|
46
47
|
summary: str
|
|
48
|
+
summary_usage: TokenUsage | None
|
|
47
49
|
token_after: int
|
|
48
50
|
token_before: int
|
|
49
51
|
|
|
@@ -66,12 +68,12 @@ class LocalSummaryCompactProvider:
|
|
|
66
68
|
*,
|
|
67
69
|
completion: CompletionCallable | None = None,
|
|
68
70
|
) -> CompactResult:
|
|
69
|
-
|
|
71
|
+
summary_result = await complete_chat_with_usage(
|
|
70
72
|
connection,
|
|
71
73
|
compact_prompt_messages(compact_input.model_history),
|
|
72
74
|
completion=completion,
|
|
73
75
|
)
|
|
74
|
-
summary =
|
|
76
|
+
summary = summary_result.message.content.strip()
|
|
75
77
|
replacement_history = build_replacement_history(
|
|
76
78
|
summary,
|
|
77
79
|
compact_input.messages,
|
|
@@ -81,6 +83,7 @@ class LocalSummaryCompactProvider:
|
|
|
81
83
|
method="local_summary",
|
|
82
84
|
replacement_history=replacement_history,
|
|
83
85
|
summary=summary,
|
|
86
|
+
summary_usage=summary_result.usage,
|
|
84
87
|
token_after=approximate_tokens_for_messages(replacement_history),
|
|
85
88
|
token_before=approximate_tokens_for_messages(compact_input.model_history),
|
|
86
89
|
)
|
|
@@ -127,15 +130,15 @@ def build_replacement_history(
|
|
|
127
130
|
token_budget: int = DEFAULT_RETAINED_MESSAGE_TOKEN_BUDGET,
|
|
128
131
|
) -> list[ChatMessage]:
|
|
129
132
|
return [
|
|
130
|
-
|
|
131
|
-
*retained_recent_chat_messages(
|
|
133
|
+
*retained_recent_user_messages(
|
|
132
134
|
recent_messages,
|
|
133
135
|
token_budget=token_budget,
|
|
134
136
|
),
|
|
137
|
+
ChatMessage(role="user", content=f"{COMPACT_SUMMARY_PREFIX}{summary}"),
|
|
135
138
|
]
|
|
136
139
|
|
|
137
140
|
|
|
138
|
-
def
|
|
141
|
+
def retained_recent_user_messages(
|
|
139
142
|
messages: Sequence[StoredMessage],
|
|
140
143
|
*,
|
|
141
144
|
token_budget: int = DEFAULT_RETAINED_MESSAGE_TOKEN_BUDGET,
|
|
@@ -143,17 +146,22 @@ def retained_recent_chat_messages(
|
|
|
143
146
|
retained: list[ChatMessage] = []
|
|
144
147
|
remaining_tokens = max(token_budget, 0)
|
|
145
148
|
for message in reversed(messages):
|
|
146
|
-
if message.author
|
|
149
|
+
if message.author != "user":
|
|
147
150
|
continue
|
|
148
151
|
token_count = approximate_token_count(message.content)
|
|
149
|
-
if
|
|
152
|
+
if token_count > remaining_tokens:
|
|
153
|
+
if remaining_tokens > 0:
|
|
154
|
+
retained.append(
|
|
155
|
+
ChatMessage(
|
|
156
|
+
role="user",
|
|
157
|
+
content=truncate_text_to_token_budget(
|
|
158
|
+
message.content,
|
|
159
|
+
remaining_tokens,
|
|
160
|
+
),
|
|
161
|
+
)
|
|
162
|
+
)
|
|
150
163
|
break
|
|
151
|
-
|
|
152
|
-
continue
|
|
153
|
-
role: Literal["user", "assistant"] = (
|
|
154
|
-
"user" if message.author == "user" else "assistant"
|
|
155
|
-
)
|
|
156
|
-
retained.append(ChatMessage(role=role, content=message.content))
|
|
164
|
+
retained.append(ChatMessage(role="user", content=message.content))
|
|
157
165
|
remaining_tokens -= token_count
|
|
158
166
|
if remaining_tokens <= 0:
|
|
159
167
|
break
|
|
@@ -161,6 +169,19 @@ def retained_recent_chat_messages(
|
|
|
161
169
|
return retained
|
|
162
170
|
|
|
163
171
|
|
|
172
|
+
def truncate_text_to_token_budget(content: str, token_budget: int) -> str:
|
|
173
|
+
if token_budget <= 0 or not content:
|
|
174
|
+
return ""
|
|
175
|
+
character_budget = max(token_budget * 4, 1)
|
|
176
|
+
if len(content) <= character_budget:
|
|
177
|
+
return content
|
|
178
|
+
left_budget = character_budget // 2
|
|
179
|
+
right_budget = character_budget - left_budget
|
|
180
|
+
removed_tokens = approximate_token_count(content[left_budget:-right_budget])
|
|
181
|
+
marker = f"…{removed_tokens} tokens truncated…"
|
|
182
|
+
return f"{content[:left_budget]}{marker}{content[-right_budget:]}"
|
|
183
|
+
|
|
184
|
+
|
|
164
185
|
def transcript_messages_after(
|
|
165
186
|
messages: Sequence[StoredMessage],
|
|
166
187
|
message_id: str | None,
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import logging
|
|
2
3
|
import re
|
|
3
4
|
from collections.abc import AsyncIterator, Awaitable, Mapping, Sequence
|
|
@@ -12,6 +13,7 @@ from flowent.logging import (
|
|
|
12
13
|
configure_litellm_logging,
|
|
13
14
|
write_llm_request_diagnostic,
|
|
14
15
|
)
|
|
16
|
+
from flowent.usage import TokenUsage, token_usage_from_response
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class ProviderFormat(StrEnum):
|
|
@@ -57,6 +59,13 @@ class ToolCallDelta(BaseModel):
|
|
|
57
59
|
type: str = "function"
|
|
58
60
|
|
|
59
61
|
|
|
62
|
+
class ChatCompletionResult(BaseModel):
|
|
63
|
+
model_config = ConfigDict(extra="forbid")
|
|
64
|
+
|
|
65
|
+
message: ChatMessage
|
|
66
|
+
usage: TokenUsage | None = None
|
|
67
|
+
|
|
68
|
+
|
|
60
69
|
class CompletionCallable(Protocol):
|
|
61
70
|
def __call__(self, **kwargs: Any) -> Awaitable[Any]: ...
|
|
62
71
|
|
|
@@ -67,11 +76,32 @@ class ModelListCallable(Protocol):
|
|
|
67
76
|
|
|
68
77
|
logger = logging.getLogger("flowent.llm")
|
|
69
78
|
|
|
79
|
+
LLM_RETRY_LIMIT = 5
|
|
80
|
+
LLM_RETRY_BASE_DELAY_SECONDS = 0.5
|
|
81
|
+
|
|
70
82
|
|
|
71
83
|
class LLMStreamError(RuntimeError):
|
|
72
84
|
pass
|
|
73
85
|
|
|
74
86
|
|
|
87
|
+
async def wait_before_llm_retry(attempt_number: int) -> None:
|
|
88
|
+
await asyncio.sleep(LLM_RETRY_BASE_DELAY_SECONDS * attempt_number)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
async def request_litellm_completion(
|
|
92
|
+
completion: CompletionCallable,
|
|
93
|
+
request: Mapping[str, Any],
|
|
94
|
+
) -> Any:
|
|
95
|
+
for attempt_number in range(LLM_RETRY_LIMIT + 1):
|
|
96
|
+
try:
|
|
97
|
+
return await completion(**request)
|
|
98
|
+
except Exception:
|
|
99
|
+
if attempt_number >= LLM_RETRY_LIMIT:
|
|
100
|
+
raise
|
|
101
|
+
await wait_before_llm_retry(attempt_number + 1)
|
|
102
|
+
raise RuntimeError("LLM request failed")
|
|
103
|
+
|
|
104
|
+
|
|
75
105
|
MODEL_PREFIXES: dict[ProviderFormat, str] = {
|
|
76
106
|
ProviderFormat.OPENAI: "openai",
|
|
77
107
|
ProviderFormat.OPENAI_RESPONSES: "openai",
|
|
@@ -276,6 +306,7 @@ def build_litellm_request(
|
|
|
276
306
|
request["tools"] = list(tools)
|
|
277
307
|
if stream:
|
|
278
308
|
request["stream"] = True
|
|
309
|
+
request["stream_options"] = {"include_usage": True}
|
|
279
310
|
normalized_base_url = normalize_provider_base_url(
|
|
280
311
|
connection.provider, connection.base_url
|
|
281
312
|
)
|
|
@@ -322,6 +353,23 @@ async def complete_chat(
|
|
|
322
353
|
completion: CompletionCallable | None = None,
|
|
323
354
|
tools: Sequence[Mapping[str, Any]] | None = None,
|
|
324
355
|
) -> ChatMessage:
|
|
356
|
+
return (
|
|
357
|
+
await complete_chat_with_usage(
|
|
358
|
+
connection,
|
|
359
|
+
messages,
|
|
360
|
+
completion=completion,
|
|
361
|
+
tools=tools,
|
|
362
|
+
)
|
|
363
|
+
).message
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
async def complete_chat_with_usage(
|
|
367
|
+
connection: ProviderConnection,
|
|
368
|
+
messages: Sequence[ChatMessage | Mapping[str, Any]],
|
|
369
|
+
*,
|
|
370
|
+
completion: CompletionCallable | None = None,
|
|
371
|
+
tools: Sequence[Mapping[str, Any]] | None = None,
|
|
372
|
+
) -> ChatCompletionResult:
|
|
325
373
|
if completion is None:
|
|
326
374
|
from litellm import acompletion
|
|
327
375
|
|
|
@@ -335,10 +383,15 @@ async def complete_chat(
|
|
|
335
383
|
)
|
|
336
384
|
request = build_litellm_request(connection, messages, tools=tools)
|
|
337
385
|
record_litellm_request_diagnostic(connection, request)
|
|
338
|
-
response = await completion
|
|
386
|
+
response = await request_litellm_completion(completion, request)
|
|
339
387
|
logger.log(TRACE_LEVEL, "LLM completion response=%r", response)
|
|
340
388
|
choice = response["choices"][0]["message"]
|
|
341
|
-
return
|
|
389
|
+
return ChatCompletionResult(
|
|
390
|
+
message=ChatMessage(
|
|
391
|
+
role=choice.get("role", "assistant"), content=choice["content"]
|
|
392
|
+
),
|
|
393
|
+
usage=token_usage_from_response(response),
|
|
394
|
+
)
|
|
342
395
|
|
|
343
396
|
|
|
344
397
|
def value_at(value: Any, key: str, default: Any = None) -> Any:
|
|
@@ -424,6 +477,10 @@ def chunk_delta_tool_calls(chunk: Any) -> list[ToolCallDelta]:
|
|
|
424
477
|
return tool_call_deltas
|
|
425
478
|
|
|
426
479
|
|
|
480
|
+
def chunk_token_usage(chunk: Any) -> TokenUsage | None:
|
|
481
|
+
return token_usage_from_response(chunk)
|
|
482
|
+
|
|
483
|
+
|
|
427
484
|
async def stream_chat_chunks(
|
|
428
485
|
connection: ProviderConnection,
|
|
429
486
|
messages: Sequence[ChatMessage | Mapping[str, Any]],
|
|
@@ -445,11 +502,20 @@ async def stream_chat_chunks(
|
|
|
445
502
|
)
|
|
446
503
|
request = build_litellm_request(connection, messages, stream=True, tools=tools)
|
|
447
504
|
record_litellm_request_diagnostic(connection, request)
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
505
|
+
for attempt_number in range(LLM_RETRY_LIMIT + 1):
|
|
506
|
+
yielded_chunk = False
|
|
507
|
+
try:
|
|
508
|
+
response = await completion(**request)
|
|
509
|
+
async for chunk in response:
|
|
510
|
+
raise_for_stream_failure(chunk)
|
|
511
|
+
logger.log(TRACE_LEVEL, "LLM stream chunk=%r", chunk)
|
|
512
|
+
yielded_chunk = True
|
|
513
|
+
yield chunk
|
|
514
|
+
return
|
|
515
|
+
except Exception:
|
|
516
|
+
if yielded_chunk or attempt_number >= LLM_RETRY_LIMIT:
|
|
517
|
+
raise
|
|
518
|
+
await wait_before_llm_retry(attempt_number + 1)
|
|
453
519
|
|
|
454
520
|
|
|
455
521
|
async def stream_chat(
|