flowent 0.1.5 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +31 -5
- package/backend/src/flowent/agent.py +107 -37
- package/backend/src/flowent/compact.py +35 -14
- package/backend/src/flowent/llm.py +198 -12
- package/backend/src/flowent/main.py +260 -59
- package/backend/src/flowent/static/assets/index-CRSV2xu1.css +2 -0
- package/backend/src/flowent/static/assets/index-DUYj6rgD.js +82 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/storage.py +135 -3
- package/backend/src/flowent/usage.py +315 -0
- package/backend/uv.lock +971 -3
- package/dist/frontend/assets/index-CRSV2xu1.css +2 -0
- package/dist/frontend/assets/index-DUYj6rgD.js +82 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +24 -3
- package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
- package/backend/src/flowent/static/assets/index-Cl20cARb.css +0 -2
- package/backend/src/flowent/static/assets/index-dsDDsEym.js +0 -81
- package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/conftest.py +0 -21
- package/backend/tests/test_agent_tools.py +0 -988
- package/backend/tests/test_approval.py +0 -283
- package/backend/tests/test_channels.py +0 -360
- package/backend/tests/test_health.py +0 -12
- package/backend/tests/test_llm_providers.py +0 -387
- package/backend/tests/test_logging.py +0 -212
- package/backend/tests/test_mcp.py +0 -788
- package/backend/tests/test_patch.py +0 -112
- package/backend/tests/test_permissions.py +0 -588
- package/backend/tests/test_persistence.py +0 -249
- package/backend/tests/test_skills.py +0 -462
- package/backend/tests/test_startup_requirements.py +0 -144
- package/backend/tests/test_workspace_chat.py +0 -2122
- package/dist/frontend/assets/index-Cl20cARb.css +0 -2
- package/dist/frontend/assets/index-dsDDsEym.js +0 -81
package/backend/pyproject.toml
CHANGED
|
@@ -1,13 +1,39 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "flowent"
|
|
3
|
-
version = "0.1.5"
|
|
4
|
-
description = "A workflow orchestration platform for multi-agent collaboration
|
|
3
|
+
version = "0.2.1"
|
|
4
|
+
description = "A workflow orchestration platform for multi-agent collaboration"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "ImFeH2", email = "i@feh2.im" }
|
|
8
8
|
]
|
|
9
|
-
requires-python = ">=3.
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
10
|
license = "Apache-2.0"
|
|
11
|
+
keywords = [
|
|
12
|
+
"agent",
|
|
13
|
+
"agents",
|
|
14
|
+
"ai",
|
|
15
|
+
"ai-agents",
|
|
16
|
+
"assistant",
|
|
17
|
+
"automation",
|
|
18
|
+
"code-generation",
|
|
19
|
+
"llm",
|
|
20
|
+
"mcp",
|
|
21
|
+
"orchestration",
|
|
22
|
+
"sandbox",
|
|
23
|
+
"web-application",
|
|
24
|
+
"workflow",
|
|
25
|
+
]
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Development Status :: 3 - Alpha",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"License :: OSI Approved :: Apache Software License",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
"Programming Language :: Python :: 3",
|
|
32
|
+
"Programming Language :: Python :: 3.11",
|
|
33
|
+
"Programming Language :: Python :: 3.12",
|
|
34
|
+
"Programming Language :: Python :: 3.13",
|
|
35
|
+
"Topic :: Software Development",
|
|
36
|
+
]
|
|
11
37
|
dependencies = [
|
|
12
38
|
"fastapi[standard]>=0.136.1",
|
|
13
39
|
"litellm>=1.84.0",
|
|
@@ -37,14 +63,14 @@ requires = ["uv_build>=0.8.14,<0.9.0"]
|
|
|
37
63
|
build-backend = "uv_build"
|
|
38
64
|
|
|
39
65
|
[tool.ruff]
|
|
40
|
-
target-version = "
|
|
66
|
+
target-version = "py311"
|
|
41
67
|
|
|
42
68
|
[tool.ruff.lint]
|
|
43
69
|
select = ["E", "W", "F", "I", "UP", "B", "SIM", "N", "RUF"]
|
|
44
70
|
ignore = ["E501"]
|
|
45
71
|
|
|
46
72
|
[tool.mypy]
|
|
47
|
-
python_version = "3.
|
|
73
|
+
python_version = "3.11"
|
|
48
74
|
|
|
49
75
|
[tool.pytest.ini_options]
|
|
50
76
|
testpaths = ["tests"]
|
|
package/backend/src/flowent/agent.py
CHANGED
|
@@ -15,6 +15,7 @@ from flowent.llm import (
|
|
|
15
15
|
chunk_delta_content,
|
|
16
16
|
chunk_delta_reasoning,
|
|
17
17
|
chunk_delta_tool_calls,
|
|
18
|
+
chunk_token_usage,
|
|
18
19
|
stream_chat_chunks,
|
|
19
20
|
)
|
|
20
21
|
from flowent.logging import TRACE_LEVEL
|
|
@@ -146,56 +147,108 @@ async def run_agent_stream(
|
|
|
146
147
|
while True:
|
|
147
148
|
round_number += 1
|
|
148
149
|
logger.debug("Agent round started id=%s round=%s", assistant_id, round_number)
|
|
150
|
+
logger.info(
|
|
151
|
+
"Agent model call started id=%s round=%s conversation_messages=%s",
|
|
152
|
+
assistant_id,
|
|
153
|
+
round_number,
|
|
154
|
+
len(conversation),
|
|
155
|
+
)
|
|
149
156
|
yield AgentStreamEvent(event="output_start", data={"index": round_number})
|
|
150
157
|
round_content = ""
|
|
151
158
|
pending: dict[int, PendingToolCall] = {}
|
|
159
|
+
chunk_count = 0
|
|
160
|
+
content_delta_count = 0
|
|
161
|
+
reasoning_delta_count = 0
|
|
162
|
+
tool_delta_count = 0
|
|
152
163
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
)
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
164
|
+
try:
|
|
165
|
+
async for chunk in stream_chat_chunks(
|
|
166
|
+
connection,
|
|
167
|
+
conversation,
|
|
168
|
+
completion=completion,
|
|
169
|
+
tools=[*tool_specs(), *list(extra_tool_specs or [])],
|
|
170
|
+
):
|
|
171
|
+
chunk_count += 1
|
|
172
|
+
usage = chunk_token_usage(chunk)
|
|
173
|
+
if usage is not None:
|
|
174
|
+
yield AgentStreamEvent(
|
|
175
|
+
event="usage",
|
|
176
|
+
data={"usage": usage.model_dump()},
|
|
177
|
+
)
|
|
178
|
+
reasoning = chunk_delta_reasoning(chunk)
|
|
179
|
+
if reasoning:
|
|
180
|
+
reasoning_delta_count += 1
|
|
181
|
+
final_thinking += reasoning
|
|
182
|
+
logger.log(
|
|
183
|
+
TRACE_LEVEL,
|
|
184
|
+
"Agent stream reasoning id=%s round=%s content=%r",
|
|
185
|
+
assistant_id,
|
|
186
|
+
round_number,
|
|
187
|
+
reasoning,
|
|
188
|
+
)
|
|
189
|
+
yield AgentStreamEvent(
|
|
190
|
+
event="thinking_delta", data={"content": reasoning}
|
|
191
|
+
)
|
|
192
|
+
content = chunk_delta_content(chunk)
|
|
193
|
+
if content:
|
|
194
|
+
content_delta_count += 1
|
|
195
|
+
round_content += content
|
|
196
|
+
final_content += content
|
|
197
|
+
logger.log(
|
|
198
|
+
TRACE_LEVEL,
|
|
199
|
+
"Agent stream delta id=%s round=%s content=%r",
|
|
200
|
+
assistant_id,
|
|
201
|
+
round_number,
|
|
202
|
+
content,
|
|
203
|
+
)
|
|
204
|
+
yield AgentStreamEvent(event="delta", data={"content": content})
|
|
205
|
+
for delta in chunk_delta_tool_calls(chunk):
|
|
206
|
+
tool_delta_count += 1
|
|
207
|
+
pending.setdefault(delta.index, PendingToolCall()).apply_delta(
|
|
208
|
+
delta
|
|
209
|
+
)
|
|
210
|
+
except Exception:
|
|
211
|
+
logger.exception(
|
|
212
|
+
"Agent model call failed id=%s round=%s chunk_count=%s content_deltas=%s reasoning_deltas=%s tool_deltas=%s conversation_messages=%s",
|
|
213
|
+
assistant_id,
|
|
214
|
+
round_number,
|
|
215
|
+
chunk_count,
|
|
216
|
+
content_delta_count,
|
|
217
|
+
reasoning_delta_count,
|
|
218
|
+
tool_delta_count,
|
|
219
|
+
len(conversation),
|
|
220
|
+
)
|
|
221
|
+
raise
|
|
184
222
|
|
|
185
223
|
tool_calls = [pending[index] for index in sorted(pending)]
|
|
224
|
+
logger.info(
|
|
225
|
+
"Agent model call completed id=%s round=%s chunk_count=%s content_deltas=%s reasoning_deltas=%s tool_deltas=%s tool_calls=%s content_length=%s decision=%s",
|
|
226
|
+
assistant_id,
|
|
227
|
+
round_number,
|
|
228
|
+
chunk_count,
|
|
229
|
+
content_delta_count,
|
|
230
|
+
reasoning_delta_count,
|
|
231
|
+
tool_delta_count,
|
|
232
|
+
len(tool_calls),
|
|
233
|
+
len(round_content),
|
|
234
|
+
"run_tools" if tool_calls else "final_response",
|
|
235
|
+
)
|
|
186
236
|
logger.log(
|
|
187
237
|
TRACE_LEVEL,
|
|
188
|
-
"Agent round tool calls id=%s tool_calls=%r",
|
|
238
|
+
"Agent round tool calls id=%s round=%s tool_calls=%r",
|
|
189
239
|
assistant_id,
|
|
240
|
+
round_number,
|
|
190
241
|
tool_calls,
|
|
191
242
|
)
|
|
192
243
|
if not tool_calls:
|
|
193
244
|
if not final_content and not final_thinking:
|
|
194
245
|
raise RuntimeError(EMPTY_MODEL_RESPONSE_ERROR)
|
|
195
246
|
logger.info(
|
|
196
|
-
"Agent response completed id=%s content_length=%s",
|
|
247
|
+
"Agent response completed id=%s rounds=%s content_length=%s thinking_length=%s decision=final_response",
|
|
197
248
|
assistant_id,
|
|
249
|
+
round_number,
|
|
198
250
|
len(final_content),
|
|
251
|
+
len(final_thinking),
|
|
199
252
|
)
|
|
200
253
|
logger.log(
|
|
201
254
|
TRACE_LEVEL,
|
|
@@ -301,11 +354,28 @@ async def run_agent_stream(
|
|
|
301
354
|
)
|
|
302
355
|
conversation.append(tool_result_message(tool_call_id, result_content))
|
|
303
356
|
|
|
357
|
+
logger.info(
|
|
358
|
+
"Agent continuing after tools id=%s completed_round=%s tool_results=%s conversation_messages=%s decision=continue",
|
|
359
|
+
assistant_id,
|
|
360
|
+
round_number,
|
|
361
|
+
len(tool_calls),
|
|
362
|
+
len(conversation),
|
|
363
|
+
)
|
|
364
|
+
|
|
304
365
|
if context_compactor is not None:
|
|
305
366
|
compaction = await context_compactor(conversation)
|
|
306
367
|
if compaction is not None:
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
368
|
+
logger.info(
|
|
369
|
+
"Agent context optimized id=%s round=%s conversation_messages_before=%s conversation_messages_after=%s",
|
|
370
|
+
assistant_id,
|
|
371
|
+
round_number,
|
|
372
|
+
len(conversation),
|
|
373
|
+
len(compaction.conversation),
|
|
311
374
|
)
|
|
375
|
+
conversation = [dict(message) for message in compaction.conversation]
|
|
376
|
+
compaction_message = dict(compaction.message)
|
|
377
|
+
usage_info = compaction_message.pop("usage_info", None)
|
|
378
|
+
event_data: dict[str, object] = {"message": compaction_message}
|
|
379
|
+
if isinstance(usage_info, dict):
|
|
380
|
+
event_data["usage_info"] = usage_info
|
|
381
|
+
yield AgentStreamEvent(event="context_optimized", data=event_data)
|
|
package/backend/src/flowent/compact.py
CHANGED
|
@@ -8,8 +8,9 @@ from flowent.llm import (
|
|
|
8
8
|
ChatMessage,
|
|
9
9
|
CompletionCallable,
|
|
10
10
|
ProviderConnection,
|
|
11
|
-
|
|
11
|
+
complete_chat_with_usage,
|
|
12
12
|
)
|
|
13
|
+
from flowent.usage import TokenUsage
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
15
16
|
from flowent.storage import StoredMessage
|
|
@@ -44,6 +45,7 @@ class CompactResult:
|
|
|
44
45
|
method: CompactMethod
|
|
45
46
|
replacement_history: list[ChatMessage]
|
|
46
47
|
summary: str
|
|
48
|
+
summary_usage: TokenUsage | None
|
|
47
49
|
token_after: int
|
|
48
50
|
token_before: int
|
|
49
51
|
|
|
@@ -66,12 +68,12 @@ class LocalSummaryCompactProvider:
|
|
|
66
68
|
*,
|
|
67
69
|
completion: CompletionCallable | None = None,
|
|
68
70
|
) -> CompactResult:
|
|
69
|
-
|
|
71
|
+
summary_result = await complete_chat_with_usage(
|
|
70
72
|
connection,
|
|
71
73
|
compact_prompt_messages(compact_input.model_history),
|
|
72
74
|
completion=completion,
|
|
73
75
|
)
|
|
74
|
-
summary =
|
|
76
|
+
summary = summary_result.message.content.strip()
|
|
75
77
|
replacement_history = build_replacement_history(
|
|
76
78
|
summary,
|
|
77
79
|
compact_input.messages,
|
|
@@ -81,6 +83,7 @@ class LocalSummaryCompactProvider:
|
|
|
81
83
|
method="local_summary",
|
|
82
84
|
replacement_history=replacement_history,
|
|
83
85
|
summary=summary,
|
|
86
|
+
summary_usage=summary_result.usage,
|
|
84
87
|
token_after=approximate_tokens_for_messages(replacement_history),
|
|
85
88
|
token_before=approximate_tokens_for_messages(compact_input.model_history),
|
|
86
89
|
)
|
|
@@ -127,15 +130,15 @@ def build_replacement_history(
|
|
|
127
130
|
token_budget: int = DEFAULT_RETAINED_MESSAGE_TOKEN_BUDGET,
|
|
128
131
|
) -> list[ChatMessage]:
|
|
129
132
|
return [
|
|
130
|
-
|
|
131
|
-
*retained_recent_chat_messages(
|
|
133
|
+
*retained_recent_user_messages(
|
|
132
134
|
recent_messages,
|
|
133
135
|
token_budget=token_budget,
|
|
134
136
|
),
|
|
137
|
+
ChatMessage(role="user", content=f"{COMPACT_SUMMARY_PREFIX}{summary}"),
|
|
135
138
|
]
|
|
136
139
|
|
|
137
140
|
|
|
138
|
-
def retained_recent_chat_messages(
|
|
141
|
+
def retained_recent_user_messages(
|
|
139
142
|
messages: Sequence[StoredMessage],
|
|
140
143
|
*,
|
|
141
144
|
token_budget: int = DEFAULT_RETAINED_MESSAGE_TOKEN_BUDGET,
|
|
@@ -143,17 +146,22 @@ def retained_recent_chat_messages(
|
|
|
143
146
|
retained: list[ChatMessage] = []
|
|
144
147
|
remaining_tokens = max(token_budget, 0)
|
|
145
148
|
for message in reversed(messages):
|
|
146
|
-
if message.author
|
|
149
|
+
if message.author != "user":
|
|
147
150
|
continue
|
|
148
151
|
token_count = approximate_token_count(message.content)
|
|
149
|
-
if
|
|
152
|
+
if token_count > remaining_tokens:
|
|
153
|
+
if remaining_tokens > 0:
|
|
154
|
+
retained.append(
|
|
155
|
+
ChatMessage(
|
|
156
|
+
role="user",
|
|
157
|
+
content=truncate_text_to_token_budget(
|
|
158
|
+
message.content,
|
|
159
|
+
remaining_tokens,
|
|
160
|
+
),
|
|
161
|
+
)
|
|
162
|
+
)
|
|
150
163
|
break
|
|
151
|
-
|
|
152
|
-
continue
|
|
153
|
-
role: Literal["user", "assistant"] = (
|
|
154
|
-
"user" if message.author == "user" else "assistant"
|
|
155
|
-
)
|
|
156
|
-
retained.append(ChatMessage(role=role, content=message.content))
|
|
164
|
+
retained.append(ChatMessage(role="user", content=message.content))
|
|
157
165
|
remaining_tokens -= token_count
|
|
158
166
|
if remaining_tokens <= 0:
|
|
159
167
|
break
|
|
@@ -161,6 +169,19 @@ def retained_recent_chat_messages(
|
|
|
161
169
|
return retained
|
|
162
170
|
|
|
163
171
|
|
|
172
|
+
def truncate_text_to_token_budget(content: str, token_budget: int) -> str:
|
|
173
|
+
if token_budget <= 0 or not content:
|
|
174
|
+
return ""
|
|
175
|
+
character_budget = max(token_budget * 4, 1)
|
|
176
|
+
if len(content) <= character_budget:
|
|
177
|
+
return content
|
|
178
|
+
left_budget = character_budget // 2
|
|
179
|
+
right_budget = character_budget - left_budget
|
|
180
|
+
removed_tokens = approximate_token_count(content[left_budget:-right_budget])
|
|
181
|
+
marker = f"…{removed_tokens} tokens truncated…"
|
|
182
|
+
return f"{content[:left_budget]}{marker}{content[-right_budget:]}"
|
|
183
|
+
|
|
184
|
+
|
|
164
185
|
def transcript_messages_after(
|
|
165
186
|
messages: Sequence[StoredMessage],
|
|
166
187
|
message_id: str | None,
|
|
package/backend/src/flowent/llm.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import logging
|
|
3
|
+
import re
|
|
2
4
|
from collections.abc import AsyncIterator, Awaitable, Mapping, Sequence
|
|
3
5
|
from enum import StrEnum
|
|
4
|
-
from typing import Any, Literal, Protocol
|
|
6
|
+
from typing import Any, Literal, Protocol, cast
|
|
7
|
+
from urllib.parse import urlsplit, urlunsplit
|
|
5
8
|
|
|
6
9
|
from pydantic import BaseModel, ConfigDict, Field
|
|
7
10
|
|
|
@@ -10,6 +13,7 @@ from flowent.logging import (
|
|
|
10
13
|
configure_litellm_logging,
|
|
11
14
|
write_llm_request_diagnostic,
|
|
12
15
|
)
|
|
16
|
+
from flowent.usage import TokenUsage, token_usage_from_response
|
|
13
17
|
|
|
14
18
|
|
|
15
19
|
class ProviderFormat(StrEnum):
|
|
@@ -55,6 +59,13 @@ class ToolCallDelta(BaseModel):
|
|
|
55
59
|
type: str = "function"
|
|
56
60
|
|
|
57
61
|
|
|
62
|
+
class ChatCompletionResult(BaseModel):
|
|
63
|
+
model_config = ConfigDict(extra="forbid")
|
|
64
|
+
|
|
65
|
+
message: ChatMessage
|
|
66
|
+
usage: TokenUsage | None = None
|
|
67
|
+
|
|
68
|
+
|
|
58
69
|
class CompletionCallable(Protocol):
|
|
59
70
|
def __call__(self, **kwargs: Any) -> Awaitable[Any]: ...
|
|
60
71
|
|
|
@@ -65,6 +76,31 @@ class ModelListCallable(Protocol):
|
|
|
65
76
|
|
|
66
77
|
logger = logging.getLogger("flowent.llm")
|
|
67
78
|
|
|
79
|
+
LLM_RETRY_LIMIT = 5
|
|
80
|
+
LLM_RETRY_BASE_DELAY_SECONDS = 0.5
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class LLMStreamError(RuntimeError):
|
|
84
|
+
pass
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
async def wait_before_llm_retry(attempt_number: int) -> None:
|
|
88
|
+
await asyncio.sleep(LLM_RETRY_BASE_DELAY_SECONDS * attempt_number)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
async def request_litellm_completion(
|
|
92
|
+
completion: CompletionCallable,
|
|
93
|
+
request: Mapping[str, Any],
|
|
94
|
+
) -> Any:
|
|
95
|
+
for attempt_number in range(LLM_RETRY_LIMIT + 1):
|
|
96
|
+
try:
|
|
97
|
+
return await completion(**request)
|
|
98
|
+
except Exception:
|
|
99
|
+
if attempt_number >= LLM_RETRY_LIMIT:
|
|
100
|
+
raise
|
|
101
|
+
await wait_before_llm_retry(attempt_number + 1)
|
|
102
|
+
raise RuntimeError("LLM request failed")
|
|
103
|
+
|
|
68
104
|
|
|
69
105
|
MODEL_PREFIXES: dict[ProviderFormat, str] = {
|
|
70
106
|
ProviderFormat.OPENAI: "openai",
|
|
@@ -72,6 +108,16 @@ MODEL_PREFIXES: dict[ProviderFormat, str] = {
|
|
|
72
108
|
ProviderFormat.ANTHROPIC: "anthropic",
|
|
73
109
|
ProviderFormat.GEMINI: "gemini",
|
|
74
110
|
}
|
|
111
|
+
_litellm_stream_error_patch_installed = False
|
|
112
|
+
|
|
113
|
+
PROVIDER_API_VERSIONS: dict[ProviderFormat, str] = {
|
|
114
|
+
ProviderFormat.OPENAI: "v1",
|
|
115
|
+
ProviderFormat.OPENAI_RESPONSES: "v1",
|
|
116
|
+
ProviderFormat.ANTHROPIC: "v1",
|
|
117
|
+
ProviderFormat.GEMINI: "v1beta",
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
VERSION_PATH_SEGMENT = re.compile(r"^v\d+(?:[a-z]+)?$", re.IGNORECASE)
|
|
75
121
|
|
|
76
122
|
|
|
77
123
|
def provider_model_name(connection: ProviderConnection) -> str:
|
|
@@ -82,6 +128,40 @@ def provider_litellm_name(provider: ProviderFormat) -> str:
|
|
|
82
128
|
return MODEL_PREFIXES[provider]
|
|
83
129
|
|
|
84
130
|
|
|
131
|
+
def normalize_provider_base_url(
|
|
132
|
+
provider: ProviderFormat, base_url: str | None
|
|
133
|
+
) -> str | None:
|
|
134
|
+
if base_url is None:
|
|
135
|
+
return None
|
|
136
|
+
raw_base_url = base_url.strip()
|
|
137
|
+
if not raw_base_url:
|
|
138
|
+
return None
|
|
139
|
+
if raw_base_url.endswith("#"):
|
|
140
|
+
return raw_base_url[:-1].rstrip("/") or None
|
|
141
|
+
|
|
142
|
+
trimmed_base_url = raw_base_url.rstrip("/")
|
|
143
|
+
parsed_base_url = urlsplit(trimmed_base_url)
|
|
144
|
+
path_segments = [segment for segment in parsed_base_url.path.split("/") if segment]
|
|
145
|
+
if any(VERSION_PATH_SEGMENT.fullmatch(segment) for segment in path_segments):
|
|
146
|
+
return trimmed_base_url
|
|
147
|
+
|
|
148
|
+
version = PROVIDER_API_VERSIONS[provider]
|
|
149
|
+
if parsed_base_url.scheme and parsed_base_url.netloc:
|
|
150
|
+
path = parsed_base_url.path.rstrip("/")
|
|
151
|
+
normalized_path = f"{path}/{version}" if path else f"/{version}"
|
|
152
|
+
return urlunsplit(
|
|
153
|
+
(
|
|
154
|
+
parsed_base_url.scheme,
|
|
155
|
+
parsed_base_url.netloc,
|
|
156
|
+
normalized_path,
|
|
157
|
+
parsed_base_url.query,
|
|
158
|
+
parsed_base_url.fragment,
|
|
159
|
+
)
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
return f"{trimmed_base_url}/{version}"
|
|
163
|
+
|
|
164
|
+
|
|
85
165
|
def normalize_provider_model_name(provider: ProviderFormat, model: str) -> str:
|
|
86
166
|
prefix = f"{provider_litellm_name(provider)}/"
|
|
87
167
|
if model.startswith(prefix):
|
|
@@ -89,6 +169,71 @@ def normalize_provider_model_name(provider: ProviderFormat, model: str) -> str:
|
|
|
89
169
|
return model
|
|
90
170
|
|
|
91
171
|
|
|
172
|
+
def stream_failure_message(chunk: Any) -> str:
|
|
173
|
+
if isinstance(chunk, BaseModel):
|
|
174
|
+
chunk = chunk.model_dump()
|
|
175
|
+
if not isinstance(chunk, Mapping):
|
|
176
|
+
return ""
|
|
177
|
+
|
|
178
|
+
event_type = getattr(chunk.get("type"), "value", chunk.get("type"))
|
|
179
|
+
event_type = str(event_type or "")
|
|
180
|
+
if event_type == "error":
|
|
181
|
+
error = chunk.get("error", {})
|
|
182
|
+
elif event_type == "response.failed":
|
|
183
|
+
response = chunk.get("response", {})
|
|
184
|
+
error = value_at(response, "error", {})
|
|
185
|
+
else:
|
|
186
|
+
return ""
|
|
187
|
+
|
|
188
|
+
message = value_at(error, "message", "")
|
|
189
|
+
if isinstance(message, str) and message:
|
|
190
|
+
return message
|
|
191
|
+
code = value_at(error, "code", "")
|
|
192
|
+
if isinstance(code, str) and code:
|
|
193
|
+
return code
|
|
194
|
+
return "Upstream request failed"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def raise_for_stream_failure(chunk: Any) -> None:
|
|
198
|
+
message = stream_failure_message(chunk)
|
|
199
|
+
if message:
|
|
200
|
+
raise LLMStreamError(message)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def configure_litellm_stream_error_handling() -> None:
|
|
204
|
+
global _litellm_stream_error_patch_installed
|
|
205
|
+
|
|
206
|
+
if _litellm_stream_error_patch_installed:
|
|
207
|
+
return
|
|
208
|
+
try:
|
|
209
|
+
from litellm.completion_extras.litellm_responses_transformation.transformation import (
|
|
210
|
+
OpenAiResponsesToChatCompletionStreamIterator,
|
|
211
|
+
)
|
|
212
|
+
except Exception:
|
|
213
|
+
return
|
|
214
|
+
|
|
215
|
+
if getattr(
|
|
216
|
+
OpenAiResponsesToChatCompletionStreamIterator,
|
|
217
|
+
"_flowent_stream_error_patch_installed",
|
|
218
|
+
False,
|
|
219
|
+
):
|
|
220
|
+
_litellm_stream_error_patch_installed = True
|
|
221
|
+
return
|
|
222
|
+
|
|
223
|
+
transformer = cast(Any, OpenAiResponsesToChatCompletionStreamIterator)
|
|
224
|
+
original = transformer.translate_responses_chunk_to_openai_stream
|
|
225
|
+
|
|
226
|
+
def translate_responses_chunk_to_openai_stream(parsed_chunk: Any) -> Any:
|
|
227
|
+
raise_for_stream_failure(parsed_chunk)
|
|
228
|
+
return original(parsed_chunk)
|
|
229
|
+
|
|
230
|
+
transformer.translate_responses_chunk_to_openai_stream = staticmethod(
|
|
231
|
+
translate_responses_chunk_to_openai_stream
|
|
232
|
+
)
|
|
233
|
+
transformer._flowent_stream_error_patch_installed = True
|
|
234
|
+
_litellm_stream_error_patch_installed = True
|
|
235
|
+
|
|
236
|
+
|
|
92
237
|
def unique_model_names(provider: ProviderFormat, models: Sequence[str]) -> list[str]:
|
|
93
238
|
seen: set[str] = set()
|
|
94
239
|
normalized_models: list[str] = []
|
|
@@ -115,7 +260,7 @@ def list_provider_models(
|
|
|
115
260
|
model_lister = get_valid_models
|
|
116
261
|
|
|
117
262
|
models = model_lister(
|
|
118
|
-
api_base=base_url,
|
|
263
|
+
api_base=normalize_provider_base_url(provider, base_url),
|
|
119
264
|
api_key=secret_reference,
|
|
120
265
|
check_provider_endpoint=True,
|
|
121
266
|
custom_llm_provider=provider_litellm_name(provider),
|
|
@@ -161,8 +306,12 @@ def build_litellm_request(
|
|
|
161
306
|
request["tools"] = list(tools)
|
|
162
307
|
if stream:
|
|
163
308
|
request["stream"] = True
|
|
164
|
-
|
|
165
|
-
|
|
309
|
+
request["stream_options"] = {"include_usage": True}
|
|
310
|
+
normalized_base_url = normalize_provider_base_url(
|
|
311
|
+
connection.provider, connection.base_url
|
|
312
|
+
)
|
|
313
|
+
if normalized_base_url:
|
|
314
|
+
request["api_base"] = normalized_base_url
|
|
166
315
|
if connection.reasoning_effort != ReasoningEffort.DEFAULT:
|
|
167
316
|
request["reasoning_effort"] = connection.reasoning_effort.value
|
|
168
317
|
logger.log(
|
|
@@ -170,7 +319,7 @@ def build_litellm_request(
|
|
|
170
319
|
"Built LiteLLM request provider=%s model=%s base_url=%s stream=%s tools=%s reasoning_effort=%s messages=%r",
|
|
171
320
|
connection.provider,
|
|
172
321
|
connection.model,
|
|
173
|
-
|
|
322
|
+
normalized_base_url or "",
|
|
174
323
|
stream,
|
|
175
324
|
bool(tools),
|
|
176
325
|
connection.reasoning_effort,
|
|
@@ -185,7 +334,7 @@ def record_litellm_request_diagnostic(
|
|
|
185
334
|
) -> None:
|
|
186
335
|
write_llm_request_diagnostic(
|
|
187
336
|
{
|
|
188
|
-
"base_url":
|
|
337
|
+
"base_url": request.get("api_base"),
|
|
189
338
|
"litellm_model": request["model"],
|
|
190
339
|
"messages": request["messages"],
|
|
191
340
|
"model": connection.model,
|
|
@@ -204,6 +353,23 @@ async def complete_chat(
|
|
|
204
353
|
completion: CompletionCallable | None = None,
|
|
205
354
|
tools: Sequence[Mapping[str, Any]] | None = None,
|
|
206
355
|
) -> ChatMessage:
|
|
356
|
+
return (
|
|
357
|
+
await complete_chat_with_usage(
|
|
358
|
+
connection,
|
|
359
|
+
messages,
|
|
360
|
+
completion=completion,
|
|
361
|
+
tools=tools,
|
|
362
|
+
)
|
|
363
|
+
).message
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
async def complete_chat_with_usage(
|
|
367
|
+
connection: ProviderConnection,
|
|
368
|
+
messages: Sequence[ChatMessage | Mapping[str, Any]],
|
|
369
|
+
*,
|
|
370
|
+
completion: CompletionCallable | None = None,
|
|
371
|
+
tools: Sequence[Mapping[str, Any]] | None = None,
|
|
372
|
+
) -> ChatCompletionResult:
|
|
207
373
|
if completion is None:
|
|
208
374
|
from litellm import acompletion
|
|
209
375
|
|
|
@@ -217,10 +383,15 @@ async def complete_chat(
|
|
|
217
383
|
)
|
|
218
384
|
request = build_litellm_request(connection, messages, tools=tools)
|
|
219
385
|
record_litellm_request_diagnostic(connection, request)
|
|
220
|
-
response = await completion(**request)
|
|
386
|
+
response = await request_litellm_completion(completion, request)
|
|
221
387
|
logger.log(TRACE_LEVEL, "LLM completion response=%r", response)
|
|
222
388
|
choice = response["choices"][0]["message"]
|
|
223
|
-
return
|
|
389
|
+
return ChatCompletionResult(
|
|
390
|
+
message=ChatMessage(
|
|
391
|
+
role=choice.get("role", "assistant"), content=choice["content"]
|
|
392
|
+
),
|
|
393
|
+
usage=token_usage_from_response(response),
|
|
394
|
+
)
|
|
224
395
|
|
|
225
396
|
|
|
226
397
|
def value_at(value: Any, key: str, default: Any = None) -> Any:
|
|
@@ -306,6 +477,10 @@ def chunk_delta_tool_calls(chunk: Any) -> list[ToolCallDelta]:
|
|
|
306
477
|
return tool_call_deltas
|
|
307
478
|
|
|
308
479
|
|
|
480
|
+
def chunk_token_usage(chunk: Any) -> TokenUsage | None:
|
|
481
|
+
return token_usage_from_response(chunk)
|
|
482
|
+
|
|
483
|
+
|
|
309
484
|
async def stream_chat_chunks(
|
|
310
485
|
connection: ProviderConnection,
|
|
311
486
|
messages: Sequence[ChatMessage | Mapping[str, Any]],
|
|
@@ -317,6 +492,7 @@ async def stream_chat_chunks(
|
|
|
317
492
|
from litellm import acompletion
|
|
318
493
|
|
|
319
494
|
configure_litellm_logging()
|
|
495
|
+
configure_litellm_stream_error_handling()
|
|
320
496
|
completion = acompletion
|
|
321
497
|
|
|
322
498
|
logger.debug(
|
|
@@ -326,10 +502,20 @@ async def stream_chat_chunks(
|
|
|
326
502
|
)
|
|
327
503
|
request = build_litellm_request(connection, messages, stream=True, tools=tools)
|
|
328
504
|
record_litellm_request_diagnostic(connection, request)
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
505
|
+
for attempt_number in range(LLM_RETRY_LIMIT + 1):
|
|
506
|
+
yielded_chunk = False
|
|
507
|
+
try:
|
|
508
|
+
response = await completion(**request)
|
|
509
|
+
async for chunk in response:
|
|
510
|
+
raise_for_stream_failure(chunk)
|
|
511
|
+
logger.log(TRACE_LEVEL, "LLM stream chunk=%r", chunk)
|
|
512
|
+
yielded_chunk = True
|
|
513
|
+
yield chunk
|
|
514
|
+
return
|
|
515
|
+
except Exception:
|
|
516
|
+
if yielded_chunk or attempt_number >= LLM_RETRY_LIMIT:
|
|
517
|
+
raise
|
|
518
|
+
await wait_before_llm_retry(attempt_number + 1)
|
|
333
519
|
|
|
334
520
|
|
|
335
521
|
async def stream_chat(
|