flowent 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +1 -1
- package/backend/src/flowent/agent.py +22 -15
- package/backend/src/flowent/api_models.py +13 -8
- package/backend/src/flowent/llm.py +50 -6
- package/backend/src/flowent/mcp.py +4 -3
- package/backend/src/flowent/permissions.py +51 -38
- package/backend/src/flowent/routes/providers.py +33 -10
- package/backend/src/flowent/routes/system.py +5 -6
- package/backend/src/flowent/routes/workspace.py +33 -23
- package/backend/src/flowent/state/models.py +4 -4
- package/backend/src/flowent/state/schema.py +121 -0
- package/backend/src/flowent/state/store.py +9 -3
- package/backend/src/flowent/static/assets/index-BX18a4Jz.js +100 -0
- package/backend/src/flowent/static/assets/index-EC37agAH.css +2 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/tools.py +84 -33
- package/backend/src/flowent/usage.py +66 -0
- package/backend/src/flowent/workspace/context.py +140 -47
- package/backend/src/flowent/workspace/events.py +5 -7
- package/backend/src/flowent/workspace/output.py +129 -4
- package/backend/src/flowent/workspace/runtime.py +393 -185
- package/backend/uv.lock +1 -1
- package/dist/frontend/assets/index-BX18a4Jz.js +100 -0
- package/dist/frontend/assets/index-EC37agAH.css +2 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +8 -10
- package/backend/src/flowent/static/assets/index-CvWZZMtK.css +0 -2
- package/backend/src/flowent/static/assets/index-ma2v8oW7.js +0 -90
- package/dist/frontend/assets/index-CvWZZMtK.css +0 -2
- package/dist/frontend/assets/index-ma2v8oW7.js +0 -90
|
@@ -27,26 +27,31 @@ from flowent.storage import (
|
|
|
27
27
|
StoredState,
|
|
28
28
|
StoredToolItem,
|
|
29
29
|
)
|
|
30
|
-
from flowent.tools import ToolContext
|
|
30
|
+
from flowent.tools import ToolContext, text_tool_result, tool_specs
|
|
31
31
|
from flowent.usage import (
|
|
32
32
|
TokenUsage,
|
|
33
33
|
TokenUsageInfo,
|
|
34
34
|
append_token_usage,
|
|
35
|
+
full_context_usage,
|
|
36
|
+
is_context_window_error,
|
|
35
37
|
recompute_context_usage,
|
|
36
38
|
)
|
|
37
39
|
from flowent.workspace.context import (
|
|
38
40
|
COMPACTED_CONTEXT_MARKER,
|
|
39
41
|
OPTIMIZED_CONTEXT_MARKER,
|
|
42
|
+
compact_prompt_chat_messages,
|
|
40
43
|
context_window_for_settings,
|
|
44
|
+
model_request_messages_data,
|
|
45
|
+
model_visible_assistant_output_messages,
|
|
41
46
|
should_auto_compact,
|
|
42
47
|
update_context_usage_for_response,
|
|
43
48
|
usage_event_data,
|
|
44
49
|
workspace_chat_messages,
|
|
45
50
|
)
|
|
46
51
|
from flowent.workspace.events import (
|
|
47
|
-
|
|
52
|
+
WorkspaceResponse,
|
|
48
53
|
append_or_replace_message,
|
|
49
|
-
|
|
54
|
+
response_snapshot_data_at,
|
|
50
55
|
stream_event,
|
|
51
56
|
stream_message_data,
|
|
52
57
|
)
|
|
@@ -54,14 +59,17 @@ from flowent.workspace.output import (
|
|
|
54
59
|
EMPTY_MODEL_RESPONSE_DETAIL,
|
|
55
60
|
AssistantOutputBuilder,
|
|
56
61
|
approval_transcript,
|
|
62
|
+
assistant_retry_output_start_index,
|
|
57
63
|
run_error_event_data,
|
|
58
64
|
run_error_output_item,
|
|
65
|
+
trim_assistant_message_at_error,
|
|
59
66
|
)
|
|
60
67
|
|
|
61
68
|
logger = logging.getLogger("flowent.workspace.runtime")
|
|
62
69
|
|
|
63
70
|
AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
|
|
64
71
|
WORKSPACE_PROGRESS_FLUSH_INTERVAL_SECONDS = 0.5
|
|
72
|
+
USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE = "Context could not be compacted."
|
|
65
73
|
|
|
66
74
|
|
|
67
75
|
@dataclass
|
|
@@ -84,8 +92,7 @@ class WorkspaceRuntime:
|
|
|
84
92
|
self.cwd = cwd
|
|
85
93
|
self.mcp_manager = mcp_manager
|
|
86
94
|
self.store = store
|
|
87
|
-
self.
|
|
88
|
-
self.active_run_id: str | None = None
|
|
95
|
+
self.active_response: WorkspaceResponse | None = None
|
|
89
96
|
self.generation = 0
|
|
90
97
|
self.active_compact_task: WorkspaceCompactTask | None = None
|
|
91
98
|
|
|
@@ -102,14 +109,13 @@ class WorkspaceRuntime:
|
|
|
102
109
|
compacted_context,
|
|
103
110
|
checkpoint,
|
|
104
111
|
)
|
|
105
|
-
return
|
|
106
|
-
|
|
107
|
-
for message in [
|
|
112
|
+
return model_request_messages_data(
|
|
113
|
+
[
|
|
108
114
|
*runtime_context_messages(self.cwd, state.settings.agent_prompt),
|
|
109
115
|
*explicit_skill_messages(self.cwd, self.store, content),
|
|
110
116
|
*chat_messages,
|
|
111
117
|
]
|
|
112
|
-
|
|
118
|
+
)
|
|
113
119
|
|
|
114
120
|
async def save_context_checkpoint(
|
|
115
121
|
self,
|
|
@@ -117,16 +123,17 @@ class WorkspaceRuntime:
|
|
|
117
123
|
connection: ProviderConnection,
|
|
118
124
|
context_window_limit: int,
|
|
119
125
|
messages: list[StoredMessage],
|
|
120
|
-
model_history:
|
|
126
|
+
model_history: Sequence[ChatMessage | Mapping[str, object]],
|
|
121
127
|
marker_content: str,
|
|
122
128
|
source_message_id: str | None = None,
|
|
123
129
|
trigger: Literal["manual", "auto"],
|
|
124
130
|
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo]:
|
|
131
|
+
compact_model_history = compact_prompt_chat_messages(model_history)
|
|
125
132
|
compact_result = await self.compact_provider.compact(
|
|
126
133
|
connection,
|
|
127
134
|
CompactInput(
|
|
128
135
|
messages=messages,
|
|
129
|
-
model_history=
|
|
136
|
+
model_history=compact_model_history,
|
|
130
137
|
retained_message_token_budget=AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET,
|
|
131
138
|
trigger=trigger,
|
|
132
139
|
),
|
|
@@ -149,6 +156,7 @@ class WorkspaceRuntime:
|
|
|
149
156
|
author="system",
|
|
150
157
|
content=marker_content,
|
|
151
158
|
id=str(uuid4()),
|
|
159
|
+
summary=compact_result.summary,
|
|
152
160
|
usage_info=usage_info,
|
|
153
161
|
)
|
|
154
162
|
self.store.save_compaction_checkpoint(
|
|
@@ -183,13 +191,16 @@ class WorkspaceRuntime:
|
|
|
183
191
|
*,
|
|
184
192
|
connection: ProviderConnection,
|
|
185
193
|
context_window_limit: int,
|
|
194
|
+
budget_messages: Sequence[ChatMessage | Mapping[str, object]] | None = None,
|
|
186
195
|
messages: list[StoredMessage],
|
|
187
|
-
model_history:
|
|
196
|
+
model_history: Sequence[ChatMessage | Mapping[str, object]],
|
|
188
197
|
source_message_id: str | None = None,
|
|
198
|
+
tools: Sequence[Mapping[str, object]] = (),
|
|
189
199
|
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
|
|
190
200
|
if not should_auto_compact(
|
|
191
|
-
model_history,
|
|
201
|
+
budget_messages or model_history,
|
|
192
202
|
context_window=context_window_limit,
|
|
203
|
+
tools=tools,
|
|
193
204
|
):
|
|
194
205
|
return None
|
|
195
206
|
logger.info("Workspace auto compact requested")
|
|
@@ -218,7 +229,11 @@ class WorkspaceRuntime:
|
|
|
218
229
|
)
|
|
219
230
|
next_messages = [*state.messages, user_message]
|
|
220
231
|
self.store.save_messages(next_messages)
|
|
221
|
-
|
|
232
|
+
model_tool_specs = [
|
|
233
|
+
*tool_specs(),
|
|
234
|
+
*list(self.mcp_manager.tool_specs()),
|
|
235
|
+
]
|
|
236
|
+
model_history: list[ChatMessage | Mapping[str, object]] = [
|
|
222
237
|
*runtime_context_messages(self.cwd, state.settings.agent_prompt),
|
|
223
238
|
*workspace_chat_messages(
|
|
224
239
|
state.messages,
|
|
@@ -229,9 +244,13 @@ class WorkspaceRuntime:
|
|
|
229
244
|
auto_compaction = await self.auto_compact_messages(
|
|
230
245
|
connection=connection,
|
|
231
246
|
context_window_limit=context_window_limit,
|
|
247
|
+
budget_messages=self.request_messages_for_content(
|
|
248
|
+
state, next_messages, content
|
|
249
|
+
),
|
|
232
250
|
messages=state.messages,
|
|
233
251
|
model_history=model_history,
|
|
234
252
|
source_message_id=None,
|
|
253
|
+
tools=model_tool_specs,
|
|
235
254
|
)
|
|
236
255
|
if auto_compaction is not None:
|
|
237
256
|
marker, _, _ = auto_compaction
|
|
@@ -336,6 +355,7 @@ class WorkspaceRuntime:
|
|
|
336
355
|
tool.model_dump(exclude_none=True)
|
|
337
356
|
for tool in assistant_output.tools.values()
|
|
338
357
|
],
|
|
358
|
+
request_tools=model_tool_specs,
|
|
339
359
|
model_context_window=context_window_limit,
|
|
340
360
|
)
|
|
341
361
|
self.store.save_usage_info(final_usage_info)
|
|
@@ -372,14 +392,14 @@ class WorkspaceRuntime:
|
|
|
372
392
|
exc_info=(type(result), result, result.__traceback__),
|
|
373
393
|
)
|
|
374
394
|
|
|
375
|
-
async def
|
|
395
|
+
async def stop_response_for_shutdown(self) -> None:
|
|
376
396
|
tasks: list[asyncio.Task[None]] = []
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
tasks.append(
|
|
382
|
-
await self.gather_shutdown_tasks("Workspace
|
|
397
|
+
response = self.active_response
|
|
398
|
+
if response is not None and response.task is not None:
|
|
399
|
+
if not response.task.done():
|
|
400
|
+
response.task.cancel()
|
|
401
|
+
tasks.append(response.task)
|
|
402
|
+
await self.gather_shutdown_tasks("Workspace response", tasks)
|
|
383
403
|
|
|
384
404
|
async def stop_compact_for_shutdown(self) -> None:
|
|
385
405
|
if self.active_compact_task is None:
|
|
@@ -393,64 +413,72 @@ class WorkspaceRuntime:
|
|
|
393
413
|
self.store.save_is_compacting(False)
|
|
394
414
|
|
|
395
415
|
async def stop_for_shutdown(self) -> None:
|
|
396
|
-
await self.
|
|
416
|
+
await self.stop_response_for_shutdown()
|
|
397
417
|
await self.stop_compact_for_shutdown()
|
|
398
418
|
|
|
399
|
-
def
|
|
400
|
-
|
|
419
|
+
def current_response(self) -> WorkspaceResponse | None:
|
|
420
|
+
response = self.active_response
|
|
421
|
+
if response is None or response.is_done:
|
|
401
422
|
return None
|
|
402
|
-
|
|
403
|
-
if run is None or run.is_done:
|
|
404
|
-
return None
|
|
405
|
-
return run
|
|
423
|
+
return response
|
|
406
424
|
|
|
407
|
-
def
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
425
|
+
def has_active_response(self) -> bool:
|
|
426
|
+
response = self.active_response
|
|
427
|
+
return (
|
|
428
|
+
response is not None
|
|
429
|
+
and not response.is_done
|
|
430
|
+
and response.task is not None
|
|
431
|
+
and not response.task.done()
|
|
411
432
|
)
|
|
412
433
|
|
|
413
434
|
def clear(self) -> list[StoredMessage]:
|
|
414
435
|
self.generation += 1
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
436
|
+
response = self.active_response
|
|
437
|
+
if response is not None:
|
|
438
|
+
response.is_done = True
|
|
439
|
+
if response.task is not None and not response.task.done():
|
|
440
|
+
response.discard_on_cancel = True
|
|
441
|
+
response.task.cancel()
|
|
421
442
|
return self.store.save_messages([])
|
|
422
443
|
|
|
423
|
-
async def
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
444
|
+
async def notify_cleared_response(self) -> None:
|
|
445
|
+
response = self.active_response
|
|
446
|
+
if response is None:
|
|
447
|
+
return
|
|
448
|
+
async with response.condition:
|
|
449
|
+
response.condition.notify_all()
|
|
427
450
|
|
|
428
451
|
async def append_event(
|
|
429
|
-
self,
|
|
452
|
+
self, response: WorkspaceResponse, event: str, data: dict[str, object]
|
|
430
453
|
) -> None:
|
|
431
|
-
async with
|
|
432
|
-
|
|
433
|
-
|
|
454
|
+
async with response.condition:
|
|
455
|
+
response.events.append((response.latest_event_index + 1, event, data))
|
|
456
|
+
response.condition.notify_all()
|
|
434
457
|
|
|
435
|
-
async def append_snapshot(
|
|
458
|
+
async def append_snapshot(
|
|
459
|
+
self, response: WorkspaceResponse, message: StoredMessage
|
|
460
|
+
) -> None:
|
|
436
461
|
if message.author != "assistant":
|
|
437
462
|
return
|
|
438
|
-
|
|
463
|
+
response.latest_snapshot = message
|
|
439
464
|
await self.append_event(
|
|
440
|
-
|
|
465
|
+
response,
|
|
441
466
|
"snapshot",
|
|
442
|
-
{"message": stream_message_data(message,
|
|
467
|
+
{"message": stream_message_data(message, response.active_output)},
|
|
443
468
|
)
|
|
444
469
|
|
|
445
|
-
def
|
|
470
|
+
def start_response(
|
|
446
471
|
self, content: str, *, message_id: str | None = None
|
|
447
|
-
) ->
|
|
448
|
-
if self.
|
|
449
|
-
active_run = self.active_run()
|
|
472
|
+
) -> WorkspaceResponse:
|
|
473
|
+
if self.has_active_response():
|
|
450
474
|
raise HTTPException(
|
|
451
475
|
status_code=409,
|
|
452
476
|
detail="Response in progress",
|
|
453
|
-
|
|
477
|
+
)
|
|
478
|
+
if self.store.read_is_compacting():
|
|
479
|
+
raise HTTPException(
|
|
480
|
+
status_code=409,
|
|
481
|
+
detail="Context refining in progress. Please wait a moment.",
|
|
454
482
|
)
|
|
455
483
|
state = self.store.read_state()
|
|
456
484
|
user_message_id = message_id or str(uuid4())
|
|
@@ -463,7 +491,7 @@ class WorkspaceRuntime:
|
|
|
463
491
|
)
|
|
464
492
|
next_messages = [*state.messages, user_message]
|
|
465
493
|
self.store.save_messages(next_messages)
|
|
466
|
-
return self.
|
|
494
|
+
return self._start_response_from_messages(
|
|
467
495
|
content=content,
|
|
468
496
|
next_messages=next_messages,
|
|
469
497
|
state=state,
|
|
@@ -476,13 +504,16 @@ class WorkspaceRuntime:
|
|
|
476
504
|
*,
|
|
477
505
|
action: Literal["resend", "save"],
|
|
478
506
|
content: str,
|
|
479
|
-
) -> tuple[list[StoredMessage],
|
|
480
|
-
if self.
|
|
481
|
-
active_run = self.active_run()
|
|
507
|
+
) -> tuple[list[StoredMessage], WorkspaceResponse | None]:
|
|
508
|
+
if self.has_active_response():
|
|
482
509
|
raise HTTPException(
|
|
483
510
|
status_code=409,
|
|
484
511
|
detail="Response in progress",
|
|
485
|
-
|
|
512
|
+
)
|
|
513
|
+
if self.store.read_is_compacting():
|
|
514
|
+
raise HTTPException(
|
|
515
|
+
status_code=409,
|
|
516
|
+
detail="Context refining in progress. Please wait a moment.",
|
|
486
517
|
)
|
|
487
518
|
state = self.store.read_state()
|
|
488
519
|
message_index = next(
|
|
@@ -513,50 +544,140 @@ class WorkspaceRuntime:
|
|
|
513
544
|
previous_messages = state.messages[:message_index]
|
|
514
545
|
next_messages = [*previous_messages, updated_message]
|
|
515
546
|
self.store.save_messages(next_messages)
|
|
516
|
-
|
|
547
|
+
response = self._start_response_from_messages(
|
|
517
548
|
content=content,
|
|
518
549
|
next_messages=next_messages,
|
|
519
550
|
state=state.model_copy(update={"messages": previous_messages}),
|
|
520
551
|
user_message=updated_message,
|
|
521
552
|
)
|
|
522
|
-
return next_messages,
|
|
553
|
+
return next_messages, response
|
|
554
|
+
|
|
555
|
+
def retry_error(
|
|
556
|
+
self,
|
|
557
|
+
message_id: str,
|
|
558
|
+
*,
|
|
559
|
+
error_id: str,
|
|
560
|
+
) -> tuple[list[StoredMessage], WorkspaceResponse]:
|
|
561
|
+
if self.has_active_response():
|
|
562
|
+
raise HTTPException(
|
|
563
|
+
status_code=409,
|
|
564
|
+
detail="Response in progress",
|
|
565
|
+
)
|
|
566
|
+
if self.store.read_is_compacting():
|
|
567
|
+
raise HTTPException(
|
|
568
|
+
status_code=409,
|
|
569
|
+
detail="Context refining in progress. Please wait a moment.",
|
|
570
|
+
)
|
|
571
|
+
state = self.store.read_state()
|
|
572
|
+
message_index = next(
|
|
573
|
+
(
|
|
574
|
+
index
|
|
575
|
+
for index, message in enumerate(state.messages)
|
|
576
|
+
if message.id == message_id
|
|
577
|
+
),
|
|
578
|
+
-1,
|
|
579
|
+
)
|
|
580
|
+
if message_index < 0:
|
|
581
|
+
raise HTTPException(status_code=404, detail="Message not found.")
|
|
582
|
+
message = state.messages[message_index]
|
|
583
|
+
if message.author != "assistant":
|
|
584
|
+
raise HTTPException(
|
|
585
|
+
status_code=400, detail="Only assistant errors can be retried."
|
|
586
|
+
)
|
|
587
|
+
previous_user_message = next(
|
|
588
|
+
(
|
|
589
|
+
current_message
|
|
590
|
+
for current_message in reversed(state.messages[:message_index])
|
|
591
|
+
if current_message.author == "user"
|
|
592
|
+
),
|
|
593
|
+
None,
|
|
594
|
+
)
|
|
595
|
+
if previous_user_message is None:
|
|
596
|
+
raise HTTPException(status_code=400, detail="Message history is invalid.")
|
|
597
|
+
trimmed_message = trim_assistant_message_at_error(
|
|
598
|
+
message,
|
|
599
|
+
error_id,
|
|
600
|
+
status="running",
|
|
601
|
+
)
|
|
602
|
+
if trimmed_message is None:
|
|
603
|
+
raise HTTPException(status_code=404, detail="Error block not found.")
|
|
523
604
|
|
|
524
|
-
|
|
605
|
+
previous_messages = state.messages[:message_index]
|
|
606
|
+
next_messages = [*previous_messages, trimmed_message]
|
|
607
|
+
self.store.save_messages(next_messages)
|
|
608
|
+
state_before_assistant = state.model_copy(
|
|
609
|
+
update={"messages": previous_messages}
|
|
610
|
+
)
|
|
611
|
+
base_request_messages = self.request_messages_for_content(
|
|
612
|
+
state_before_assistant,
|
|
613
|
+
previous_messages,
|
|
614
|
+
previous_user_message.content,
|
|
615
|
+
)
|
|
616
|
+
request_messages = [
|
|
617
|
+
*base_request_messages,
|
|
618
|
+
*model_visible_assistant_output_messages(trimmed_message),
|
|
619
|
+
]
|
|
620
|
+
response = self._start_response_from_messages(
|
|
621
|
+
content=previous_user_message.content,
|
|
622
|
+
initial_assistant_message=trimmed_message,
|
|
623
|
+
next_messages=next_messages,
|
|
624
|
+
output_start_index=assistant_retry_output_start_index(trimmed_message),
|
|
625
|
+
request_messages=request_messages,
|
|
626
|
+
state=state_before_assistant,
|
|
627
|
+
usage_request_messages=base_request_messages,
|
|
628
|
+
user_message=previous_user_message,
|
|
629
|
+
)
|
|
630
|
+
return next_messages, response
|
|
631
|
+
|
|
632
|
+
def _start_response_from_messages(
|
|
525
633
|
self,
|
|
526
634
|
*,
|
|
527
635
|
content: str,
|
|
636
|
+
initial_assistant_message: StoredMessage | None = None,
|
|
528
637
|
next_messages: list[StoredMessage],
|
|
638
|
+
output_start_index: int = 1,
|
|
639
|
+
request_messages: list[dict[str, object]] | None = None,
|
|
529
640
|
state: StoredState,
|
|
641
|
+
usage_request_messages: list[dict[str, object]] | None = None,
|
|
530
642
|
user_message: StoredMessage,
|
|
531
|
-
) ->
|
|
643
|
+
) -> WorkspaceResponse:
|
|
532
644
|
connection = selected_connection(state)
|
|
533
645
|
context_window_limit = context_window_for_settings(state.settings)
|
|
534
|
-
|
|
646
|
+
response = WorkspaceResponse(
|
|
535
647
|
condition=asyncio.Condition(),
|
|
536
648
|
generation=self.generation,
|
|
537
649
|
)
|
|
538
|
-
self.
|
|
539
|
-
self.active_run_id = run.id
|
|
650
|
+
self.active_response = response
|
|
540
651
|
|
|
541
|
-
async def
|
|
652
|
+
async def response_task() -> None:
|
|
542
653
|
nonlocal next_messages
|
|
543
|
-
assistant_message =
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
654
|
+
assistant_message = (
|
|
655
|
+
initial_assistant_message
|
|
656
|
+
if initial_assistant_message is not None
|
|
657
|
+
else StoredMessage(
|
|
658
|
+
author="assistant",
|
|
659
|
+
content="",
|
|
660
|
+
id=str(uuid4()),
|
|
661
|
+
status="running",
|
|
662
|
+
)
|
|
548
663
|
)
|
|
549
|
-
assistant_output =
|
|
664
|
+
assistant_output = (
|
|
665
|
+
AssistantOutputBuilder.from_message(assistant_message)
|
|
666
|
+
if initial_assistant_message is not None
|
|
667
|
+
else AssistantOutputBuilder(assistant_message.id)
|
|
668
|
+
)
|
|
669
|
+
initial_assistant_content = assistant_output.content
|
|
670
|
+
initial_assistant_thinking = assistant_output.thinking
|
|
550
671
|
last_progress_flush_at = 0.0
|
|
551
672
|
|
|
552
673
|
def is_current_generation() -> bool:
|
|
553
|
-
return
|
|
674
|
+
return response.generation == self.generation
|
|
554
675
|
|
|
555
676
|
def update_assistant_message(
|
|
556
677
|
status: str = "running", *, persist: bool
|
|
557
678
|
) -> StoredMessage | None:
|
|
558
679
|
nonlocal next_messages, assistant_message
|
|
559
|
-
if not is_current_generation() or
|
|
680
|
+
if not is_current_generation() or response.discard_on_cancel:
|
|
560
681
|
return None
|
|
561
682
|
assistant_message = StoredMessage(
|
|
562
683
|
author="assistant",
|
|
@@ -603,43 +724,90 @@ class WorkspaceRuntime:
|
|
|
603
724
|
turn_usage_info: TokenUsageInfo | None = None
|
|
604
725
|
current_output_index = 0
|
|
605
726
|
latest_usage_output_index: int | None = None
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
pre_turn_request_messages = self.request_messages_for_content(
|
|
612
|
-
state,
|
|
613
|
-
state.messages,
|
|
614
|
-
content,
|
|
615
|
-
)
|
|
616
|
-
auto_compaction = await self.auto_compact_messages(
|
|
617
|
-
connection=connection,
|
|
618
|
-
context_window_limit=context_window_limit,
|
|
619
|
-
messages=state.messages,
|
|
620
|
-
model_history=[
|
|
621
|
-
ChatMessage.model_validate(message)
|
|
622
|
-
for message in pre_turn_request_messages
|
|
623
|
-
],
|
|
624
|
-
source_message_id=None,
|
|
625
|
-
)
|
|
626
|
-
if auto_compaction is not None:
|
|
627
|
-
marker, _, usage_info = auto_compaction
|
|
628
|
-
next_messages = [*state.messages, marker, user_message]
|
|
629
|
-
self.store.save_messages(next_messages)
|
|
630
|
-
await self.append_event(
|
|
631
|
-
run,
|
|
632
|
-
"context_optimized",
|
|
633
|
-
{
|
|
634
|
-
"message": marker.model_dump(),
|
|
635
|
-
**usage_event_data(usage_info),
|
|
636
|
-
},
|
|
637
|
-
)
|
|
727
|
+
model_tool_specs = [
|
|
728
|
+
*tool_specs(),
|
|
729
|
+
*list(self.mcp_manager.tool_specs()),
|
|
730
|
+
]
|
|
731
|
+
if request_messages is None:
|
|
638
732
|
current_request_messages = self.request_messages_for_content(
|
|
639
733
|
state,
|
|
640
734
|
next_messages,
|
|
641
735
|
content,
|
|
642
736
|
)
|
|
737
|
+
pre_turn_request_messages = self.request_messages_for_content(
|
|
738
|
+
state,
|
|
739
|
+
state.messages,
|
|
740
|
+
content,
|
|
741
|
+
)
|
|
742
|
+
auto_compaction = await self.auto_compact_messages(
|
|
743
|
+
connection=connection,
|
|
744
|
+
context_window_limit=context_window_limit,
|
|
745
|
+
budget_messages=current_request_messages,
|
|
746
|
+
messages=state.messages,
|
|
747
|
+
model_history=pre_turn_request_messages,
|
|
748
|
+
source_message_id=None,
|
|
749
|
+
tools=model_tool_specs,
|
|
750
|
+
)
|
|
751
|
+
if auto_compaction is not None:
|
|
752
|
+
marker, _, usage_info = auto_compaction
|
|
753
|
+
next_messages = [*state.messages, marker, user_message]
|
|
754
|
+
self.store.save_messages(next_messages)
|
|
755
|
+
await self.append_event(
|
|
756
|
+
response,
|
|
757
|
+
"context_optimized",
|
|
758
|
+
{
|
|
759
|
+
"message": marker.model_dump(),
|
|
760
|
+
**usage_event_data(usage_info),
|
|
761
|
+
},
|
|
762
|
+
)
|
|
763
|
+
current_request_messages = self.request_messages_for_content(
|
|
764
|
+
state,
|
|
765
|
+
next_messages,
|
|
766
|
+
content,
|
|
767
|
+
)
|
|
768
|
+
else:
|
|
769
|
+
current_request_messages = request_messages
|
|
770
|
+
auto_compaction = await self.auto_compact_messages(
|
|
771
|
+
connection=connection,
|
|
772
|
+
context_window_limit=context_window_limit,
|
|
773
|
+
messages=next_messages,
|
|
774
|
+
model_history=compact_prompt_chat_messages(
|
|
775
|
+
current_request_messages
|
|
776
|
+
),
|
|
777
|
+
source_message_id=assistant_message.id,
|
|
778
|
+
tools=model_tool_specs,
|
|
779
|
+
)
|
|
780
|
+
if auto_compaction is not None:
|
|
781
|
+
marker, replacement_history, usage_info = auto_compaction
|
|
782
|
+
assistant_message = assistant_message.model_copy(
|
|
783
|
+
update={"usage_info": usage_info}
|
|
784
|
+
)
|
|
785
|
+
next_messages = append_or_replace_message(
|
|
786
|
+
[*next_messages, marker], assistant_message
|
|
787
|
+
)
|
|
788
|
+
self.store.save_messages(next_messages)
|
|
789
|
+
await self.append_event(
|
|
790
|
+
response,
|
|
791
|
+
"context_optimized",
|
|
792
|
+
{
|
|
793
|
+
"message": marker.model_dump(),
|
|
794
|
+
**usage_event_data(usage_info),
|
|
795
|
+
},
|
|
796
|
+
)
|
|
797
|
+
current_request_messages = model_request_messages_data(
|
|
798
|
+
[
|
|
799
|
+
*runtime_context_messages(
|
|
800
|
+
self.cwd, state.settings.agent_prompt
|
|
801
|
+
),
|
|
802
|
+
*explicit_skill_messages(self.cwd, self.store, content),
|
|
803
|
+
*replacement_history,
|
|
804
|
+
]
|
|
805
|
+
)
|
|
806
|
+
context_usage_messages = (
|
|
807
|
+
usage_request_messages
|
|
808
|
+
if usage_request_messages is not None
|
|
809
|
+
else current_request_messages
|
|
810
|
+
)
|
|
643
811
|
|
|
644
812
|
async def review_tool_approval(request: ApprovalReviewRequest):
|
|
645
813
|
return await review_approval_request(
|
|
@@ -672,7 +840,7 @@ class WorkspaceRuntime:
|
|
|
672
840
|
conversation: Sequence[Mapping[str, object]],
|
|
673
841
|
) -> AgentContextUpdate | None:
|
|
674
842
|
nonlocal next_messages
|
|
675
|
-
if not is_current_generation() or
|
|
843
|
+
if not is_current_generation() or response.discard_on_cancel:
|
|
676
844
|
return None
|
|
677
845
|
assistant_snapshot = StoredMessage(
|
|
678
846
|
author="assistant",
|
|
@@ -684,35 +852,13 @@ class WorkspaceRuntime:
|
|
|
684
852
|
tools=list(assistant_output.tools.values()),
|
|
685
853
|
usage_info=self.store.read_usage_info(),
|
|
686
854
|
)
|
|
687
|
-
model_history: list[ChatMessage] = []
|
|
688
|
-
for message in conversation:
|
|
689
|
-
role_value = message.get("role")
|
|
690
|
-
content = str(message.get("content") or "")
|
|
691
|
-
if role_value == "system":
|
|
692
|
-
model_history.append(
|
|
693
|
-
ChatMessage(role="system", content=content)
|
|
694
|
-
)
|
|
695
|
-
if role_value == "user":
|
|
696
|
-
model_history.append(
|
|
697
|
-
ChatMessage(role="user", content=content)
|
|
698
|
-
)
|
|
699
|
-
if role_value == "assistant":
|
|
700
|
-
model_history.append(
|
|
701
|
-
ChatMessage(role="assistant", content=content)
|
|
702
|
-
)
|
|
703
|
-
if role_value == "tool":
|
|
704
|
-
model_history.append(
|
|
705
|
-
ChatMessage(
|
|
706
|
-
role="user",
|
|
707
|
-
content=f"Tool result: {content}",
|
|
708
|
-
)
|
|
709
|
-
)
|
|
710
855
|
auto_result = await self.auto_compact_messages(
|
|
711
856
|
connection=connection,
|
|
712
857
|
context_window_limit=context_window_limit,
|
|
713
858
|
messages=next_messages,
|
|
714
|
-
model_history=
|
|
859
|
+
model_history=compact_prompt_chat_messages(conversation),
|
|
715
860
|
source_message_id=assistant_snapshot.id,
|
|
861
|
+
tools=model_tool_specs,
|
|
716
862
|
)
|
|
717
863
|
if auto_result is None:
|
|
718
864
|
return None
|
|
@@ -747,14 +893,18 @@ class WorkspaceRuntime:
|
|
|
747
893
|
messages=current_request_messages,
|
|
748
894
|
tool_runner=tool_runner,
|
|
749
895
|
):
|
|
750
|
-
if not is_current_generation() or
|
|
896
|
+
if not is_current_generation() or response.discard_on_cancel:
|
|
751
897
|
raise asyncio.CancelledError
|
|
752
898
|
run_event_data = event.data
|
|
753
899
|
should_append_run_event = event.event != "usage"
|
|
754
900
|
snapshot_after_event: StoredMessage | None = None
|
|
755
901
|
if event.event == "start":
|
|
756
902
|
event_id = event.data.get("id")
|
|
757
|
-
if
|
|
903
|
+
if initial_assistant_message is not None:
|
|
904
|
+
assistant_output.set_assistant_id(assistant_message.id)
|
|
905
|
+
run_event_data = {"id": assistant_message.id}
|
|
906
|
+
snapshot_after_event = persist_assistant()
|
|
907
|
+
elif isinstance(event_id, str):
|
|
758
908
|
assistant_message = assistant_message.model_copy(
|
|
759
909
|
update={"id": event_id}
|
|
760
910
|
)
|
|
@@ -763,16 +913,24 @@ class WorkspaceRuntime:
|
|
|
763
913
|
if event.event == "output_start":
|
|
764
914
|
index = event.data.get("index")
|
|
765
915
|
if isinstance(index, int):
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
916
|
+
output_index = index + output_start_index - 1
|
|
917
|
+
current_output_index = output_index
|
|
918
|
+
run_event_data = {**event.data, "index": output_index}
|
|
919
|
+
response.active_output = None
|
|
920
|
+
assistant_output.start_group(output_index)
|
|
769
921
|
snapshot_after_event = persist_assistant()
|
|
770
922
|
if event.event == "output_done":
|
|
771
|
-
|
|
923
|
+
index = event.data.get("index")
|
|
924
|
+
if isinstance(index, int):
|
|
925
|
+
run_event_data = {
|
|
926
|
+
**event.data,
|
|
927
|
+
"index": index + output_start_index - 1,
|
|
928
|
+
}
|
|
929
|
+
response.active_output = None
|
|
772
930
|
if event.event == "tool_start":
|
|
773
931
|
tool = event.data.get("tool")
|
|
774
932
|
if isinstance(tool, dict) and isinstance(tool.get("id"), str):
|
|
775
|
-
|
|
933
|
+
response.active_output = None
|
|
776
934
|
current_tool_id = tool["id"]
|
|
777
935
|
assistant_output.start_tool(
|
|
778
936
|
StoredToolItem.model_validate(tool)
|
|
@@ -790,13 +948,13 @@ class WorkspaceRuntime:
|
|
|
790
948
|
assistant_output.update_tool(tool_id, event.data)
|
|
791
949
|
snapshot_after_event = persist_assistant()
|
|
792
950
|
if event.event == "delta":
|
|
793
|
-
|
|
951
|
+
response.active_output = "text"
|
|
794
952
|
assistant_output.append_text(
|
|
795
953
|
str(event.data.get("content") or "")
|
|
796
954
|
)
|
|
797
955
|
snapshot_after_event = persist_assistant_progress()
|
|
798
956
|
if event.event == "thinking_delta":
|
|
799
|
-
|
|
957
|
+
response.active_output = "thinking"
|
|
800
958
|
assistant_output.append_thinking(
|
|
801
959
|
str(event.data.get("content") or "")
|
|
802
960
|
)
|
|
@@ -824,8 +982,12 @@ class WorkspaceRuntime:
|
|
|
824
982
|
if event.event == "done":
|
|
825
983
|
message = event.data.get("message")
|
|
826
984
|
if isinstance(message, dict):
|
|
827
|
-
|
|
828
|
-
assistant_output.apply_done_message(
|
|
985
|
+
response.active_output = None
|
|
986
|
+
assistant_output.apply_done_message(
|
|
987
|
+
message,
|
|
988
|
+
content_prefix=initial_assistant_content,
|
|
989
|
+
thinking_prefix=initial_assistant_thinking,
|
|
990
|
+
)
|
|
829
991
|
response_usage_info = self.store.read_usage_info()
|
|
830
992
|
final_usage_info = turn_usage_info
|
|
831
993
|
if (
|
|
@@ -834,12 +996,13 @@ class WorkspaceRuntime:
|
|
|
834
996
|
):
|
|
835
997
|
final_usage_info = update_context_usage_for_response(
|
|
836
998
|
final_usage_info or response_usage_info,
|
|
837
|
-
messages=
|
|
999
|
+
messages=context_usage_messages,
|
|
838
1000
|
output_content=assistant_output.content,
|
|
839
1001
|
output_tools=[
|
|
840
1002
|
tool.model_dump(exclude_none=True)
|
|
841
1003
|
for tool in assistant_output.tools.values()
|
|
842
1004
|
],
|
|
1005
|
+
request_tools=model_tool_specs,
|
|
843
1006
|
model_context_window=context_window_limit,
|
|
844
1007
|
)
|
|
845
1008
|
self.store.save_usage_info(final_usage_info)
|
|
@@ -849,27 +1012,35 @@ class WorkspaceRuntime:
|
|
|
849
1012
|
"message": stream_message_data(snapshot_after_event)
|
|
850
1013
|
}
|
|
851
1014
|
if event.event == "done" and snapshot_after_event is not None:
|
|
852
|
-
await self.append_snapshot(
|
|
853
|
-
await self.append_event(
|
|
1015
|
+
await self.append_snapshot(response, snapshot_after_event)
|
|
1016
|
+
await self.append_event(response, event.event, run_event_data)
|
|
854
1017
|
else:
|
|
855
1018
|
if should_append_run_event:
|
|
856
|
-
await self.append_event(
|
|
1019
|
+
await self.append_event(
|
|
1020
|
+
response, event.event, run_event_data
|
|
1021
|
+
)
|
|
857
1022
|
if snapshot_after_event is not None:
|
|
858
|
-
await self.append_snapshot(
|
|
1023
|
+
await self.append_snapshot(response, snapshot_after_event)
|
|
859
1024
|
except asyncio.CancelledError:
|
|
860
|
-
logger.info("Workspace
|
|
861
|
-
if not
|
|
1025
|
+
logger.info("Workspace response stopped")
|
|
1026
|
+
if not response.discard_on_cancel:
|
|
862
1027
|
interrupted_snapshot = persist_assistant("interrupted")
|
|
863
1028
|
if interrupted_snapshot is not None:
|
|
864
|
-
await self.append_snapshot(
|
|
1029
|
+
await self.append_snapshot(response, interrupted_snapshot)
|
|
865
1030
|
await self.append_event(
|
|
866
|
-
|
|
1031
|
+
response,
|
|
867
1032
|
"error",
|
|
868
1033
|
{"message": "Response stopped."},
|
|
869
1034
|
)
|
|
870
1035
|
raise
|
|
871
1036
|
except Exception as error:
|
|
872
1037
|
logger.exception("Workspace response failed")
|
|
1038
|
+
if is_context_window_error(error):
|
|
1039
|
+
usage_info = full_context_usage(
|
|
1040
|
+
self.store.read_usage_info(),
|
|
1041
|
+
model_context_window=context_window_limit,
|
|
1042
|
+
)
|
|
1043
|
+
self.store.save_usage_info(usage_info)
|
|
873
1044
|
if (
|
|
874
1045
|
current_tool_id is not None
|
|
875
1046
|
and current_tool_id in assistant_output.tools
|
|
@@ -877,7 +1048,10 @@ class WorkspaceRuntime:
|
|
|
877
1048
|
):
|
|
878
1049
|
assistant_output.update_tool(
|
|
879
1050
|
current_tool_id,
|
|
880
|
-
{
|
|
1051
|
+
{
|
|
1052
|
+
"result": text_tool_result(str(error) or "Tool failed."),
|
|
1053
|
+
"status": "failed",
|
|
1054
|
+
},
|
|
881
1055
|
)
|
|
882
1056
|
error_item = assistant_output.append_error(
|
|
883
1057
|
run_error_output_item(
|
|
@@ -887,23 +1061,30 @@ class WorkspaceRuntime:
|
|
|
887
1061
|
)
|
|
888
1062
|
failed_snapshot = persist_assistant("failed")
|
|
889
1063
|
if failed_snapshot is not None:
|
|
890
|
-
await self.append_snapshot(
|
|
891
|
-
await self.append_event(
|
|
1064
|
+
await self.append_snapshot(response, failed_snapshot)
|
|
1065
|
+
await self.append_event(
|
|
1066
|
+
response, "error", run_error_event_data(error_item)
|
|
1067
|
+
)
|
|
892
1068
|
finally:
|
|
893
|
-
|
|
894
|
-
async with
|
|
895
|
-
|
|
896
|
-
if self.
|
|
897
|
-
self.
|
|
1069
|
+
response.is_done = True
|
|
1070
|
+
async with response.condition:
|
|
1071
|
+
response.condition.notify_all()
|
|
1072
|
+
if self.active_response is response:
|
|
1073
|
+
self.active_response = None
|
|
898
1074
|
|
|
899
|
-
|
|
900
|
-
return
|
|
1075
|
+
response.task = asyncio.create_task(response_task())
|
|
1076
|
+
return response
|
|
901
1077
|
|
|
902
|
-
async def
|
|
903
|
-
self,
|
|
1078
|
+
async def response_stream(
|
|
1079
|
+
self,
|
|
1080
|
+
response: WorkspaceResponse,
|
|
1081
|
+
after: int = 0,
|
|
1082
|
+
include_snapshots: bool = True,
|
|
904
1083
|
) -> AsyncIterator[str]:
|
|
905
1084
|
next_event_index = after + 1
|
|
906
|
-
reconnect_snapshot =
|
|
1085
|
+
reconnect_snapshot = (
|
|
1086
|
+
response_snapshot_data_at(response, after) if after > 0 else None
|
|
1087
|
+
)
|
|
907
1088
|
if include_snapshots and reconnect_snapshot is not None:
|
|
908
1089
|
yield stream_event(
|
|
909
1090
|
"snapshot",
|
|
@@ -911,15 +1092,17 @@ class WorkspaceRuntime:
|
|
|
911
1092
|
event_id=after,
|
|
912
1093
|
)
|
|
913
1094
|
while True:
|
|
914
|
-
async with
|
|
1095
|
+
async with response.condition:
|
|
915
1096
|
|
|
916
1097
|
def has_next_event(index: int = next_event_index) -> bool:
|
|
917
|
-
return
|
|
918
|
-
event_index >= index for event_index, _, _ in
|
|
1098
|
+
return response.is_done or any(
|
|
1099
|
+
event_index >= index for event_index, _, _ in response.events
|
|
919
1100
|
)
|
|
920
1101
|
|
|
921
|
-
await
|
|
922
|
-
events = [
|
|
1102
|
+
await response.condition.wait_for(has_next_event)
|
|
1103
|
+
events = [
|
|
1104
|
+
event for event in response.events if event[0] >= next_event_index
|
|
1105
|
+
]
|
|
923
1106
|
|
|
924
1107
|
for index, event, data in events:
|
|
925
1108
|
next_event_index = index + 1
|
|
@@ -929,19 +1112,23 @@ class WorkspaceRuntime:
|
|
|
929
1112
|
if event in {"done", "error"}:
|
|
930
1113
|
return
|
|
931
1114
|
|
|
932
|
-
if
|
|
1115
|
+
if response.is_done and not events:
|
|
933
1116
|
return
|
|
934
1117
|
|
|
935
|
-
def
|
|
936
|
-
|
|
937
|
-
if
|
|
938
|
-
raise HTTPException(status_code=404, detail="
|
|
939
|
-
return
|
|
1118
|
+
def stream_current_response(self) -> WorkspaceResponse:
|
|
1119
|
+
response = self.current_response()
|
|
1120
|
+
if response is None:
|
|
1121
|
+
raise HTTPException(status_code=404, detail="Response not found.")
|
|
1122
|
+
return response
|
|
940
1123
|
|
|
941
|
-
def
|
|
942
|
-
|
|
943
|
-
if
|
|
944
|
-
|
|
1124
|
+
def stop_response(self) -> None:
|
|
1125
|
+
response = self.current_response()
|
|
1126
|
+
if (
|
|
1127
|
+
response is not None
|
|
1128
|
+
and response.task is not None
|
|
1129
|
+
and not response.task.done()
|
|
1130
|
+
):
|
|
1131
|
+
response.task.cancel()
|
|
945
1132
|
|
|
946
1133
|
def compact_stream(self) -> AsyncIterator[str]:
|
|
947
1134
|
async def run_manual_compact(
|
|
@@ -953,7 +1140,7 @@ class WorkspaceRuntime:
|
|
|
953
1140
|
) -> tuple[StoredMessage, TokenUsageInfo]:
|
|
954
1141
|
logger.info("Workspace compact requested")
|
|
955
1142
|
try:
|
|
956
|
-
model_history = [
|
|
1143
|
+
model_history: list[ChatMessage | Mapping[str, object]] = [
|
|
957
1144
|
*runtime_context_messages(self.cwd, state.settings.agent_prompt),
|
|
958
1145
|
*workspace_chat_messages(
|
|
959
1146
|
state.messages,
|
|
@@ -999,7 +1186,7 @@ class WorkspaceRuntime:
|
|
|
999
1186
|
self.active_compact_task = None
|
|
1000
1187
|
|
|
1001
1188
|
if self.active_compact_task is None:
|
|
1002
|
-
if self.
|
|
1189
|
+
if self.current_response() is not None:
|
|
1003
1190
|
raise HTTPException(
|
|
1004
1191
|
status_code=409,
|
|
1005
1192
|
detail="Compact is unavailable while Flowent is responding.",
|
|
@@ -1023,10 +1210,31 @@ class WorkspaceRuntime:
|
|
|
1023
1210
|
async def compact_events() -> AsyncIterator[str]:
|
|
1024
1211
|
try:
|
|
1025
1212
|
marker, usage_info = await asyncio.shield(compact_task)
|
|
1026
|
-
except Exception:
|
|
1213
|
+
except Exception as error:
|
|
1214
|
+
assistant_id = str(uuid4())
|
|
1215
|
+
assistant_output = AssistantOutputBuilder(assistant_id)
|
|
1216
|
+
error_item = run_error_output_item(assistant_id, str(error)).model_copy(
|
|
1217
|
+
update={"message": USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE}
|
|
1218
|
+
)
|
|
1219
|
+
assistant_output.append_error(error_item)
|
|
1220
|
+
failed_message = StoredMessage(
|
|
1221
|
+
author="assistant",
|
|
1222
|
+
content="",
|
|
1223
|
+
groups=assistant_output.groups,
|
|
1224
|
+
id=assistant_id,
|
|
1225
|
+
status="failed",
|
|
1226
|
+
)
|
|
1227
|
+
self.store.save_messages(
|
|
1228
|
+
[*self.store.read_state().messages, failed_message]
|
|
1229
|
+
)
|
|
1230
|
+
failed_message_data = stream_message_data(failed_message)
|
|
1231
|
+
yield stream_event("snapshot", {"message": failed_message_data})
|
|
1027
1232
|
yield stream_event(
|
|
1028
1233
|
"error",
|
|
1029
|
-
{
|
|
1234
|
+
{
|
|
1235
|
+
"error": error_item.model_dump(exclude_none=True),
|
|
1236
|
+
"message": USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE,
|
|
1237
|
+
},
|
|
1030
1238
|
)
|
|
1031
1239
|
return
|
|
1032
1240
|
|