flowent 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +1 -1
- package/backend/src/flowent/api_models.py +13 -8
- package/backend/src/flowent/llm.py +50 -6
- package/backend/src/flowent/routes/providers.py +33 -10
- package/backend/src/flowent/routes/system.py +5 -6
- package/backend/src/flowent/routes/workspace.py +33 -23
- package/backend/src/flowent/state/models.py +3 -2
- package/backend/src/flowent/state/schema.py +5 -0
- package/backend/src/flowent/state/store.py +9 -3
- package/backend/src/flowent/static/assets/index-BaZmIi2Y.js +98 -0
- package/backend/src/flowent/static/assets/index-EC37agAH.css +2 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/workspace/context.py +128 -42
- package/backend/src/flowent/workspace/events.py +5 -7
- package/backend/src/flowent/workspace/output.py +125 -3
- package/backend/src/flowent/workspace/runtime.py +299 -180
- package/backend/uv.lock +1 -1
- package/dist/frontend/assets/index-BaZmIi2Y.js +98 -0
- package/dist/frontend/assets/index-EC37agAH.css +2 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +1 -1
- package/backend/src/flowent/static/assets/index-CvWZZMtK.css +0 -2
- package/backend/src/flowent/static/assets/index-ma2v8oW7.js +0 -90
- package/dist/frontend/assets/index-CvWZZMtK.css +0 -2
- package/dist/frontend/assets/index-ma2v8oW7.js +0 -90
|
@@ -37,16 +37,19 @@ from flowent.usage import (
|
|
|
37
37
|
from flowent.workspace.context import (
|
|
38
38
|
COMPACTED_CONTEXT_MARKER,
|
|
39
39
|
OPTIMIZED_CONTEXT_MARKER,
|
|
40
|
+
compact_prompt_chat_messages,
|
|
40
41
|
context_window_for_settings,
|
|
42
|
+
model_request_messages_data,
|
|
43
|
+
model_visible_assistant_output_messages,
|
|
41
44
|
should_auto_compact,
|
|
42
45
|
update_context_usage_for_response,
|
|
43
46
|
usage_event_data,
|
|
44
47
|
workspace_chat_messages,
|
|
45
48
|
)
|
|
46
49
|
from flowent.workspace.events import (
|
|
47
|
-
|
|
50
|
+
WorkspaceResponse,
|
|
48
51
|
append_or_replace_message,
|
|
49
|
-
|
|
52
|
+
response_snapshot_data_at,
|
|
50
53
|
stream_event,
|
|
51
54
|
stream_message_data,
|
|
52
55
|
)
|
|
@@ -54,8 +57,10 @@ from flowent.workspace.output import (
|
|
|
54
57
|
EMPTY_MODEL_RESPONSE_DETAIL,
|
|
55
58
|
AssistantOutputBuilder,
|
|
56
59
|
approval_transcript,
|
|
60
|
+
assistant_retry_output_start_index,
|
|
57
61
|
run_error_event_data,
|
|
58
62
|
run_error_output_item,
|
|
63
|
+
trim_assistant_message_at_error,
|
|
59
64
|
)
|
|
60
65
|
|
|
61
66
|
logger = logging.getLogger("flowent.workspace.runtime")
|
|
@@ -84,8 +89,7 @@ class WorkspaceRuntime:
|
|
|
84
89
|
self.cwd = cwd
|
|
85
90
|
self.mcp_manager = mcp_manager
|
|
86
91
|
self.store = store
|
|
87
|
-
self.
|
|
88
|
-
self.active_run_id: str | None = None
|
|
92
|
+
self.active_response: WorkspaceResponse | None = None
|
|
89
93
|
self.generation = 0
|
|
90
94
|
self.active_compact_task: WorkspaceCompactTask | None = None
|
|
91
95
|
|
|
@@ -102,14 +106,13 @@ class WorkspaceRuntime:
|
|
|
102
106
|
compacted_context,
|
|
103
107
|
checkpoint,
|
|
104
108
|
)
|
|
105
|
-
return
|
|
106
|
-
|
|
107
|
-
for message in [
|
|
109
|
+
return model_request_messages_data(
|
|
110
|
+
[
|
|
108
111
|
*runtime_context_messages(self.cwd, state.settings.agent_prompt),
|
|
109
112
|
*explicit_skill_messages(self.cwd, self.store, content),
|
|
110
113
|
*chat_messages,
|
|
111
114
|
]
|
|
112
|
-
|
|
115
|
+
)
|
|
113
116
|
|
|
114
117
|
async def save_context_checkpoint(
|
|
115
118
|
self,
|
|
@@ -117,16 +120,17 @@ class WorkspaceRuntime:
|
|
|
117
120
|
connection: ProviderConnection,
|
|
118
121
|
context_window_limit: int,
|
|
119
122
|
messages: list[StoredMessage],
|
|
120
|
-
model_history:
|
|
123
|
+
model_history: Sequence[ChatMessage | Mapping[str, object]],
|
|
121
124
|
marker_content: str,
|
|
122
125
|
source_message_id: str | None = None,
|
|
123
126
|
trigger: Literal["manual", "auto"],
|
|
124
127
|
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo]:
|
|
128
|
+
compact_model_history = compact_prompt_chat_messages(model_history)
|
|
125
129
|
compact_result = await self.compact_provider.compact(
|
|
126
130
|
connection,
|
|
127
131
|
CompactInput(
|
|
128
132
|
messages=messages,
|
|
129
|
-
model_history=
|
|
133
|
+
model_history=compact_model_history,
|
|
130
134
|
retained_message_token_budget=AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET,
|
|
131
135
|
trigger=trigger,
|
|
132
136
|
),
|
|
@@ -149,6 +153,7 @@ class WorkspaceRuntime:
|
|
|
149
153
|
author="system",
|
|
150
154
|
content=marker_content,
|
|
151
155
|
id=str(uuid4()),
|
|
156
|
+
summary=compact_result.summary,
|
|
152
157
|
usage_info=usage_info,
|
|
153
158
|
)
|
|
154
159
|
self.store.save_compaction_checkpoint(
|
|
@@ -184,7 +189,7 @@ class WorkspaceRuntime:
|
|
|
184
189
|
connection: ProviderConnection,
|
|
185
190
|
context_window_limit: int,
|
|
186
191
|
messages: list[StoredMessage],
|
|
187
|
-
model_history:
|
|
192
|
+
model_history: Sequence[ChatMessage | Mapping[str, object]],
|
|
188
193
|
source_message_id: str | None = None,
|
|
189
194
|
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
|
|
190
195
|
if not should_auto_compact(
|
|
@@ -218,7 +223,7 @@ class WorkspaceRuntime:
|
|
|
218
223
|
)
|
|
219
224
|
next_messages = [*state.messages, user_message]
|
|
220
225
|
self.store.save_messages(next_messages)
|
|
221
|
-
model_history = [
|
|
226
|
+
model_history: list[ChatMessage | Mapping[str, object]] = [
|
|
222
227
|
*runtime_context_messages(self.cwd, state.settings.agent_prompt),
|
|
223
228
|
*workspace_chat_messages(
|
|
224
229
|
state.messages,
|
|
@@ -372,14 +377,14 @@ class WorkspaceRuntime:
|
|
|
372
377
|
exc_info=(type(result), result, result.__traceback__),
|
|
373
378
|
)
|
|
374
379
|
|
|
375
|
-
async def
|
|
380
|
+
async def stop_response_for_shutdown(self) -> None:
|
|
376
381
|
tasks: list[asyncio.Task[None]] = []
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
tasks.append(
|
|
382
|
-
await self.gather_shutdown_tasks("Workspace
|
|
382
|
+
response = self.active_response
|
|
383
|
+
if response is not None and response.task is not None:
|
|
384
|
+
if not response.task.done():
|
|
385
|
+
response.task.cancel()
|
|
386
|
+
tasks.append(response.task)
|
|
387
|
+
await self.gather_shutdown_tasks("Workspace response", tasks)
|
|
383
388
|
|
|
384
389
|
async def stop_compact_for_shutdown(self) -> None:
|
|
385
390
|
if self.active_compact_task is None:
|
|
@@ -393,64 +398,72 @@ class WorkspaceRuntime:
|
|
|
393
398
|
self.store.save_is_compacting(False)
|
|
394
399
|
|
|
395
400
|
async def stop_for_shutdown(self) -> None:
|
|
396
|
-
await self.
|
|
401
|
+
await self.stop_response_for_shutdown()
|
|
397
402
|
await self.stop_compact_for_shutdown()
|
|
398
403
|
|
|
399
|
-
def
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
run = self.runs.get(self.active_run_id)
|
|
403
|
-
if run is None or run.is_done:
|
|
404
|
+
def current_response(self) -> WorkspaceResponse | None:
|
|
405
|
+
response = self.active_response
|
|
406
|
+
if response is None or response.is_done:
|
|
404
407
|
return None
|
|
405
|
-
return
|
|
408
|
+
return response
|
|
406
409
|
|
|
407
|
-
def
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
410
|
+
def has_active_response(self) -> bool:
|
|
411
|
+
response = self.active_response
|
|
412
|
+
return (
|
|
413
|
+
response is not None
|
|
414
|
+
and not response.is_done
|
|
415
|
+
and response.task is not None
|
|
416
|
+
and not response.task.done()
|
|
411
417
|
)
|
|
412
418
|
|
|
413
419
|
def clear(self) -> list[StoredMessage]:
|
|
414
420
|
self.generation += 1
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
+
response = self.active_response
|
|
422
|
+
if response is not None:
|
|
423
|
+
response.is_done = True
|
|
424
|
+
if response.task is not None and not response.task.done():
|
|
425
|
+
response.discard_on_cancel = True
|
|
426
|
+
response.task.cancel()
|
|
421
427
|
return self.store.save_messages([])
|
|
422
428
|
|
|
423
|
-
async def
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
429
|
+
async def notify_cleared_response(self) -> None:
|
|
430
|
+
response = self.active_response
|
|
431
|
+
if response is None:
|
|
432
|
+
return
|
|
433
|
+
async with response.condition:
|
|
434
|
+
response.condition.notify_all()
|
|
427
435
|
|
|
428
436
|
async def append_event(
|
|
429
|
-
self,
|
|
437
|
+
self, response: WorkspaceResponse, event: str, data: dict[str, object]
|
|
430
438
|
) -> None:
|
|
431
|
-
async with
|
|
432
|
-
|
|
433
|
-
|
|
439
|
+
async with response.condition:
|
|
440
|
+
response.events.append((response.latest_event_index + 1, event, data))
|
|
441
|
+
response.condition.notify_all()
|
|
434
442
|
|
|
435
|
-
async def append_snapshot(
|
|
443
|
+
async def append_snapshot(
|
|
444
|
+
self, response: WorkspaceResponse, message: StoredMessage
|
|
445
|
+
) -> None:
|
|
436
446
|
if message.author != "assistant":
|
|
437
447
|
return
|
|
438
|
-
|
|
448
|
+
response.latest_snapshot = message
|
|
439
449
|
await self.append_event(
|
|
440
|
-
|
|
450
|
+
response,
|
|
441
451
|
"snapshot",
|
|
442
|
-
{"message": stream_message_data(message,
|
|
452
|
+
{"message": stream_message_data(message, response.active_output)},
|
|
443
453
|
)
|
|
444
454
|
|
|
445
|
-
def
|
|
455
|
+
def start_response(
|
|
446
456
|
self, content: str, *, message_id: str | None = None
|
|
447
|
-
) ->
|
|
448
|
-
if self.
|
|
449
|
-
active_run = self.active_run()
|
|
457
|
+
) -> WorkspaceResponse:
|
|
458
|
+
if self.has_active_response():
|
|
450
459
|
raise HTTPException(
|
|
451
460
|
status_code=409,
|
|
452
461
|
detail="Response in progress",
|
|
453
|
-
|
|
462
|
+
)
|
|
463
|
+
if self.store.read_is_compacting():
|
|
464
|
+
raise HTTPException(
|
|
465
|
+
status_code=409,
|
|
466
|
+
detail="Context refining in progress. Please wait a moment.",
|
|
454
467
|
)
|
|
455
468
|
state = self.store.read_state()
|
|
456
469
|
user_message_id = message_id or str(uuid4())
|
|
@@ -463,7 +476,7 @@ class WorkspaceRuntime:
|
|
|
463
476
|
)
|
|
464
477
|
next_messages = [*state.messages, user_message]
|
|
465
478
|
self.store.save_messages(next_messages)
|
|
466
|
-
return self.
|
|
479
|
+
return self._start_response_from_messages(
|
|
467
480
|
content=content,
|
|
468
481
|
next_messages=next_messages,
|
|
469
482
|
state=state,
|
|
@@ -476,13 +489,16 @@ class WorkspaceRuntime:
|
|
|
476
489
|
*,
|
|
477
490
|
action: Literal["resend", "save"],
|
|
478
491
|
content: str,
|
|
479
|
-
) -> tuple[list[StoredMessage],
|
|
480
|
-
if self.
|
|
481
|
-
active_run = self.active_run()
|
|
492
|
+
) -> tuple[list[StoredMessage], WorkspaceResponse | None]:
|
|
493
|
+
if self.has_active_response():
|
|
482
494
|
raise HTTPException(
|
|
483
495
|
status_code=409,
|
|
484
496
|
detail="Response in progress",
|
|
485
|
-
|
|
497
|
+
)
|
|
498
|
+
if self.store.read_is_compacting():
|
|
499
|
+
raise HTTPException(
|
|
500
|
+
status_code=409,
|
|
501
|
+
detail="Context refining in progress. Please wait a moment.",
|
|
486
502
|
)
|
|
487
503
|
state = self.store.read_state()
|
|
488
504
|
message_index = next(
|
|
@@ -513,50 +529,140 @@ class WorkspaceRuntime:
|
|
|
513
529
|
previous_messages = state.messages[:message_index]
|
|
514
530
|
next_messages = [*previous_messages, updated_message]
|
|
515
531
|
self.store.save_messages(next_messages)
|
|
516
|
-
|
|
532
|
+
response = self._start_response_from_messages(
|
|
517
533
|
content=content,
|
|
518
534
|
next_messages=next_messages,
|
|
519
535
|
state=state.model_copy(update={"messages": previous_messages}),
|
|
520
536
|
user_message=updated_message,
|
|
521
537
|
)
|
|
522
|
-
return next_messages,
|
|
538
|
+
return next_messages, response
|
|
539
|
+
|
|
540
|
+
def retry_error(
|
|
541
|
+
self,
|
|
542
|
+
message_id: str,
|
|
543
|
+
*,
|
|
544
|
+
error_id: str,
|
|
545
|
+
) -> tuple[list[StoredMessage], WorkspaceResponse]:
|
|
546
|
+
if self.has_active_response():
|
|
547
|
+
raise HTTPException(
|
|
548
|
+
status_code=409,
|
|
549
|
+
detail="Response in progress",
|
|
550
|
+
)
|
|
551
|
+
if self.store.read_is_compacting():
|
|
552
|
+
raise HTTPException(
|
|
553
|
+
status_code=409,
|
|
554
|
+
detail="Context refining in progress. Please wait a moment.",
|
|
555
|
+
)
|
|
556
|
+
state = self.store.read_state()
|
|
557
|
+
message_index = next(
|
|
558
|
+
(
|
|
559
|
+
index
|
|
560
|
+
for index, message in enumerate(state.messages)
|
|
561
|
+
if message.id == message_id
|
|
562
|
+
),
|
|
563
|
+
-1,
|
|
564
|
+
)
|
|
565
|
+
if message_index < 0:
|
|
566
|
+
raise HTTPException(status_code=404, detail="Message not found.")
|
|
567
|
+
message = state.messages[message_index]
|
|
568
|
+
if message.author != "assistant":
|
|
569
|
+
raise HTTPException(
|
|
570
|
+
status_code=400, detail="Only assistant errors can be retried."
|
|
571
|
+
)
|
|
572
|
+
previous_user_message = next(
|
|
573
|
+
(
|
|
574
|
+
current_message
|
|
575
|
+
for current_message in reversed(state.messages[:message_index])
|
|
576
|
+
if current_message.author == "user"
|
|
577
|
+
),
|
|
578
|
+
None,
|
|
579
|
+
)
|
|
580
|
+
if previous_user_message is None:
|
|
581
|
+
raise HTTPException(status_code=400, detail="Message history is invalid.")
|
|
582
|
+
trimmed_message = trim_assistant_message_at_error(
|
|
583
|
+
message,
|
|
584
|
+
error_id,
|
|
585
|
+
status="running",
|
|
586
|
+
)
|
|
587
|
+
if trimmed_message is None:
|
|
588
|
+
raise HTTPException(status_code=404, detail="Error block not found.")
|
|
589
|
+
|
|
590
|
+
previous_messages = state.messages[:message_index]
|
|
591
|
+
next_messages = [*previous_messages, trimmed_message]
|
|
592
|
+
self.store.save_messages(next_messages)
|
|
593
|
+
state_before_assistant = state.model_copy(
|
|
594
|
+
update={"messages": previous_messages}
|
|
595
|
+
)
|
|
596
|
+
base_request_messages = self.request_messages_for_content(
|
|
597
|
+
state_before_assistant,
|
|
598
|
+
previous_messages,
|
|
599
|
+
previous_user_message.content,
|
|
600
|
+
)
|
|
601
|
+
request_messages = [
|
|
602
|
+
*base_request_messages,
|
|
603
|
+
*model_visible_assistant_output_messages(trimmed_message),
|
|
604
|
+
]
|
|
605
|
+
response = self._start_response_from_messages(
|
|
606
|
+
content=previous_user_message.content,
|
|
607
|
+
initial_assistant_message=trimmed_message,
|
|
608
|
+
next_messages=next_messages,
|
|
609
|
+
output_start_index=assistant_retry_output_start_index(trimmed_message),
|
|
610
|
+
request_messages=request_messages,
|
|
611
|
+
state=state_before_assistant,
|
|
612
|
+
usage_request_messages=base_request_messages,
|
|
613
|
+
user_message=previous_user_message,
|
|
614
|
+
)
|
|
615
|
+
return next_messages, response
|
|
523
616
|
|
|
524
|
-
def
|
|
617
|
+
def _start_response_from_messages(
|
|
525
618
|
self,
|
|
526
619
|
*,
|
|
527
620
|
content: str,
|
|
621
|
+
initial_assistant_message: StoredMessage | None = None,
|
|
528
622
|
next_messages: list[StoredMessage],
|
|
623
|
+
output_start_index: int = 1,
|
|
624
|
+
request_messages: list[dict[str, object]] | None = None,
|
|
529
625
|
state: StoredState,
|
|
626
|
+
usage_request_messages: list[dict[str, object]] | None = None,
|
|
530
627
|
user_message: StoredMessage,
|
|
531
|
-
) ->
|
|
628
|
+
) -> WorkspaceResponse:
|
|
532
629
|
connection = selected_connection(state)
|
|
533
630
|
context_window_limit = context_window_for_settings(state.settings)
|
|
534
|
-
|
|
631
|
+
response = WorkspaceResponse(
|
|
535
632
|
condition=asyncio.Condition(),
|
|
536
633
|
generation=self.generation,
|
|
537
634
|
)
|
|
538
|
-
self.
|
|
539
|
-
self.active_run_id = run.id
|
|
635
|
+
self.active_response = response
|
|
540
636
|
|
|
541
|
-
async def
|
|
637
|
+
async def response_task() -> None:
|
|
542
638
|
nonlocal next_messages
|
|
543
|
-
assistant_message =
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
639
|
+
assistant_message = (
|
|
640
|
+
initial_assistant_message
|
|
641
|
+
if initial_assistant_message is not None
|
|
642
|
+
else StoredMessage(
|
|
643
|
+
author="assistant",
|
|
644
|
+
content="",
|
|
645
|
+
id=str(uuid4()),
|
|
646
|
+
status="running",
|
|
647
|
+
)
|
|
648
|
+
)
|
|
649
|
+
assistant_output = (
|
|
650
|
+
AssistantOutputBuilder.from_message(assistant_message)
|
|
651
|
+
if initial_assistant_message is not None
|
|
652
|
+
else AssistantOutputBuilder(assistant_message.id)
|
|
548
653
|
)
|
|
549
|
-
|
|
654
|
+
initial_assistant_content = assistant_output.content
|
|
655
|
+
initial_assistant_thinking = assistant_output.thinking
|
|
550
656
|
last_progress_flush_at = 0.0
|
|
551
657
|
|
|
552
658
|
def is_current_generation() -> bool:
|
|
553
|
-
return
|
|
659
|
+
return response.generation == self.generation
|
|
554
660
|
|
|
555
661
|
def update_assistant_message(
|
|
556
662
|
status: str = "running", *, persist: bool
|
|
557
663
|
) -> StoredMessage | None:
|
|
558
664
|
nonlocal next_messages, assistant_message
|
|
559
|
-
if not is_current_generation() or
|
|
665
|
+
if not is_current_generation() or response.discard_on_cancel:
|
|
560
666
|
return None
|
|
561
667
|
assistant_message = StoredMessage(
|
|
562
668
|
author="assistant",
|
|
@@ -603,43 +709,48 @@ class WorkspaceRuntime:
|
|
|
603
709
|
turn_usage_info: TokenUsageInfo | None = None
|
|
604
710
|
current_output_index = 0
|
|
605
711
|
latest_usage_output_index: int | None = None
|
|
606
|
-
|
|
607
|
-
state,
|
|
608
|
-
next_messages,
|
|
609
|
-
content,
|
|
610
|
-
)
|
|
611
|
-
pre_turn_request_messages = self.request_messages_for_content(
|
|
612
|
-
state,
|
|
613
|
-
state.messages,
|
|
614
|
-
content,
|
|
615
|
-
)
|
|
616
|
-
auto_compaction = await self.auto_compact_messages(
|
|
617
|
-
connection=connection,
|
|
618
|
-
context_window_limit=context_window_limit,
|
|
619
|
-
messages=state.messages,
|
|
620
|
-
model_history=[
|
|
621
|
-
ChatMessage.model_validate(message)
|
|
622
|
-
for message in pre_turn_request_messages
|
|
623
|
-
],
|
|
624
|
-
source_message_id=None,
|
|
625
|
-
)
|
|
626
|
-
if auto_compaction is not None:
|
|
627
|
-
marker, _, usage_info = auto_compaction
|
|
628
|
-
next_messages = [*state.messages, marker, user_message]
|
|
629
|
-
self.store.save_messages(next_messages)
|
|
630
|
-
await self.append_event(
|
|
631
|
-
run,
|
|
632
|
-
"context_optimized",
|
|
633
|
-
{
|
|
634
|
-
"message": marker.model_dump(),
|
|
635
|
-
**usage_event_data(usage_info),
|
|
636
|
-
},
|
|
637
|
-
)
|
|
712
|
+
if request_messages is None:
|
|
638
713
|
current_request_messages = self.request_messages_for_content(
|
|
639
714
|
state,
|
|
640
715
|
next_messages,
|
|
641
716
|
content,
|
|
642
717
|
)
|
|
718
|
+
pre_turn_request_messages = self.request_messages_for_content(
|
|
719
|
+
state,
|
|
720
|
+
state.messages,
|
|
721
|
+
content,
|
|
722
|
+
)
|
|
723
|
+
auto_compaction = await self.auto_compact_messages(
|
|
724
|
+
connection=connection,
|
|
725
|
+
context_window_limit=context_window_limit,
|
|
726
|
+
messages=state.messages,
|
|
727
|
+
model_history=pre_turn_request_messages,
|
|
728
|
+
source_message_id=None,
|
|
729
|
+
)
|
|
730
|
+
if auto_compaction is not None:
|
|
731
|
+
marker, _, usage_info = auto_compaction
|
|
732
|
+
next_messages = [*state.messages, marker, user_message]
|
|
733
|
+
self.store.save_messages(next_messages)
|
|
734
|
+
await self.append_event(
|
|
735
|
+
response,
|
|
736
|
+
"context_optimized",
|
|
737
|
+
{
|
|
738
|
+
"message": marker.model_dump(),
|
|
739
|
+
**usage_event_data(usage_info),
|
|
740
|
+
},
|
|
741
|
+
)
|
|
742
|
+
current_request_messages = self.request_messages_for_content(
|
|
743
|
+
state,
|
|
744
|
+
next_messages,
|
|
745
|
+
content,
|
|
746
|
+
)
|
|
747
|
+
else:
|
|
748
|
+
current_request_messages = request_messages
|
|
749
|
+
context_usage_messages = (
|
|
750
|
+
usage_request_messages
|
|
751
|
+
if usage_request_messages is not None
|
|
752
|
+
else current_request_messages
|
|
753
|
+
)
|
|
643
754
|
|
|
644
755
|
async def review_tool_approval(request: ApprovalReviewRequest):
|
|
645
756
|
return await review_approval_request(
|
|
@@ -672,7 +783,7 @@ class WorkspaceRuntime:
|
|
|
672
783
|
conversation: Sequence[Mapping[str, object]],
|
|
673
784
|
) -> AgentContextUpdate | None:
|
|
674
785
|
nonlocal next_messages
|
|
675
|
-
if not is_current_generation() or
|
|
786
|
+
if not is_current_generation() or response.discard_on_cancel:
|
|
676
787
|
return None
|
|
677
788
|
assistant_snapshot = StoredMessage(
|
|
678
789
|
author="assistant",
|
|
@@ -684,34 +795,11 @@ class WorkspaceRuntime:
|
|
|
684
795
|
tools=list(assistant_output.tools.values()),
|
|
685
796
|
usage_info=self.store.read_usage_info(),
|
|
686
797
|
)
|
|
687
|
-
model_history: list[ChatMessage] = []
|
|
688
|
-
for message in conversation:
|
|
689
|
-
role_value = message.get("role")
|
|
690
|
-
content = str(message.get("content") or "")
|
|
691
|
-
if role_value == "system":
|
|
692
|
-
model_history.append(
|
|
693
|
-
ChatMessage(role="system", content=content)
|
|
694
|
-
)
|
|
695
|
-
if role_value == "user":
|
|
696
|
-
model_history.append(
|
|
697
|
-
ChatMessage(role="user", content=content)
|
|
698
|
-
)
|
|
699
|
-
if role_value == "assistant":
|
|
700
|
-
model_history.append(
|
|
701
|
-
ChatMessage(role="assistant", content=content)
|
|
702
|
-
)
|
|
703
|
-
if role_value == "tool":
|
|
704
|
-
model_history.append(
|
|
705
|
-
ChatMessage(
|
|
706
|
-
role="user",
|
|
707
|
-
content=f"Tool result: {content}",
|
|
708
|
-
)
|
|
709
|
-
)
|
|
710
798
|
auto_result = await self.auto_compact_messages(
|
|
711
799
|
connection=connection,
|
|
712
800
|
context_window_limit=context_window_limit,
|
|
713
801
|
messages=next_messages,
|
|
714
|
-
model_history=
|
|
802
|
+
model_history=compact_prompt_chat_messages(conversation),
|
|
715
803
|
source_message_id=assistant_snapshot.id,
|
|
716
804
|
)
|
|
717
805
|
if auto_result is None:
|
|
@@ -747,14 +835,18 @@ class WorkspaceRuntime:
|
|
|
747
835
|
messages=current_request_messages,
|
|
748
836
|
tool_runner=tool_runner,
|
|
749
837
|
):
|
|
750
|
-
if not is_current_generation() or
|
|
838
|
+
if not is_current_generation() or response.discard_on_cancel:
|
|
751
839
|
raise asyncio.CancelledError
|
|
752
840
|
run_event_data = event.data
|
|
753
841
|
should_append_run_event = event.event != "usage"
|
|
754
842
|
snapshot_after_event: StoredMessage | None = None
|
|
755
843
|
if event.event == "start":
|
|
756
844
|
event_id = event.data.get("id")
|
|
757
|
-
if
|
|
845
|
+
if initial_assistant_message is not None:
|
|
846
|
+
assistant_output.set_assistant_id(assistant_message.id)
|
|
847
|
+
run_event_data = {"id": assistant_message.id}
|
|
848
|
+
snapshot_after_event = persist_assistant()
|
|
849
|
+
elif isinstance(event_id, str):
|
|
758
850
|
assistant_message = assistant_message.model_copy(
|
|
759
851
|
update={"id": event_id}
|
|
760
852
|
)
|
|
@@ -763,16 +855,24 @@ class WorkspaceRuntime:
|
|
|
763
855
|
if event.event == "output_start":
|
|
764
856
|
index = event.data.get("index")
|
|
765
857
|
if isinstance(index, int):
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
858
|
+
output_index = index + output_start_index - 1
|
|
859
|
+
current_output_index = output_index
|
|
860
|
+
run_event_data = {**event.data, "index": output_index}
|
|
861
|
+
response.active_output = None
|
|
862
|
+
assistant_output.start_group(output_index)
|
|
769
863
|
snapshot_after_event = persist_assistant()
|
|
770
864
|
if event.event == "output_done":
|
|
771
|
-
|
|
865
|
+
index = event.data.get("index")
|
|
866
|
+
if isinstance(index, int):
|
|
867
|
+
run_event_data = {
|
|
868
|
+
**event.data,
|
|
869
|
+
"index": index + output_start_index - 1,
|
|
870
|
+
}
|
|
871
|
+
response.active_output = None
|
|
772
872
|
if event.event == "tool_start":
|
|
773
873
|
tool = event.data.get("tool")
|
|
774
874
|
if isinstance(tool, dict) and isinstance(tool.get("id"), str):
|
|
775
|
-
|
|
875
|
+
response.active_output = None
|
|
776
876
|
current_tool_id = tool["id"]
|
|
777
877
|
assistant_output.start_tool(
|
|
778
878
|
StoredToolItem.model_validate(tool)
|
|
@@ -790,13 +890,13 @@ class WorkspaceRuntime:
|
|
|
790
890
|
assistant_output.update_tool(tool_id, event.data)
|
|
791
891
|
snapshot_after_event = persist_assistant()
|
|
792
892
|
if event.event == "delta":
|
|
793
|
-
|
|
893
|
+
response.active_output = "text"
|
|
794
894
|
assistant_output.append_text(
|
|
795
895
|
str(event.data.get("content") or "")
|
|
796
896
|
)
|
|
797
897
|
snapshot_after_event = persist_assistant_progress()
|
|
798
898
|
if event.event == "thinking_delta":
|
|
799
|
-
|
|
899
|
+
response.active_output = "thinking"
|
|
800
900
|
assistant_output.append_thinking(
|
|
801
901
|
str(event.data.get("content") or "")
|
|
802
902
|
)
|
|
@@ -824,8 +924,12 @@ class WorkspaceRuntime:
|
|
|
824
924
|
if event.event == "done":
|
|
825
925
|
message = event.data.get("message")
|
|
826
926
|
if isinstance(message, dict):
|
|
827
|
-
|
|
828
|
-
assistant_output.apply_done_message(
|
|
927
|
+
response.active_output = None
|
|
928
|
+
assistant_output.apply_done_message(
|
|
929
|
+
message,
|
|
930
|
+
content_prefix=initial_assistant_content,
|
|
931
|
+
thinking_prefix=initial_assistant_thinking,
|
|
932
|
+
)
|
|
829
933
|
response_usage_info = self.store.read_usage_info()
|
|
830
934
|
final_usage_info = turn_usage_info
|
|
831
935
|
if (
|
|
@@ -834,7 +938,7 @@ class WorkspaceRuntime:
|
|
|
834
938
|
):
|
|
835
939
|
final_usage_info = update_context_usage_for_response(
|
|
836
940
|
final_usage_info or response_usage_info,
|
|
837
|
-
messages=
|
|
941
|
+
messages=context_usage_messages,
|
|
838
942
|
output_content=assistant_output.content,
|
|
839
943
|
output_tools=[
|
|
840
944
|
tool.model_dump(exclude_none=True)
|
|
@@ -849,21 +953,23 @@ class WorkspaceRuntime:
|
|
|
849
953
|
"message": stream_message_data(snapshot_after_event)
|
|
850
954
|
}
|
|
851
955
|
if event.event == "done" and snapshot_after_event is not None:
|
|
852
|
-
await self.append_snapshot(
|
|
853
|
-
await self.append_event(
|
|
956
|
+
await self.append_snapshot(response, snapshot_after_event)
|
|
957
|
+
await self.append_event(response, event.event, run_event_data)
|
|
854
958
|
else:
|
|
855
959
|
if should_append_run_event:
|
|
856
|
-
await self.append_event(
|
|
960
|
+
await self.append_event(
|
|
961
|
+
response, event.event, run_event_data
|
|
962
|
+
)
|
|
857
963
|
if snapshot_after_event is not None:
|
|
858
|
-
await self.append_snapshot(
|
|
964
|
+
await self.append_snapshot(response, snapshot_after_event)
|
|
859
965
|
except asyncio.CancelledError:
|
|
860
|
-
logger.info("Workspace
|
|
861
|
-
if not
|
|
966
|
+
logger.info("Workspace response stopped")
|
|
967
|
+
if not response.discard_on_cancel:
|
|
862
968
|
interrupted_snapshot = persist_assistant("interrupted")
|
|
863
969
|
if interrupted_snapshot is not None:
|
|
864
|
-
await self.append_snapshot(
|
|
970
|
+
await self.append_snapshot(response, interrupted_snapshot)
|
|
865
971
|
await self.append_event(
|
|
866
|
-
|
|
972
|
+
response,
|
|
867
973
|
"error",
|
|
868
974
|
{"message": "Response stopped."},
|
|
869
975
|
)
|
|
@@ -887,23 +993,30 @@ class WorkspaceRuntime:
|
|
|
887
993
|
)
|
|
888
994
|
failed_snapshot = persist_assistant("failed")
|
|
889
995
|
if failed_snapshot is not None:
|
|
890
|
-
await self.append_snapshot(
|
|
891
|
-
await self.append_event(
|
|
996
|
+
await self.append_snapshot(response, failed_snapshot)
|
|
997
|
+
await self.append_event(
|
|
998
|
+
response, "error", run_error_event_data(error_item)
|
|
999
|
+
)
|
|
892
1000
|
finally:
|
|
893
|
-
|
|
894
|
-
async with
|
|
895
|
-
|
|
896
|
-
if self.
|
|
897
|
-
self.
|
|
1001
|
+
response.is_done = True
|
|
1002
|
+
async with response.condition:
|
|
1003
|
+
response.condition.notify_all()
|
|
1004
|
+
if self.active_response is response:
|
|
1005
|
+
self.active_response = None
|
|
898
1006
|
|
|
899
|
-
|
|
900
|
-
return
|
|
1007
|
+
response.task = asyncio.create_task(response_task())
|
|
1008
|
+
return response
|
|
901
1009
|
|
|
902
|
-
async def
|
|
903
|
-
self,
|
|
1010
|
+
async def response_stream(
|
|
1011
|
+
self,
|
|
1012
|
+
response: WorkspaceResponse,
|
|
1013
|
+
after: int = 0,
|
|
1014
|
+
include_snapshots: bool = True,
|
|
904
1015
|
) -> AsyncIterator[str]:
|
|
905
1016
|
next_event_index = after + 1
|
|
906
|
-
reconnect_snapshot =
|
|
1017
|
+
reconnect_snapshot = (
|
|
1018
|
+
response_snapshot_data_at(response, after) if after > 0 else None
|
|
1019
|
+
)
|
|
907
1020
|
if include_snapshots and reconnect_snapshot is not None:
|
|
908
1021
|
yield stream_event(
|
|
909
1022
|
"snapshot",
|
|
@@ -911,15 +1024,17 @@ class WorkspaceRuntime:
|
|
|
911
1024
|
event_id=after,
|
|
912
1025
|
)
|
|
913
1026
|
while True:
|
|
914
|
-
async with
|
|
1027
|
+
async with response.condition:
|
|
915
1028
|
|
|
916
1029
|
def has_next_event(index: int = next_event_index) -> bool:
|
|
917
|
-
return
|
|
918
|
-
event_index >= index for event_index, _, _ in
|
|
1030
|
+
return response.is_done or any(
|
|
1031
|
+
event_index >= index for event_index, _, _ in response.events
|
|
919
1032
|
)
|
|
920
1033
|
|
|
921
|
-
await
|
|
922
|
-
events = [
|
|
1034
|
+
await response.condition.wait_for(has_next_event)
|
|
1035
|
+
events = [
|
|
1036
|
+
event for event in response.events if event[0] >= next_event_index
|
|
1037
|
+
]
|
|
923
1038
|
|
|
924
1039
|
for index, event, data in events:
|
|
925
1040
|
next_event_index = index + 1
|
|
@@ -929,19 +1044,23 @@ class WorkspaceRuntime:
|
|
|
929
1044
|
if event in {"done", "error"}:
|
|
930
1045
|
return
|
|
931
1046
|
|
|
932
|
-
if
|
|
1047
|
+
if response.is_done and not events:
|
|
933
1048
|
return
|
|
934
1049
|
|
|
935
|
-
def
|
|
936
|
-
|
|
937
|
-
if
|
|
938
|
-
raise HTTPException(status_code=404, detail="
|
|
939
|
-
return
|
|
1050
|
+
def stream_current_response(self) -> WorkspaceResponse:
|
|
1051
|
+
response = self.current_response()
|
|
1052
|
+
if response is None:
|
|
1053
|
+
raise HTTPException(status_code=404, detail="Response not found.")
|
|
1054
|
+
return response
|
|
940
1055
|
|
|
941
|
-
def
|
|
942
|
-
|
|
943
|
-
if
|
|
944
|
-
|
|
1056
|
+
def stop_response(self) -> None:
|
|
1057
|
+
response = self.current_response()
|
|
1058
|
+
if (
|
|
1059
|
+
response is not None
|
|
1060
|
+
and response.task is not None
|
|
1061
|
+
and not response.task.done()
|
|
1062
|
+
):
|
|
1063
|
+
response.task.cancel()
|
|
945
1064
|
|
|
946
1065
|
def compact_stream(self) -> AsyncIterator[str]:
|
|
947
1066
|
async def run_manual_compact(
|
|
@@ -953,7 +1072,7 @@ class WorkspaceRuntime:
|
|
|
953
1072
|
) -> tuple[StoredMessage, TokenUsageInfo]:
|
|
954
1073
|
logger.info("Workspace compact requested")
|
|
955
1074
|
try:
|
|
956
|
-
model_history = [
|
|
1075
|
+
model_history: list[ChatMessage | Mapping[str, object]] = [
|
|
957
1076
|
*runtime_context_messages(self.cwd, state.settings.agent_prompt),
|
|
958
1077
|
*workspace_chat_messages(
|
|
959
1078
|
state.messages,
|
|
@@ -999,7 +1118,7 @@ class WorkspaceRuntime:
|
|
|
999
1118
|
self.active_compact_task = None
|
|
1000
1119
|
|
|
1001
1120
|
if self.active_compact_task is None:
|
|
1002
|
-
if self.
|
|
1121
|
+
if self.current_response() is not None:
|
|
1003
1122
|
raise HTTPException(
|
|
1004
1123
|
status_code=409,
|
|
1005
1124
|
detail="Compact is unavailable while Flowent is responding.",
|