flowent 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/backend/pyproject.toml +1 -1
  2. package/backend/src/flowent/agent.py +22 -15
  3. package/backend/src/flowent/api_models.py +13 -8
  4. package/backend/src/flowent/llm.py +50 -6
  5. package/backend/src/flowent/mcp.py +4 -3
  6. package/backend/src/flowent/permissions.py +51 -38
  7. package/backend/src/flowent/routes/providers.py +33 -10
  8. package/backend/src/flowent/routes/system.py +5 -6
  9. package/backend/src/flowent/routes/workspace.py +33 -23
  10. package/backend/src/flowent/state/models.py +4 -4
  11. package/backend/src/flowent/state/schema.py +121 -0
  12. package/backend/src/flowent/state/store.py +9 -3
  13. package/backend/src/flowent/static/assets/index-BX18a4Jz.js +100 -0
  14. package/backend/src/flowent/static/assets/index-EC37agAH.css +2 -0
  15. package/backend/src/flowent/static/index.html +2 -2
  16. package/backend/src/flowent/tools.py +84 -33
  17. package/backend/src/flowent/usage.py +66 -0
  18. package/backend/src/flowent/workspace/context.py +140 -47
  19. package/backend/src/flowent/workspace/events.py +5 -7
  20. package/backend/src/flowent/workspace/output.py +129 -4
  21. package/backend/src/flowent/workspace/runtime.py +393 -185
  22. package/backend/uv.lock +1 -1
  23. package/dist/frontend/assets/index-BX18a4Jz.js +100 -0
  24. package/dist/frontend/assets/index-EC37agAH.css +2 -0
  25. package/dist/frontend/index.html +2 -2
  26. package/package.json +8 -10
  27. package/backend/src/flowent/static/assets/index-CvWZZMtK.css +0 -2
  28. package/backend/src/flowent/static/assets/index-ma2v8oW7.js +0 -90
  29. package/dist/frontend/assets/index-CvWZZMtK.css +0 -2
  30. package/dist/frontend/assets/index-ma2v8oW7.js +0 -90
@@ -27,26 +27,31 @@ from flowent.storage import (
27
27
  StoredState,
28
28
  StoredToolItem,
29
29
  )
30
- from flowent.tools import ToolContext
30
+ from flowent.tools import ToolContext, text_tool_result, tool_specs
31
31
  from flowent.usage import (
32
32
  TokenUsage,
33
33
  TokenUsageInfo,
34
34
  append_token_usage,
35
+ full_context_usage,
36
+ is_context_window_error,
35
37
  recompute_context_usage,
36
38
  )
37
39
  from flowent.workspace.context import (
38
40
  COMPACTED_CONTEXT_MARKER,
39
41
  OPTIMIZED_CONTEXT_MARKER,
42
+ compact_prompt_chat_messages,
40
43
  context_window_for_settings,
44
+ model_request_messages_data,
45
+ model_visible_assistant_output_messages,
41
46
  should_auto_compact,
42
47
  update_context_usage_for_response,
43
48
  usage_event_data,
44
49
  workspace_chat_messages,
45
50
  )
46
51
  from flowent.workspace.events import (
47
- WorkspaceRun,
52
+ WorkspaceResponse,
48
53
  append_or_replace_message,
49
- run_snapshot_data_at,
54
+ response_snapshot_data_at,
50
55
  stream_event,
51
56
  stream_message_data,
52
57
  )
@@ -54,14 +59,17 @@ from flowent.workspace.output import (
54
59
  EMPTY_MODEL_RESPONSE_DETAIL,
55
60
  AssistantOutputBuilder,
56
61
  approval_transcript,
62
+ assistant_retry_output_start_index,
57
63
  run_error_event_data,
58
64
  run_error_output_item,
65
+ trim_assistant_message_at_error,
59
66
  )
60
67
 
61
68
  logger = logging.getLogger("flowent.workspace.runtime")
62
69
 
63
70
  AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
64
71
  WORKSPACE_PROGRESS_FLUSH_INTERVAL_SECONDS = 0.5
72
+ USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE = "Context could not be compacted."
65
73
 
66
74
 
67
75
  @dataclass
@@ -84,8 +92,7 @@ class WorkspaceRuntime:
84
92
  self.cwd = cwd
85
93
  self.mcp_manager = mcp_manager
86
94
  self.store = store
87
- self.runs: dict[str, WorkspaceRun] = {}
88
- self.active_run_id: str | None = None
95
+ self.active_response: WorkspaceResponse | None = None
89
96
  self.generation = 0
90
97
  self.active_compact_task: WorkspaceCompactTask | None = None
91
98
 
@@ -102,14 +109,13 @@ class WorkspaceRuntime:
102
109
  compacted_context,
103
110
  checkpoint,
104
111
  )
105
- return [
106
- message.model_dump()
107
- for message in [
112
+ return model_request_messages_data(
113
+ [
108
114
  *runtime_context_messages(self.cwd, state.settings.agent_prompt),
109
115
  *explicit_skill_messages(self.cwd, self.store, content),
110
116
  *chat_messages,
111
117
  ]
112
- ]
118
+ )
113
119
 
114
120
  async def save_context_checkpoint(
115
121
  self,
@@ -117,16 +123,17 @@ class WorkspaceRuntime:
117
123
  connection: ProviderConnection,
118
124
  context_window_limit: int,
119
125
  messages: list[StoredMessage],
120
- model_history: list[ChatMessage],
126
+ model_history: Sequence[ChatMessage | Mapping[str, object]],
121
127
  marker_content: str,
122
128
  source_message_id: str | None = None,
123
129
  trigger: Literal["manual", "auto"],
124
130
  ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo]:
131
+ compact_model_history = compact_prompt_chat_messages(model_history)
125
132
  compact_result = await self.compact_provider.compact(
126
133
  connection,
127
134
  CompactInput(
128
135
  messages=messages,
129
- model_history=model_history,
136
+ model_history=compact_model_history,
130
137
  retained_message_token_budget=AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET,
131
138
  trigger=trigger,
132
139
  ),
@@ -149,6 +156,7 @@ class WorkspaceRuntime:
149
156
  author="system",
150
157
  content=marker_content,
151
158
  id=str(uuid4()),
159
+ summary=compact_result.summary,
152
160
  usage_info=usage_info,
153
161
  )
154
162
  self.store.save_compaction_checkpoint(
@@ -183,13 +191,16 @@ class WorkspaceRuntime:
183
191
  *,
184
192
  connection: ProviderConnection,
185
193
  context_window_limit: int,
194
+ budget_messages: Sequence[ChatMessage | Mapping[str, object]] | None = None,
186
195
  messages: list[StoredMessage],
187
- model_history: list[ChatMessage],
196
+ model_history: Sequence[ChatMessage | Mapping[str, object]],
188
197
  source_message_id: str | None = None,
198
+ tools: Sequence[Mapping[str, object]] = (),
189
199
  ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
190
200
  if not should_auto_compact(
191
- model_history,
201
+ budget_messages or model_history,
192
202
  context_window=context_window_limit,
203
+ tools=tools,
193
204
  ):
194
205
  return None
195
206
  logger.info("Workspace auto compact requested")
@@ -218,7 +229,11 @@ class WorkspaceRuntime:
218
229
  )
219
230
  next_messages = [*state.messages, user_message]
220
231
  self.store.save_messages(next_messages)
221
- model_history = [
232
+ model_tool_specs = [
233
+ *tool_specs(),
234
+ *list(self.mcp_manager.tool_specs()),
235
+ ]
236
+ model_history: list[ChatMessage | Mapping[str, object]] = [
222
237
  *runtime_context_messages(self.cwd, state.settings.agent_prompt),
223
238
  *workspace_chat_messages(
224
239
  state.messages,
@@ -229,9 +244,13 @@ class WorkspaceRuntime:
229
244
  auto_compaction = await self.auto_compact_messages(
230
245
  connection=connection,
231
246
  context_window_limit=context_window_limit,
247
+ budget_messages=self.request_messages_for_content(
248
+ state, next_messages, content
249
+ ),
232
250
  messages=state.messages,
233
251
  model_history=model_history,
234
252
  source_message_id=None,
253
+ tools=model_tool_specs,
235
254
  )
236
255
  if auto_compaction is not None:
237
256
  marker, _, _ = auto_compaction
@@ -336,6 +355,7 @@ class WorkspaceRuntime:
336
355
  tool.model_dump(exclude_none=True)
337
356
  for tool in assistant_output.tools.values()
338
357
  ],
358
+ request_tools=model_tool_specs,
339
359
  model_context_window=context_window_limit,
340
360
  )
341
361
  self.store.save_usage_info(final_usage_info)
@@ -372,14 +392,14 @@ class WorkspaceRuntime:
372
392
  exc_info=(type(result), result, result.__traceback__),
373
393
  )
374
394
 
375
- async def stop_runs_for_shutdown(self) -> None:
395
+ async def stop_response_for_shutdown(self) -> None:
376
396
  tasks: list[asyncio.Task[None]] = []
377
- for run in self.runs.values():
378
- if run.task is None or run.task.done():
379
- continue
380
- run.task.cancel()
381
- tasks.append(run.task)
382
- await self.gather_shutdown_tasks("Workspace run", tasks)
397
+ response = self.active_response
398
+ if response is not None and response.task is not None:
399
+ if not response.task.done():
400
+ response.task.cancel()
401
+ tasks.append(response.task)
402
+ await self.gather_shutdown_tasks("Workspace response", tasks)
383
403
 
384
404
  async def stop_compact_for_shutdown(self) -> None:
385
405
  if self.active_compact_task is None:
@@ -393,64 +413,72 @@ class WorkspaceRuntime:
393
413
  self.store.save_is_compacting(False)
394
414
 
395
415
  async def stop_for_shutdown(self) -> None:
396
- await self.stop_runs_for_shutdown()
416
+ await self.stop_response_for_shutdown()
397
417
  await self.stop_compact_for_shutdown()
398
418
 
399
- def active_run(self) -> WorkspaceRun | None:
400
- if self.active_run_id is None:
419
+ def current_response(self) -> WorkspaceResponse | None:
420
+ response = self.active_response
421
+ if response is None or response.is_done:
401
422
  return None
402
- run = self.runs.get(self.active_run_id)
403
- if run is None or run.is_done:
404
- return None
405
- return run
423
+ return response
406
424
 
407
- def has_active_run(self) -> bool:
408
- return any(
409
- not run.is_done and run.task is not None and not run.task.done()
410
- for run in self.runs.values()
425
+ def has_active_response(self) -> bool:
426
+ response = self.active_response
427
+ return (
428
+ response is not None
429
+ and not response.is_done
430
+ and response.task is not None
431
+ and not response.task.done()
411
432
  )
412
433
 
413
434
  def clear(self) -> list[StoredMessage]:
414
435
  self.generation += 1
415
- for run in self.runs.values():
416
- run.is_done = True
417
- if run.task is not None and not run.task.done():
418
- run.discard_on_cancel = True
419
- run.task.cancel()
420
- self.active_run_id = None
436
+ response = self.active_response
437
+ if response is not None:
438
+ response.is_done = True
439
+ if response.task is not None and not response.task.done():
440
+ response.discard_on_cancel = True
441
+ response.task.cancel()
421
442
  return self.store.save_messages([])
422
443
 
423
- async def notify_cleared_runs(self) -> None:
424
- for run in self.runs.values():
425
- async with run.condition:
426
- run.condition.notify_all()
444
+ async def notify_cleared_response(self) -> None:
445
+ response = self.active_response
446
+ if response is None:
447
+ return
448
+ async with response.condition:
449
+ response.condition.notify_all()
427
450
 
428
451
  async def append_event(
429
- self, run: WorkspaceRun, event: str, data: dict[str, object]
452
+ self, response: WorkspaceResponse, event: str, data: dict[str, object]
430
453
  ) -> None:
431
- async with run.condition:
432
- run.events.append((run.latest_event_index + 1, event, data))
433
- run.condition.notify_all()
454
+ async with response.condition:
455
+ response.events.append((response.latest_event_index + 1, event, data))
456
+ response.condition.notify_all()
434
457
 
435
- async def append_snapshot(self, run: WorkspaceRun, message: StoredMessage) -> None:
458
+ async def append_snapshot(
459
+ self, response: WorkspaceResponse, message: StoredMessage
460
+ ) -> None:
436
461
  if message.author != "assistant":
437
462
  return
438
- run.latest_snapshot = message
463
+ response.latest_snapshot = message
439
464
  await self.append_event(
440
- run,
465
+ response,
441
466
  "snapshot",
442
- {"message": stream_message_data(message, run.active_output)},
467
+ {"message": stream_message_data(message, response.active_output)},
443
468
  )
444
469
 
445
- def create_run(
470
+ def start_response(
446
471
  self, content: str, *, message_id: str | None = None
447
- ) -> WorkspaceRun:
448
- if self.has_active_run():
449
- active_run = self.active_run()
472
+ ) -> WorkspaceResponse:
473
+ if self.has_active_response():
450
474
  raise HTTPException(
451
475
  status_code=409,
452
476
  detail="Response in progress",
453
- headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
477
+ )
478
+ if self.store.read_is_compacting():
479
+ raise HTTPException(
480
+ status_code=409,
481
+ detail="Context refining in progress. Please wait a moment.",
454
482
  )
455
483
  state = self.store.read_state()
456
484
  user_message_id = message_id or str(uuid4())
@@ -463,7 +491,7 @@ class WorkspaceRuntime:
463
491
  )
464
492
  next_messages = [*state.messages, user_message]
465
493
  self.store.save_messages(next_messages)
466
- return self._create_run_from_messages(
494
+ return self._start_response_from_messages(
467
495
  content=content,
468
496
  next_messages=next_messages,
469
497
  state=state,
@@ -476,13 +504,16 @@ class WorkspaceRuntime:
476
504
  *,
477
505
  action: Literal["resend", "save"],
478
506
  content: str,
479
- ) -> tuple[list[StoredMessage], WorkspaceRun | None]:
480
- if self.has_active_run():
481
- active_run = self.active_run()
507
+ ) -> tuple[list[StoredMessage], WorkspaceResponse | None]:
508
+ if self.has_active_response():
482
509
  raise HTTPException(
483
510
  status_code=409,
484
511
  detail="Response in progress",
485
- headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
512
+ )
513
+ if self.store.read_is_compacting():
514
+ raise HTTPException(
515
+ status_code=409,
516
+ detail="Context refining in progress. Please wait a moment.",
486
517
  )
487
518
  state = self.store.read_state()
488
519
  message_index = next(
@@ -513,50 +544,140 @@ class WorkspaceRuntime:
513
544
  previous_messages = state.messages[:message_index]
514
545
  next_messages = [*previous_messages, updated_message]
515
546
  self.store.save_messages(next_messages)
516
- run = self._create_run_from_messages(
547
+ response = self._start_response_from_messages(
517
548
  content=content,
518
549
  next_messages=next_messages,
519
550
  state=state.model_copy(update={"messages": previous_messages}),
520
551
  user_message=updated_message,
521
552
  )
522
- return next_messages, run
553
+ return next_messages, response
554
+
555
+ def retry_error(
556
+ self,
557
+ message_id: str,
558
+ *,
559
+ error_id: str,
560
+ ) -> tuple[list[StoredMessage], WorkspaceResponse]:
561
+ if self.has_active_response():
562
+ raise HTTPException(
563
+ status_code=409,
564
+ detail="Response in progress",
565
+ )
566
+ if self.store.read_is_compacting():
567
+ raise HTTPException(
568
+ status_code=409,
569
+ detail="Context refining in progress. Please wait a moment.",
570
+ )
571
+ state = self.store.read_state()
572
+ message_index = next(
573
+ (
574
+ index
575
+ for index, message in enumerate(state.messages)
576
+ if message.id == message_id
577
+ ),
578
+ -1,
579
+ )
580
+ if message_index < 0:
581
+ raise HTTPException(status_code=404, detail="Message not found.")
582
+ message = state.messages[message_index]
583
+ if message.author != "assistant":
584
+ raise HTTPException(
585
+ status_code=400, detail="Only assistant errors can be retried."
586
+ )
587
+ previous_user_message = next(
588
+ (
589
+ current_message
590
+ for current_message in reversed(state.messages[:message_index])
591
+ if current_message.author == "user"
592
+ ),
593
+ None,
594
+ )
595
+ if previous_user_message is None:
596
+ raise HTTPException(status_code=400, detail="Message history is invalid.")
597
+ trimmed_message = trim_assistant_message_at_error(
598
+ message,
599
+ error_id,
600
+ status="running",
601
+ )
602
+ if trimmed_message is None:
603
+ raise HTTPException(status_code=404, detail="Error block not found.")
523
604
 
524
- def _create_run_from_messages(
605
+ previous_messages = state.messages[:message_index]
606
+ next_messages = [*previous_messages, trimmed_message]
607
+ self.store.save_messages(next_messages)
608
+ state_before_assistant = state.model_copy(
609
+ update={"messages": previous_messages}
610
+ )
611
+ base_request_messages = self.request_messages_for_content(
612
+ state_before_assistant,
613
+ previous_messages,
614
+ previous_user_message.content,
615
+ )
616
+ request_messages = [
617
+ *base_request_messages,
618
+ *model_visible_assistant_output_messages(trimmed_message),
619
+ ]
620
+ response = self._start_response_from_messages(
621
+ content=previous_user_message.content,
622
+ initial_assistant_message=trimmed_message,
623
+ next_messages=next_messages,
624
+ output_start_index=assistant_retry_output_start_index(trimmed_message),
625
+ request_messages=request_messages,
626
+ state=state_before_assistant,
627
+ usage_request_messages=base_request_messages,
628
+ user_message=previous_user_message,
629
+ )
630
+ return next_messages, response
631
+
632
+ def _start_response_from_messages(
525
633
  self,
526
634
  *,
527
635
  content: str,
636
+ initial_assistant_message: StoredMessage | None = None,
528
637
  next_messages: list[StoredMessage],
638
+ output_start_index: int = 1,
639
+ request_messages: list[dict[str, object]] | None = None,
529
640
  state: StoredState,
641
+ usage_request_messages: list[dict[str, object]] | None = None,
530
642
  user_message: StoredMessage,
531
- ) -> WorkspaceRun:
643
+ ) -> WorkspaceResponse:
532
644
  connection = selected_connection(state)
533
645
  context_window_limit = context_window_for_settings(state.settings)
534
- run = WorkspaceRun(
646
+ response = WorkspaceResponse(
535
647
  condition=asyncio.Condition(),
536
648
  generation=self.generation,
537
649
  )
538
- self.runs[run.id] = run
539
- self.active_run_id = run.id
650
+ self.active_response = response
540
651
 
541
- async def run_task() -> None:
652
+ async def response_task() -> None:
542
653
  nonlocal next_messages
543
- assistant_message = StoredMessage(
544
- author="assistant",
545
- content="",
546
- id=str(uuid4()),
547
- status="running",
654
+ assistant_message = (
655
+ initial_assistant_message
656
+ if initial_assistant_message is not None
657
+ else StoredMessage(
658
+ author="assistant",
659
+ content="",
660
+ id=str(uuid4()),
661
+ status="running",
662
+ )
548
663
  )
549
- assistant_output = AssistantOutputBuilder(assistant_message.id)
664
+ assistant_output = (
665
+ AssistantOutputBuilder.from_message(assistant_message)
666
+ if initial_assistant_message is not None
667
+ else AssistantOutputBuilder(assistant_message.id)
668
+ )
669
+ initial_assistant_content = assistant_output.content
670
+ initial_assistant_thinking = assistant_output.thinking
550
671
  last_progress_flush_at = 0.0
551
672
 
552
673
  def is_current_generation() -> bool:
553
- return run.generation == self.generation
674
+ return response.generation == self.generation
554
675
 
555
676
  def update_assistant_message(
556
677
  status: str = "running", *, persist: bool
557
678
  ) -> StoredMessage | None:
558
679
  nonlocal next_messages, assistant_message
559
- if not is_current_generation() or run.discard_on_cancel:
680
+ if not is_current_generation() or response.discard_on_cancel:
560
681
  return None
561
682
  assistant_message = StoredMessage(
562
683
  author="assistant",
@@ -603,43 +724,90 @@ class WorkspaceRuntime:
603
724
  turn_usage_info: TokenUsageInfo | None = None
604
725
  current_output_index = 0
605
726
  latest_usage_output_index: int | None = None
606
- current_request_messages = self.request_messages_for_content(
607
- state,
608
- next_messages,
609
- content,
610
- )
611
- pre_turn_request_messages = self.request_messages_for_content(
612
- state,
613
- state.messages,
614
- content,
615
- )
616
- auto_compaction = await self.auto_compact_messages(
617
- connection=connection,
618
- context_window_limit=context_window_limit,
619
- messages=state.messages,
620
- model_history=[
621
- ChatMessage.model_validate(message)
622
- for message in pre_turn_request_messages
623
- ],
624
- source_message_id=None,
625
- )
626
- if auto_compaction is not None:
627
- marker, _, usage_info = auto_compaction
628
- next_messages = [*state.messages, marker, user_message]
629
- self.store.save_messages(next_messages)
630
- await self.append_event(
631
- run,
632
- "context_optimized",
633
- {
634
- "message": marker.model_dump(),
635
- **usage_event_data(usage_info),
636
- },
637
- )
727
+ model_tool_specs = [
728
+ *tool_specs(),
729
+ *list(self.mcp_manager.tool_specs()),
730
+ ]
731
+ if request_messages is None:
638
732
  current_request_messages = self.request_messages_for_content(
639
733
  state,
640
734
  next_messages,
641
735
  content,
642
736
  )
737
+ pre_turn_request_messages = self.request_messages_for_content(
738
+ state,
739
+ state.messages,
740
+ content,
741
+ )
742
+ auto_compaction = await self.auto_compact_messages(
743
+ connection=connection,
744
+ context_window_limit=context_window_limit,
745
+ budget_messages=current_request_messages,
746
+ messages=state.messages,
747
+ model_history=pre_turn_request_messages,
748
+ source_message_id=None,
749
+ tools=model_tool_specs,
750
+ )
751
+ if auto_compaction is not None:
752
+ marker, _, usage_info = auto_compaction
753
+ next_messages = [*state.messages, marker, user_message]
754
+ self.store.save_messages(next_messages)
755
+ await self.append_event(
756
+ response,
757
+ "context_optimized",
758
+ {
759
+ "message": marker.model_dump(),
760
+ **usage_event_data(usage_info),
761
+ },
762
+ )
763
+ current_request_messages = self.request_messages_for_content(
764
+ state,
765
+ next_messages,
766
+ content,
767
+ )
768
+ else:
769
+ current_request_messages = request_messages
770
+ auto_compaction = await self.auto_compact_messages(
771
+ connection=connection,
772
+ context_window_limit=context_window_limit,
773
+ messages=next_messages,
774
+ model_history=compact_prompt_chat_messages(
775
+ current_request_messages
776
+ ),
777
+ source_message_id=assistant_message.id,
778
+ tools=model_tool_specs,
779
+ )
780
+ if auto_compaction is not None:
781
+ marker, replacement_history, usage_info = auto_compaction
782
+ assistant_message = assistant_message.model_copy(
783
+ update={"usage_info": usage_info}
784
+ )
785
+ next_messages = append_or_replace_message(
786
+ [*next_messages, marker], assistant_message
787
+ )
788
+ self.store.save_messages(next_messages)
789
+ await self.append_event(
790
+ response,
791
+ "context_optimized",
792
+ {
793
+ "message": marker.model_dump(),
794
+ **usage_event_data(usage_info),
795
+ },
796
+ )
797
+ current_request_messages = model_request_messages_data(
798
+ [
799
+ *runtime_context_messages(
800
+ self.cwd, state.settings.agent_prompt
801
+ ),
802
+ *explicit_skill_messages(self.cwd, self.store, content),
803
+ *replacement_history,
804
+ ]
805
+ )
806
+ context_usage_messages = (
807
+ usage_request_messages
808
+ if usage_request_messages is not None
809
+ else current_request_messages
810
+ )
643
811
 
644
812
  async def review_tool_approval(request: ApprovalReviewRequest):
645
813
  return await review_approval_request(
@@ -672,7 +840,7 @@ class WorkspaceRuntime:
672
840
  conversation: Sequence[Mapping[str, object]],
673
841
  ) -> AgentContextUpdate | None:
674
842
  nonlocal next_messages
675
- if not is_current_generation() or run.discard_on_cancel:
843
+ if not is_current_generation() or response.discard_on_cancel:
676
844
  return None
677
845
  assistant_snapshot = StoredMessage(
678
846
  author="assistant",
@@ -684,35 +852,13 @@ class WorkspaceRuntime:
684
852
  tools=list(assistant_output.tools.values()),
685
853
  usage_info=self.store.read_usage_info(),
686
854
  )
687
- model_history: list[ChatMessage] = []
688
- for message in conversation:
689
- role_value = message.get("role")
690
- content = str(message.get("content") or "")
691
- if role_value == "system":
692
- model_history.append(
693
- ChatMessage(role="system", content=content)
694
- )
695
- if role_value == "user":
696
- model_history.append(
697
- ChatMessage(role="user", content=content)
698
- )
699
- if role_value == "assistant":
700
- model_history.append(
701
- ChatMessage(role="assistant", content=content)
702
- )
703
- if role_value == "tool":
704
- model_history.append(
705
- ChatMessage(
706
- role="user",
707
- content=f"Tool result: {content}",
708
- )
709
- )
710
855
  auto_result = await self.auto_compact_messages(
711
856
  connection=connection,
712
857
  context_window_limit=context_window_limit,
713
858
  messages=next_messages,
714
- model_history=model_history,
859
+ model_history=compact_prompt_chat_messages(conversation),
715
860
  source_message_id=assistant_snapshot.id,
861
+ tools=model_tool_specs,
716
862
  )
717
863
  if auto_result is None:
718
864
  return None
@@ -747,14 +893,18 @@ class WorkspaceRuntime:
747
893
  messages=current_request_messages,
748
894
  tool_runner=tool_runner,
749
895
  ):
750
- if not is_current_generation() or run.discard_on_cancel:
896
+ if not is_current_generation() or response.discard_on_cancel:
751
897
  raise asyncio.CancelledError
752
898
  run_event_data = event.data
753
899
  should_append_run_event = event.event != "usage"
754
900
  snapshot_after_event: StoredMessage | None = None
755
901
  if event.event == "start":
756
902
  event_id = event.data.get("id")
757
- if isinstance(event_id, str):
903
+ if initial_assistant_message is not None:
904
+ assistant_output.set_assistant_id(assistant_message.id)
905
+ run_event_data = {"id": assistant_message.id}
906
+ snapshot_after_event = persist_assistant()
907
+ elif isinstance(event_id, str):
758
908
  assistant_message = assistant_message.model_copy(
759
909
  update={"id": event_id}
760
910
  )
@@ -763,16 +913,24 @@ class WorkspaceRuntime:
763
913
  if event.event == "output_start":
764
914
  index = event.data.get("index")
765
915
  if isinstance(index, int):
766
- current_output_index = index
767
- run.active_output = None
768
- assistant_output.start_group(index)
916
+ output_index = index + output_start_index - 1
917
+ current_output_index = output_index
918
+ run_event_data = {**event.data, "index": output_index}
919
+ response.active_output = None
920
+ assistant_output.start_group(output_index)
769
921
  snapshot_after_event = persist_assistant()
770
922
  if event.event == "output_done":
771
- run.active_output = None
923
+ index = event.data.get("index")
924
+ if isinstance(index, int):
925
+ run_event_data = {
926
+ **event.data,
927
+ "index": index + output_start_index - 1,
928
+ }
929
+ response.active_output = None
772
930
  if event.event == "tool_start":
773
931
  tool = event.data.get("tool")
774
932
  if isinstance(tool, dict) and isinstance(tool.get("id"), str):
775
- run.active_output = None
933
+ response.active_output = None
776
934
  current_tool_id = tool["id"]
777
935
  assistant_output.start_tool(
778
936
  StoredToolItem.model_validate(tool)
@@ -790,13 +948,13 @@ class WorkspaceRuntime:
790
948
  assistant_output.update_tool(tool_id, event.data)
791
949
  snapshot_after_event = persist_assistant()
792
950
  if event.event == "delta":
793
- run.active_output = "text"
951
+ response.active_output = "text"
794
952
  assistant_output.append_text(
795
953
  str(event.data.get("content") or "")
796
954
  )
797
955
  snapshot_after_event = persist_assistant_progress()
798
956
  if event.event == "thinking_delta":
799
- run.active_output = "thinking"
957
+ response.active_output = "thinking"
800
958
  assistant_output.append_thinking(
801
959
  str(event.data.get("content") or "")
802
960
  )
@@ -824,8 +982,12 @@ class WorkspaceRuntime:
824
982
  if event.event == "done":
825
983
  message = event.data.get("message")
826
984
  if isinstance(message, dict):
827
- run.active_output = None
828
- assistant_output.apply_done_message(message)
985
+ response.active_output = None
986
+ assistant_output.apply_done_message(
987
+ message,
988
+ content_prefix=initial_assistant_content,
989
+ thinking_prefix=initial_assistant_thinking,
990
+ )
829
991
  response_usage_info = self.store.read_usage_info()
830
992
  final_usage_info = turn_usage_info
831
993
  if (
@@ -834,12 +996,13 @@ class WorkspaceRuntime:
834
996
  ):
835
997
  final_usage_info = update_context_usage_for_response(
836
998
  final_usage_info or response_usage_info,
837
- messages=current_request_messages,
999
+ messages=context_usage_messages,
838
1000
  output_content=assistant_output.content,
839
1001
  output_tools=[
840
1002
  tool.model_dump(exclude_none=True)
841
1003
  for tool in assistant_output.tools.values()
842
1004
  ],
1005
+ request_tools=model_tool_specs,
843
1006
  model_context_window=context_window_limit,
844
1007
  )
845
1008
  self.store.save_usage_info(final_usage_info)
@@ -849,27 +1012,35 @@ class WorkspaceRuntime:
849
1012
  "message": stream_message_data(snapshot_after_event)
850
1013
  }
851
1014
  if event.event == "done" and snapshot_after_event is not None:
852
- await self.append_snapshot(run, snapshot_after_event)
853
- await self.append_event(run, event.event, run_event_data)
1015
+ await self.append_snapshot(response, snapshot_after_event)
1016
+ await self.append_event(response, event.event, run_event_data)
854
1017
  else:
855
1018
  if should_append_run_event:
856
- await self.append_event(run, event.event, run_event_data)
1019
+ await self.append_event(
1020
+ response, event.event, run_event_data
1021
+ )
857
1022
  if snapshot_after_event is not None:
858
- await self.append_snapshot(run, snapshot_after_event)
1023
+ await self.append_snapshot(response, snapshot_after_event)
859
1024
  except asyncio.CancelledError:
860
- logger.info("Workspace run stopped")
861
- if not run.discard_on_cancel:
1025
+ logger.info("Workspace response stopped")
1026
+ if not response.discard_on_cancel:
862
1027
  interrupted_snapshot = persist_assistant("interrupted")
863
1028
  if interrupted_snapshot is not None:
864
- await self.append_snapshot(run, interrupted_snapshot)
1029
+ await self.append_snapshot(response, interrupted_snapshot)
865
1030
  await self.append_event(
866
- run,
1031
+ response,
867
1032
  "error",
868
1033
  {"message": "Response stopped."},
869
1034
  )
870
1035
  raise
871
1036
  except Exception as error:
872
1037
  logger.exception("Workspace response failed")
1038
+ if is_context_window_error(error):
1039
+ usage_info = full_context_usage(
1040
+ self.store.read_usage_info(),
1041
+ model_context_window=context_window_limit,
1042
+ )
1043
+ self.store.save_usage_info(usage_info)
873
1044
  if (
874
1045
  current_tool_id is not None
875
1046
  and current_tool_id in assistant_output.tools
@@ -877,7 +1048,10 @@ class WorkspaceRuntime:
877
1048
  ):
878
1049
  assistant_output.update_tool(
879
1050
  current_tool_id,
880
- {"content": str(error) or "Tool failed.", "status": "failed"},
1051
+ {
1052
+ "result": text_tool_result(str(error) or "Tool failed."),
1053
+ "status": "failed",
1054
+ },
881
1055
  )
882
1056
  error_item = assistant_output.append_error(
883
1057
  run_error_output_item(
@@ -887,23 +1061,30 @@ class WorkspaceRuntime:
887
1061
  )
888
1062
  failed_snapshot = persist_assistant("failed")
889
1063
  if failed_snapshot is not None:
890
- await self.append_snapshot(run, failed_snapshot)
891
- await self.append_event(run, "error", run_error_event_data(error_item))
1064
+ await self.append_snapshot(response, failed_snapshot)
1065
+ await self.append_event(
1066
+ response, "error", run_error_event_data(error_item)
1067
+ )
892
1068
  finally:
893
- run.is_done = True
894
- async with run.condition:
895
- run.condition.notify_all()
896
- if self.active_run_id == run.id:
897
- self.active_run_id = None
1069
+ response.is_done = True
1070
+ async with response.condition:
1071
+ response.condition.notify_all()
1072
+ if self.active_response is response:
1073
+ self.active_response = None
898
1074
 
899
- run.task = asyncio.create_task(run_task())
900
- return run
1075
+ response.task = asyncio.create_task(response_task())
1076
+ return response
901
1077
 
902
- async def run_stream(
903
- self, run: WorkspaceRun, after: int = 0, include_snapshots: bool = True
1078
+ async def response_stream(
1079
+ self,
1080
+ response: WorkspaceResponse,
1081
+ after: int = 0,
1082
+ include_snapshots: bool = True,
904
1083
  ) -> AsyncIterator[str]:
905
1084
  next_event_index = after + 1
906
- reconnect_snapshot = run_snapshot_data_at(run, after) if after > 0 else None
1085
+ reconnect_snapshot = (
1086
+ response_snapshot_data_at(response, after) if after > 0 else None
1087
+ )
907
1088
  if include_snapshots and reconnect_snapshot is not None:
908
1089
  yield stream_event(
909
1090
  "snapshot",
@@ -911,15 +1092,17 @@ class WorkspaceRuntime:
911
1092
  event_id=after,
912
1093
  )
913
1094
  while True:
914
- async with run.condition:
1095
+ async with response.condition:
915
1096
 
916
1097
  def has_next_event(index: int = next_event_index) -> bool:
917
- return run.is_done or any(
918
- event_index >= index for event_index, _, _ in run.events
1098
+ return response.is_done or any(
1099
+ event_index >= index for event_index, _, _ in response.events
919
1100
  )
920
1101
 
921
- await run.condition.wait_for(has_next_event)
922
- events = [event for event in run.events if event[0] >= next_event_index]
1102
+ await response.condition.wait_for(has_next_event)
1103
+ events = [
1104
+ event for event in response.events if event[0] >= next_event_index
1105
+ ]
923
1106
 
924
1107
  for index, event, data in events:
925
1108
  next_event_index = index + 1
@@ -929,19 +1112,23 @@ class WorkspaceRuntime:
929
1112
  if event in {"done", "error"}:
930
1113
  return
931
1114
 
932
- if run.is_done and not events:
1115
+ if response.is_done and not events:
933
1116
  return
934
1117
 
935
- def run_by_id(self, run_id: str) -> WorkspaceRun:
936
- run = self.runs.get(run_id)
937
- if run is None:
938
- raise HTTPException(status_code=404, detail="Run not found.")
939
- return run
1118
+ def stream_current_response(self) -> WorkspaceResponse:
1119
+ response = self.current_response()
1120
+ if response is None:
1121
+ raise HTTPException(status_code=404, detail="Response not found.")
1122
+ return response
940
1123
 
941
- def stop_run(self, run_id: str) -> None:
942
- run = self.run_by_id(run_id)
943
- if run.task is not None and not run.task.done():
944
- run.task.cancel()
1124
+ def stop_response(self) -> None:
1125
+ response = self.current_response()
1126
+ if (
1127
+ response is not None
1128
+ and response.task is not None
1129
+ and not response.task.done()
1130
+ ):
1131
+ response.task.cancel()
945
1132
 
946
1133
  def compact_stream(self) -> AsyncIterator[str]:
947
1134
  async def run_manual_compact(
@@ -953,7 +1140,7 @@ class WorkspaceRuntime:
953
1140
  ) -> tuple[StoredMessage, TokenUsageInfo]:
954
1141
  logger.info("Workspace compact requested")
955
1142
  try:
956
- model_history = [
1143
+ model_history: list[ChatMessage | Mapping[str, object]] = [
957
1144
  *runtime_context_messages(self.cwd, state.settings.agent_prompt),
958
1145
  *workspace_chat_messages(
959
1146
  state.messages,
@@ -999,7 +1186,7 @@ class WorkspaceRuntime:
999
1186
  self.active_compact_task = None
1000
1187
 
1001
1188
  if self.active_compact_task is None:
1002
- if self.active_run() is not None:
1189
+ if self.current_response() is not None:
1003
1190
  raise HTTPException(
1004
1191
  status_code=409,
1005
1192
  detail="Compact is unavailable while Flowent is responding.",
@@ -1023,10 +1210,31 @@ class WorkspaceRuntime:
1023
1210
  async def compact_events() -> AsyncIterator[str]:
1024
1211
  try:
1025
1212
  marker, usage_info = await asyncio.shield(compact_task)
1026
- except Exception:
1213
+ except Exception as error:
1214
+ assistant_id = str(uuid4())
1215
+ assistant_output = AssistantOutputBuilder(assistant_id)
1216
+ error_item = run_error_output_item(assistant_id, str(error)).model_copy(
1217
+ update={"message": USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE}
1218
+ )
1219
+ assistant_output.append_error(error_item)
1220
+ failed_message = StoredMessage(
1221
+ author="assistant",
1222
+ content="",
1223
+ groups=assistant_output.groups,
1224
+ id=assistant_id,
1225
+ status="failed",
1226
+ )
1227
+ self.store.save_messages(
1228
+ [*self.store.read_state().messages, failed_message]
1229
+ )
1230
+ failed_message_data = stream_message_data(failed_message)
1231
+ yield stream_event("snapshot", {"message": failed_message_data})
1027
1232
  yield stream_event(
1028
1233
  "error",
1029
- {"message": "Context could not be compacted."},
1234
+ {
1235
+ "error": error_item.model_dump(exclude_none=True),
1236
+ "message": USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE,
1237
+ },
1030
1238
  )
1031
1239
  return
1032
1240