flowent 0.2.4 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +3 -3
  2. package/backend/README.md +3 -3
  3. package/backend/pyproject.toml +1 -1
  4. package/backend/src/flowent/agent.py +1 -1
  5. package/backend/src/flowent/api_models.py +108 -0
  6. package/backend/src/flowent/app.py +151 -0
  7. package/backend/src/flowent/cli.py +13 -4
  8. package/backend/src/flowent/compact.py +34 -13
  9. package/backend/src/flowent/llm.py +52 -6
  10. package/backend/src/flowent/main.py +18 -1994
  11. package/backend/src/flowent/mcp.py +100 -2
  12. package/backend/src/flowent/network.py +5 -0
  13. package/backend/src/flowent/provider_connections.py +42 -0
  14. package/backend/src/flowent/routes/__init__.py +0 -0
  15. package/backend/src/flowent/routes/integrations.py +105 -0
  16. package/backend/src/flowent/routes/permissions.py +36 -0
  17. package/backend/src/flowent/routes/providers.py +53 -0
  18. package/backend/src/flowent/routes/system.py +48 -0
  19. package/backend/src/flowent/routes/workflow_routes.py +63 -0
  20. package/backend/src/flowent/routes/workspace.py +115 -0
  21. package/backend/src/flowent/state/__init__.py +53 -0
  22. package/backend/src/flowent/state/models.py +258 -0
  23. package/backend/src/flowent/state/schema.py +191 -0
  24. package/backend/src/flowent/state/store.py +1019 -0
  25. package/backend/src/flowent/static/assets/index-BaZmIi2Y.js +98 -0
  26. package/backend/src/flowent/static/assets/index-EC37agAH.css +2 -0
  27. package/backend/src/flowent/static/index.html +2 -2
  28. package/backend/src/flowent/storage.py +52 -1318
  29. package/backend/src/flowent/system_tools.py +25 -0
  30. package/backend/src/flowent/tools.py +4 -2
  31. package/backend/src/flowent/usage.py +9 -4
  32. package/backend/src/flowent/workflows.py +282 -0
  33. package/backend/src/flowent/workspace/__init__.py +0 -0
  34. package/backend/src/flowent/workspace/context.py +335 -0
  35. package/backend/src/flowent/workspace/events.py +178 -0
  36. package/backend/src/flowent/workspace/output.py +396 -0
  37. package/backend/src/flowent/workspace/runtime.py +1160 -0
  38. package/backend/uv.lock +1 -1
  39. package/dist/frontend/assets/index-BaZmIi2Y.js +98 -0
  40. package/dist/frontend/assets/index-EC37agAH.css +2 -0
  41. package/dist/frontend/index.html +2 -2
  42. package/package.json +1 -1
  43. package/backend/src/flowent/static/assets/index-BH30iLzb.css +0 -2
  44. package/backend/src/flowent/static/assets/index-sBFt3ORj.js +0 -84
  45. package/dist/frontend/assets/index-BH30iLzb.css +0 -2
  46. package/dist/frontend/assets/index-sBFt3ORj.js +0 -84
@@ -0,0 +1,1160 @@
1
+ import asyncio
2
+ import logging
3
+ import time
4
+ from collections.abc import AsyncIterator, Mapping, Sequence
5
+ from contextlib import suppress
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Any, Literal
9
+ from uuid import uuid4
10
+
11
+ from fastapi import HTTPException
12
+
13
+ from flowent.agent import AgentContextUpdate, run_agent_stream
14
+ from flowent.approval import ApprovalReviewRequest, review_approval_request
15
+ from flowent.compact import CompactInput, CompactProvider
16
+ from flowent.context import runtime_context_messages
17
+ from flowent.llm import ChatMessage, CompletionCallable, ProviderConnection
18
+ from flowent.logging import TRACE_LEVEL
19
+ from flowent.mcp import McpManager
20
+ from flowent.permissions import run_tool_with_path_permissions
21
+ from flowent.provider_connections import selected_connection
22
+ from flowent.skills import explicit_skill_messages
23
+ from flowent.storage import (
24
+ StateStore,
25
+ StoredCompactionCheckpoint,
26
+ StoredMessage,
27
+ StoredState,
28
+ StoredToolItem,
29
+ )
30
+ from flowent.tools import ToolContext
31
+ from flowent.usage import (
32
+ TokenUsage,
33
+ TokenUsageInfo,
34
+ append_token_usage,
35
+ recompute_context_usage,
36
+ )
37
+ from flowent.workspace.context import (
38
+ COMPACTED_CONTEXT_MARKER,
39
+ OPTIMIZED_CONTEXT_MARKER,
40
+ compact_prompt_chat_messages,
41
+ context_window_for_settings,
42
+ model_request_messages_data,
43
+ model_visible_assistant_output_messages,
44
+ should_auto_compact,
45
+ update_context_usage_for_response,
46
+ usage_event_data,
47
+ workspace_chat_messages,
48
+ )
49
+ from flowent.workspace.events import (
50
+ WorkspaceResponse,
51
+ append_or_replace_message,
52
+ response_snapshot_data_at,
53
+ stream_event,
54
+ stream_message_data,
55
+ )
56
+ from flowent.workspace.output import (
57
+ EMPTY_MODEL_RESPONSE_DETAIL,
58
+ AssistantOutputBuilder,
59
+ approval_transcript,
60
+ assistant_retry_output_start_index,
61
+ run_error_event_data,
62
+ run_error_output_item,
63
+ trim_assistant_message_at_error,
64
+ )
65
+
66
+ logger = logging.getLogger("flowent.workspace.runtime")
67
+
68
+ AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
69
+ WORKSPACE_PROGRESS_FLUSH_INTERVAL_SECONDS = 0.5
70
+
71
+
72
@dataclass
class WorkspaceCompactTask:
    """Holder for the asyncio task tracked as the runtime's compact task."""

    # Task whose result is a (StoredMessage, TokenUsageInfo) pair.
    task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]]
75
+
76
+
77
+ class WorkspaceRuntime:
78
def __init__(
    self,
    *,
    chat_completion: CompletionCallable | None,
    compact_provider: CompactProvider,
    cwd: Path,
    mcp_manager: McpManager,
    store: StateStore,
) -> None:
    """Keep the injected collaborators and reset the run bookkeeping.

    Every argument is keyword-only and is stored on the instance under the
    same name it was passed with.
    """
    # Bookkeeping starts empty: no response, no compact task, generation 0.
    self.generation = 0
    self.active_response: WorkspaceResponse | None = None
    self.active_compact_task: WorkspaceCompactTask | None = None

    # Injected collaborators.
    self.store = store
    self.mcp_manager = mcp_manager
    self.cwd = cwd
    self.compact_provider = compact_provider
    self.chat_completion = chat_completion
95
+
96
def request_messages_for_content(
    self,
    state: StoredState,
    messages: list[StoredMessage],
    content: str,
) -> list[dict[str, object]]:
    """Build the model request payload for ``content``.

    The payload concatenates, in order: the runtime context messages for the
    working directory and the configured agent prompt, the explicit skill
    messages derived from ``content``, and the chat messages produced from
    ``messages`` together with the stored compacted context and the active
    compaction checkpoint. The combined list is passed through
    ``model_request_messages_data`` and its result is returned.
    """
    history = workspace_chat_messages(
        messages,
        self.store.read_compacted_context(),
        self.store.read_active_compaction_checkpoint(),
    )
    prompt_messages = [
        *runtime_context_messages(self.cwd, state.settings.agent_prompt),
        *explicit_skill_messages(self.cwd, self.store, content),
        *history,
    ]
    return model_request_messages_data(prompt_messages)
116
+
117
async def save_context_checkpoint(
    self,
    *,
    connection: ProviderConnection,
    context_window_limit: int,
    messages: list[StoredMessage],
    model_history: Sequence[ChatMessage | Mapping[str, object]],
    marker_content: str,
    source_message_id: str | None = None,
    trigger: Literal["manual", "auto"],
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo]:
    """Compact the conversation and persist the resulting checkpoint.

    Steps, in order:
      1. Ask the compact provider to compact ``messages`` / ``model_history``.
      2. Fold the summary usage (when reported) into the stored usage info,
         recompute context usage from ``token_after``, and save it.
      3. Create a system marker message carrying ``marker_content`` and the
         summary.
      4. Save a compaction checkpoint; its source message id falls back to
         the marker id when ``source_message_id`` is falsy.

    Returns:
        ``(marker, replacement_history_dumps, usage_info)`` where the second
        item is ``model_dump()`` of every replacement-history entry.
    """
    compact_input = CompactInput(
        messages=messages,
        model_history=compact_prompt_chat_messages(model_history),
        retained_message_token_budget=AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET,
        trigger=trigger,
    )
    result = await self.compact_provider.compact(
        connection,
        compact_input,
        completion=self.chat_completion,
    )

    # Bring the persisted usage in line with the compacted context.
    usage = self.store.read_usage_info()
    summary_usage = result.summary_usage
    if summary_usage is not None:
        usage = append_token_usage(
            usage,
            summary_usage,
            model_context_window=context_window_limit,
        )
    usage = recompute_context_usage(
        usage,
        result.token_after,
        model_context_window=context_window_limit,
    )
    self.store.save_usage_info(usage)

    marker = StoredMessage(
        author="system",
        content=marker_content,
        id=str(uuid4()),
        summary=result.summary,
        usage_info=usage,
    )
    checkpoint = StoredCompactionCheckpoint(
        id=str(uuid4()),
        method=result.method,
        replacement_history=result.replacement_history,
        source_message_id=source_message_id or marker.id,
        summary=result.summary,
        token_after=result.token_after,
        token_before=result.token_before,
        trigger=trigger,
    )
    self.store.save_compaction_checkpoint(checkpoint)

    logger.info(
        "Workspace compact checkpoint saved trigger=%s method=%s summary_length=%s token_before=%s token_after=%s",
        trigger,
        result.method,
        len(result.summary),
        result.token_before,
        result.token_after,
    )
    logger.log(TRACE_LEVEL, "Workspace compact summary=%r", result.summary)

    replacement_dumps = [entry.model_dump() for entry in result.replacement_history]
    return marker, replacement_dumps, usage
185
+
186
async def auto_compact_messages(
    self,
    *,
    connection: ProviderConnection,
    context_window_limit: int,
    messages: list[StoredMessage],
    model_history: Sequence[ChatMessage | Mapping[str, object]],
    source_message_id: str | None = None,
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
    """Save an automatic context checkpoint when the history calls for it.

    Returns:
        ``None`` when ``should_auto_compact`` declines for ``model_history``;
        otherwise the tuple produced by ``save_context_checkpoint`` with
        ``trigger="auto"`` and the optimized-context marker.

    Raises:
        RuntimeError: if saving the checkpoint raises any ``Exception``; the
            original error is logged and chained as the cause.
    """
    needs_compaction = should_auto_compact(
        model_history,
        context_window=context_window_limit,
    )
    if not needs_compaction:
        return None

    logger.info("Workspace auto compact requested")
    try:
        checkpoint = await self.save_context_checkpoint(
            connection=connection,
            context_window_limit=context_window_limit,
            marker_content=OPTIMIZED_CONTEXT_MARKER,
            messages=messages,
            model_history=model_history,
            source_message_id=source_message_id,
            trigger="auto",
        )
    except Exception as error:
        logger.exception("Workspace auto compact failed")
        raise RuntimeError("Context could not be optimized.") from error
    return checkpoint
214
+
215
async def run_turn(self, content: str) -> StoredMessage:
    """Run one full agent turn for ``content`` and return the assistant reply.

    The user message is appended and saved first. If auto-compaction produces
    a checkpoint, its marker is inserted before the user message and the list
    is saved again. Agent stream events are then folded into an
    ``AssistantOutputBuilder``; usage events are persisted as they arrive.
    After the stream ends, the context usage is refreshed when no usage event
    was seen for the latest output index, and the completed assistant message
    is saved after the turn's messages and returned.
    """
    state = self.store.read_state()
    connection = selected_connection(state)
    context_window_limit = context_window_for_settings(state.settings)
    prior_messages = state.messages

    user_message = StoredMessage(
        author="user",
        content=content,
        id=str(uuid4()),
    )
    next_messages = [*prior_messages, user_message]
    self.store.save_messages(next_messages)

    # Compaction is evaluated on the history as it was before this turn.
    model_history: list[ChatMessage | Mapping[str, object]] = [
        *runtime_context_messages(self.cwd, state.settings.agent_prompt),
        *workspace_chat_messages(
            prior_messages,
            self.store.read_compacted_context(),
            self.store.read_active_compaction_checkpoint(),
        ),
    ]
    auto_compaction = await self.auto_compact_messages(
        connection=connection,
        context_window_limit=context_window_limit,
        messages=prior_messages,
        model_history=model_history,
        source_message_id=None,
    )
    if auto_compaction is not None:
        marker, _replacement, _usage = auto_compaction
        next_messages = [*prior_messages, marker, user_message]
        self.store.save_messages(next_messages)

    request_messages = self.request_messages_for_content(
        state, next_messages, content
    )
    assistant_id = str(uuid4())
    assistant_output = AssistantOutputBuilder(assistant_id)
    turn_usage_info: TokenUsageInfo | None = None
    current_output_index = 0
    latest_usage_output_index: int | None = None

    async def review_tool_approval(request: ApprovalReviewRequest):
        # Attach the turn transcript and the user's request to the review.
        enriched_request = request.model_copy(
            update={
                "transcript": approval_transcript(next_messages),
                "user_request": content,
            }
        )
        return await review_approval_request(
            connection,
            enriched_request,
            completion=self.chat_completion,
        )

    async def tool_runner(
        name: str,
        arguments: dict[str, object],
        context: ToolContext,
    ):
        # Writable paths are re-read from the store on every tool call.
        writable = [Path(entry.path) for entry in self.store.read_writable_paths()]
        return await run_tool_with_path_permissions(
            name,
            arguments,
            context,
            review_approval=review_tool_approval,
            writable_paths=writable,
        )

    agent_events = run_agent_stream(
        completion=self.chat_completion,
        connection=connection,
        cwd=self.cwd,
        extra_tool_runner=self.mcp_manager.run_tool,
        extra_tool_specs=self.mcp_manager.tool_specs(),
        extra_tool_title=self.mcp_manager.tool_title,
        messages=request_messages,
        tool_runner=tool_runner,
    )
    async for event in agent_events:
        kind = event.event
        payload = event.data
        if kind == "start":
            event_id = payload.get("id")
            if isinstance(event_id, str):
                assistant_id = event_id
                assistant_output.set_assistant_id(event_id)
        elif kind == "output_start":
            index = payload.get("index")
            if isinstance(index, int):
                current_output_index = index
                assistant_output.start_group(index)
        elif kind == "delta":
            assistant_output.append_text(str(payload.get("content") or ""))
        elif kind == "thinking_delta":
            assistant_output.append_thinking(str(payload.get("content") or ""))
        elif kind == "usage":
            usage_data = payload.get("usage")
            if isinstance(usage_data, dict):
                usage_info = append_token_usage(
                    self.store.read_usage_info(),
                    TokenUsage.model_validate(usage_data),
                    model_context_window=context_window_limit,
                )
                self.store.save_usage_info(usage_info)
                turn_usage_info = usage_info
                latest_usage_output_index = current_output_index
        elif kind == "tool_start":
            tool = payload.get("tool")
            if isinstance(tool, dict) and isinstance(tool.get("id"), str):
                assistant_output.start_tool(StoredToolItem.model_validate(tool))
        elif kind in {"tool_done", "tool_error"}:
            tool_id = payload.get("id")
            if isinstance(tool_id, str):
                assistant_output.update_tool(tool_id, payload)
        elif kind == "done":
            message = payload.get("message")
            if isinstance(message, dict):
                assistant_id = str(message.get("id") or assistant_id)
                assistant_output.set_assistant_id(assistant_id)
                assistant_output.apply_done_message(message)

    # Refresh context usage unless a usage event covered the latest output.
    usage_is_stale = (
        turn_usage_info is None
        or latest_usage_output_index != current_output_index
    )
    final_usage_info = turn_usage_info
    if usage_is_stale:
        baseline_usage = final_usage_info or self.store.read_usage_info()
        final_usage_info = update_context_usage_for_response(
            baseline_usage,
            messages=request_messages,
            output_content=assistant_output.content,
            output_tools=[
                item.model_dump(exclude_none=True)
                for item in assistant_output.tools.values()
            ],
            model_context_window=context_window_limit,
        )
        self.store.save_usage_info(final_usage_info)

    assistant_message = StoredMessage(
        author="assistant",
        content=assistant_output.content,
        groups=assistant_output.groups,
        id=assistant_id,
        status="completed",
        thinking=assistant_output.thinking,
        tools=list(assistant_output.tools.values()),
        usage_info=final_usage_info,
    )
    self.store.save_messages([*next_messages, assistant_message])
    return assistant_message
360
+
361
async def reply_text(self, content: str) -> str:
    """Run a turn for ``content`` and return only the reply's ``content``."""
    assistant_message = await self.run_turn(content)
    return assistant_message.content
363
+
364
async def gather_shutdown_tasks(
    self, label: str, tasks: Sequence[asyncio.Task[Any]]
) -> None:
    """Await ``tasks`` and log any failure other than a cancellation.

    Exceptions are collected rather than raised (``return_exceptions=True``).
    ``CancelledError`` results and non-exception results are ignored; every
    other exception is logged at error level, prefixed with ``label``.
    An empty ``tasks`` sequence returns immediately.
    """
    if not tasks:
        return
    for outcome in await asyncio.gather(*tasks, return_exceptions=True):
        is_failure = isinstance(outcome, BaseException) and not isinstance(
            outcome, asyncio.CancelledError
        )
        if not is_failure:
            continue
        logger.error(
            "%s cleanup task failed",
            label,
            exc_info=(type(outcome), outcome, outcome.__traceback__),
        )
379
+
380
async def stop_response_for_shutdown(self) -> None:
    """Cancel the active response task (if any) and wait for it to settle.

    ``gather_shutdown_tasks`` is always invoked, with an empty list when
    there is no active response or the response has no task.
    """
    active = self.active_response
    task = None if active is None else active.task
    pending: list[asyncio.Task[None]] = []
    if task is not None:
        if not task.done():
            task.cancel()
        pending.append(task)
    await self.gather_shutdown_tasks("Workspace response", pending)
388
+
389
async def stop_compact_for_shutdown(self) -> None:
    """Cancel the active compact task (if any) and clear the compacting flag.

    The stored ``is_compacting`` flag is reset in a ``finally`` block so it
    is cleared even when waiting for the cancelled task is itself
    interrupted (for example, when the shutdown coroutine is cancelled).
    Otherwise the flag would stay set in the store, and ``start_response``,
    ``edit_message`` and ``retry_error`` reject requests while it is set.
    """
    compact = self.active_compact_task
    try:
        if compact is None:
            return
        task = compact.task
        # Detach first so the runtime never points at a task being torn down.
        self.active_compact_task = None
        if not task.done():
            task.cancel()
        await self.gather_shutdown_tasks("Workspace compact", [task])
    finally:
        self.store.save_is_compacting(False)
399
+
400
async def stop_for_shutdown(self) -> None:
    """Stop the active response first, then the active compaction."""
    shutdown_steps = (
        self.stop_response_for_shutdown,
        self.stop_compact_for_shutdown,
    )
    for step in shutdown_steps:
        await step()
403
+
404
def current_response(self) -> WorkspaceResponse | None:
    """Return the active response, or ``None`` if absent or marked done."""
    active = self.active_response
    return None if active is None or active.is_done else active
409
+
410
def has_active_response(self) -> bool:
    """Tell whether a response exists, is not done, and its task still runs."""
    active = self.active_response
    if active is None or active.is_done:
        return False
    task = active.task
    if task is None:
        return False
    return not task.done()
418
+
419
def clear(self) -> list[StoredMessage]:
    """Invalidate the active response and save an empty message list.

    The generation counter is bumped, the active response (if any) is marked
    done, and a still-running task is flagged ``discard_on_cancel`` before
    being cancelled. Returns whatever ``store.save_messages([])`` returns.
    """
    self.generation += 1
    active = self.active_response
    if active is not None:
        active.is_done = True
        task = active.task
        still_running = task is not None and not task.done()
        if still_running:
            # Flag before cancelling so the task sees it while unwinding.
            active.discard_on_cancel = True
            task.cancel()
    return self.store.save_messages([])
428
+
429
async def notify_cleared_response(self) -> None:
    """Wake every waiter on the active response's condition, if one exists."""
    active = self.active_response
    if active is not None:
        condition = active.condition
        async with condition:
            condition.notify_all()
435
+
436
async def append_event(
    self, response: WorkspaceResponse, event: str, data: dict[str, object]
) -> None:
    """Append ``(index, event, data)`` to the response and wake its waiters.

    The index is ``response.latest_event_index + 1``; it is read, and the
    entry appended, while the response's condition is held.
    """
    condition = response.condition
    async with condition:
        entry = (response.latest_event_index + 1, event, data)
        response.events.append(entry)
        condition.notify_all()
442
+
443
async def append_snapshot(
    self, response: WorkspaceResponse, message: StoredMessage
) -> None:
    """Record ``message`` as the latest snapshot and emit a snapshot event.

    Messages whose author is not ``"assistant"`` are ignored entirely.
    """
    if message.author == "assistant":
        response.latest_snapshot = message
        snapshot_payload = {
            "message": stream_message_data(message, response.active_output)
        }
        await self.append_event(response, "snapshot", snapshot_payload)
454
+
455
def start_response(
    self, content: str, *, message_id: str | None = None
) -> WorkspaceResponse:
    """Persist a new user message and start generating the reply to it.

    Args:
        content: Text of the user message.
        message_id: Id for the user message; a fresh UUID string is used
            when it is falsy.

    Raises:
        HTTPException: 409 when a response is already in progress, when the
            store reports a compaction in progress, or when a stored message
            already has the chosen id.
    """
    if self.has_active_response():
        raise HTTPException(
            status_code=409,
            detail="Response in progress",
        )
    if self.store.read_is_compacting():
        raise HTTPException(
            status_code=409,
            detail="Context refining in progress. Please wait a moment.",
        )

    state = self.store.read_state()
    user_message_id = message_id or str(uuid4())
    for existing in state.messages:
        if existing.id == user_message_id:
            raise HTTPException(status_code=409, detail="Message already exists.")

    user_message = StoredMessage(
        author="user",
        content=content,
        id=user_message_id,
    )
    next_messages = [*state.messages, user_message]
    self.store.save_messages(next_messages)
    return self._start_response_from_messages(
        content=content,
        next_messages=next_messages,
        state=state,
        user_message=user_message,
    )
485
+
486
def edit_message(
    self,
    message_id: str,
    *,
    action: Literal["resend", "save"],
    content: str,
) -> tuple[list[StoredMessage], WorkspaceResponse | None]:
    """Replace the content of a stored user message.

    With ``action == "save"`` only that message changes and no response is
    started. For any other action, every message after the edited one is
    dropped and a new response is started from the edited message.

    Returns:
        ``(messages, response)``. For ``"save"`` the first item is the return
        value of ``store.save_messages`` and ``response`` is ``None``;
        otherwise it is the truncated list ending with the edited message.

    Raises:
        HTTPException: 409 while a response or compaction is in progress,
            404 when ``message_id`` is unknown, 400 when the target is not a
            user message.
    """
    if self.has_active_response():
        raise HTTPException(
            status_code=409,
            detail="Response in progress",
        )
    if self.store.read_is_compacting():
        raise HTTPException(
            status_code=409,
            detail="Context refining in progress. Please wait a moment.",
        )

    state = self.store.read_state()
    for message_index, message in enumerate(state.messages):
        if message.id == message_id:
            break
    else:
        raise HTTPException(status_code=404, detail="Message not found.")
    if message.author != "user":
        raise HTTPException(
            status_code=400, detail="Only user messages can be edited."
        )

    updated_message = message.model_copy(update={"content": content})
    earlier_messages = state.messages[:message_index]
    if action == "save":
        later_messages = state.messages[message_index + 1 :]
        saved = self.store.save_messages(
            [*earlier_messages, updated_message, *later_messages]
        )
        return saved, None

    # Resend: everything after the edited message is discarded.
    next_messages = [*earlier_messages, updated_message]
    self.store.save_messages(next_messages)
    state_before_edit = state.model_copy(update={"messages": earlier_messages})
    response = self._start_response_from_messages(
        content=content,
        next_messages=next_messages,
        state=state_before_edit,
        user_message=updated_message,
    )
    return next_messages, response
539
+
540
def retry_error(
    self,
    message_id: str,
    *,
    error_id: str,
) -> tuple[list[StoredMessage], WorkspaceResponse]:
    """Restart an assistant message from one of its error blocks.

    The assistant message is passed to ``trim_assistant_message_at_error``
    (with status ``"running"``), messages after it are dropped, and a new
    response is started that continues the trimmed message. The request sent
    to the model is the request built for the preceding user message plus
    the model-visible output of the trimmed message; usage accounting is
    given only the former.

    Returns:
        ``(messages, response)`` where ``messages`` ends with the trimmed
        assistant message.

    Raises:
        HTTPException: 409 while a response or compaction is in progress,
            404 when the message or the error block is not found, 400 when
            the target is not an assistant message or no user message
            precedes it.
    """
    if self.has_active_response():
        raise HTTPException(
            status_code=409,
            detail="Response in progress",
        )
    if self.store.read_is_compacting():
        raise HTTPException(
            status_code=409,
            detail="Context refining in progress. Please wait a moment.",
        )

    state = self.store.read_state()
    for message_index, message in enumerate(state.messages):
        if message.id == message_id:
            break
    else:
        raise HTTPException(status_code=404, detail="Message not found.")
    if message.author != "assistant":
        raise HTTPException(
            status_code=400, detail="Only assistant errors can be retried."
        )

    previous_messages = state.messages[:message_index]
    # Closest user message before the assistant message being retried.
    previous_user_message = None
    for candidate in reversed(previous_messages):
        if candidate.author == "user":
            previous_user_message = candidate
            break
    if previous_user_message is None:
        raise HTTPException(status_code=400, detail="Message history is invalid.")

    trimmed_message = trim_assistant_message_at_error(
        message,
        error_id,
        status="running",
    )
    if trimmed_message is None:
        raise HTTPException(status_code=404, detail="Error block not found.")

    next_messages = [*previous_messages, trimmed_message]
    self.store.save_messages(next_messages)

    state_before_assistant = state.model_copy(
        update={"messages": previous_messages}
    )
    retry_content = previous_user_message.content
    base_request_messages = self.request_messages_for_content(
        state_before_assistant,
        previous_messages,
        retry_content,
    )
    request_messages = [
        *base_request_messages,
        *model_visible_assistant_output_messages(trimmed_message),
    ]
    response = self._start_response_from_messages(
        content=retry_content,
        initial_assistant_message=trimmed_message,
        next_messages=next_messages,
        output_start_index=assistant_retry_output_start_index(trimmed_message),
        request_messages=request_messages,
        state=state_before_assistant,
        usage_request_messages=base_request_messages,
        user_message=previous_user_message,
    )
    return next_messages, response
616
+
617
+ def _start_response_from_messages(
618
+ self,
619
+ *,
620
+ content: str,
621
+ initial_assistant_message: StoredMessage | None = None,
622
+ next_messages: list[StoredMessage],
623
+ output_start_index: int = 1,
624
+ request_messages: list[dict[str, object]] | None = None,
625
+ state: StoredState,
626
+ usage_request_messages: list[dict[str, object]] | None = None,
627
+ user_message: StoredMessage,
628
+ ) -> WorkspaceResponse:
629
+ connection = selected_connection(state)
630
+ context_window_limit = context_window_for_settings(state.settings)
631
+ response = WorkspaceResponse(
632
+ condition=asyncio.Condition(),
633
+ generation=self.generation,
634
+ )
635
+ self.active_response = response
636
+
637
+ async def response_task() -> None:
638
+ nonlocal next_messages
639
+ assistant_message = (
640
+ initial_assistant_message
641
+ if initial_assistant_message is not None
642
+ else StoredMessage(
643
+ author="assistant",
644
+ content="",
645
+ id=str(uuid4()),
646
+ status="running",
647
+ )
648
+ )
649
+ assistant_output = (
650
+ AssistantOutputBuilder.from_message(assistant_message)
651
+ if initial_assistant_message is not None
652
+ else AssistantOutputBuilder(assistant_message.id)
653
+ )
654
+ initial_assistant_content = assistant_output.content
655
+ initial_assistant_thinking = assistant_output.thinking
656
+ last_progress_flush_at = 0.0
657
+
658
+ def is_current_generation() -> bool:
659
+ return response.generation == self.generation
660
+
661
+ def update_assistant_message(
662
+ status: str = "running", *, persist: bool
663
+ ) -> StoredMessage | None:
664
+ nonlocal next_messages, assistant_message
665
+ if not is_current_generation() or response.discard_on_cancel:
666
+ return None
667
+ assistant_message = StoredMessage(
668
+ author="assistant",
669
+ content=assistant_output.content,
670
+ groups=assistant_output.groups,
671
+ id=assistant_message.id,
672
+ status=status,
673
+ thinking=assistant_output.thinking,
674
+ tools=list(assistant_output.tools.values()),
675
+ usage_info=self.store.read_usage_info(),
676
+ )
677
+ next_messages = append_or_replace_message(
678
+ next_messages, assistant_message
679
+ )
680
+ if persist:
681
+ self.store.upsert_message(assistant_message)
682
+ return assistant_message
683
+
684
+ def persist_assistant(status: str = "running") -> StoredMessage | None:
685
+ nonlocal last_progress_flush_at
686
+ message = update_assistant_message(status, persist=True)
687
+ if status == "running" and message is not None:
688
+ last_progress_flush_at = time.monotonic()
689
+ return message
690
+
691
+ def refresh_assistant(status: str = "running") -> StoredMessage | None:
692
+ return update_assistant_message(status, persist=False)
693
+
694
+ def persist_assistant_progress() -> StoredMessage | None:
695
+ nonlocal last_progress_flush_at
696
+ now = time.monotonic()
697
+ if (
698
+ last_progress_flush_at > 0
699
+ and now - last_progress_flush_at
700
+ < WORKSPACE_PROGRESS_FLUSH_INTERVAL_SECONDS
701
+ ):
702
+ refresh_assistant()
703
+ return None
704
+ last_progress_flush_at = now
705
+ return update_assistant_message("running", persist=True)
706
+
707
+ try:
708
+ current_tool_id: str | None = None
709
+ turn_usage_info: TokenUsageInfo | None = None
710
+ current_output_index = 0
711
+ latest_usage_output_index: int | None = None
712
+ if request_messages is None:
713
+ current_request_messages = self.request_messages_for_content(
714
+ state,
715
+ next_messages,
716
+ content,
717
+ )
718
+ pre_turn_request_messages = self.request_messages_for_content(
719
+ state,
720
+ state.messages,
721
+ content,
722
+ )
723
+ auto_compaction = await self.auto_compact_messages(
724
+ connection=connection,
725
+ context_window_limit=context_window_limit,
726
+ messages=state.messages,
727
+ model_history=pre_turn_request_messages,
728
+ source_message_id=None,
729
+ )
730
+ if auto_compaction is not None:
731
+ marker, _, usage_info = auto_compaction
732
+ next_messages = [*state.messages, marker, user_message]
733
+ self.store.save_messages(next_messages)
734
+ await self.append_event(
735
+ response,
736
+ "context_optimized",
737
+ {
738
+ "message": marker.model_dump(),
739
+ **usage_event_data(usage_info),
740
+ },
741
+ )
742
+ current_request_messages = self.request_messages_for_content(
743
+ state,
744
+ next_messages,
745
+ content,
746
+ )
747
+ else:
748
+ current_request_messages = request_messages
749
+ context_usage_messages = (
750
+ usage_request_messages
751
+ if usage_request_messages is not None
752
+ else current_request_messages
753
+ )
754
+
755
+ async def review_tool_approval(request: ApprovalReviewRequest):
756
+ return await review_approval_request(
757
+ connection,
758
+ request.model_copy(
759
+ update={
760
+ "transcript": approval_transcript(next_messages),
761
+ "user_request": content,
762
+ }
763
+ ),
764
+ completion=self.chat_completion,
765
+ )
766
+
767
+ async def tool_runner(
768
+ name: str,
769
+ arguments: dict[str, object],
770
+ context: ToolContext,
771
+ ):
772
+ return await run_tool_with_path_permissions(
773
+ name,
774
+ arguments,
775
+ context,
776
+ review_approval=review_tool_approval,
777
+ writable_paths=[
778
+ Path(path.path) for path in self.store.read_writable_paths()
779
+ ],
780
+ )
781
+
782
+ async def context_compactor(
783
+ conversation: Sequence[Mapping[str, object]],
784
+ ) -> AgentContextUpdate | None:
785
+ nonlocal next_messages
786
+ if not is_current_generation() or response.discard_on_cancel:
787
+ return None
788
+ assistant_snapshot = StoredMessage(
789
+ author="assistant",
790
+ content=assistant_output.content,
791
+ groups=assistant_output.groups,
792
+ id=assistant_message.id,
793
+ status="running",
794
+ thinking=assistant_output.thinking,
795
+ tools=list(assistant_output.tools.values()),
796
+ usage_info=self.store.read_usage_info(),
797
+ )
798
+ auto_result = await self.auto_compact_messages(
799
+ connection=connection,
800
+ context_window_limit=context_window_limit,
801
+ messages=next_messages,
802
+ model_history=compact_prompt_chat_messages(conversation),
803
+ source_message_id=assistant_snapshot.id,
804
+ )
805
+ if auto_result is None:
806
+ return None
807
+ marker, replacement_history, usage_info = auto_result
808
+ assistant_snapshot = assistant_snapshot.model_copy(
809
+ update={"usage_info": usage_info}
810
+ )
811
+ next_messages = append_or_replace_message(
812
+ [*next_messages, marker], assistant_snapshot
813
+ )
814
+ self.store.save_messages(next_messages)
815
+ compacted_conversation = [
816
+ dict(conversation[0]),
817
+ *replacement_history,
818
+ ]
819
+ return AgentContextUpdate(
820
+ conversation=compacted_conversation,
821
+ message={
822
+ **marker.model_dump(),
823
+ "usage_info": usage_info.model_dump(),
824
+ },
825
+ )
826
+
827
+ async for event in run_agent_stream(
828
+ completion=self.chat_completion,
829
+ connection=connection,
830
+ context_compactor=context_compactor,
831
+ cwd=self.cwd,
832
+ extra_tool_runner=self.mcp_manager.run_tool,
833
+ extra_tool_specs=self.mcp_manager.tool_specs(),
834
+ extra_tool_title=self.mcp_manager.tool_title,
835
+ messages=current_request_messages,
836
+ tool_runner=tool_runner,
837
+ ):
838
+ if not is_current_generation() or response.discard_on_cancel:
839
+ raise asyncio.CancelledError
840
+ run_event_data = event.data
841
+ should_append_run_event = event.event != "usage"
842
+ snapshot_after_event: StoredMessage | None = None
843
+ if event.event == "start":
844
+ event_id = event.data.get("id")
845
+ if initial_assistant_message is not None:
846
+ assistant_output.set_assistant_id(assistant_message.id)
847
+ run_event_data = {"id": assistant_message.id}
848
+ snapshot_after_event = persist_assistant()
849
+ elif isinstance(event_id, str):
850
+ assistant_message = assistant_message.model_copy(
851
+ update={"id": event_id}
852
+ )
853
+ assistant_output.set_assistant_id(event_id)
854
+ snapshot_after_event = persist_assistant()
855
+ if event.event == "output_start":
856
+ index = event.data.get("index")
857
+ if isinstance(index, int):
858
+ output_index = index + output_start_index - 1
859
+ current_output_index = output_index
860
+ run_event_data = {**event.data, "index": output_index}
861
+ response.active_output = None
862
+ assistant_output.start_group(output_index)
863
+ snapshot_after_event = persist_assistant()
864
+ if event.event == "output_done":
865
+ index = event.data.get("index")
866
+ if isinstance(index, int):
867
+ run_event_data = {
868
+ **event.data,
869
+ "index": index + output_start_index - 1,
870
+ }
871
+ response.active_output = None
872
+ if event.event == "tool_start":
873
+ tool = event.data.get("tool")
874
+ if isinstance(tool, dict) and isinstance(tool.get("id"), str):
875
+ response.active_output = None
876
+ current_tool_id = tool["id"]
877
+ assistant_output.start_tool(
878
+ StoredToolItem.model_validate(tool)
879
+ )
880
+ snapshot_after_event = persist_assistant()
881
+ if event.event in {"tool_done", "tool_error"}:
882
+ tool_id = event.data.get("id")
883
+ if (
884
+ isinstance(tool_id, str)
885
+ and tool_id in assistant_output.tools
886
+ ):
887
+ current_tool_id = (
888
+ None if current_tool_id == tool_id else current_tool_id
889
+ )
890
+ assistant_output.update_tool(tool_id, event.data)
891
+ snapshot_after_event = persist_assistant()
892
+ if event.event == "delta":
893
+ response.active_output = "text"
894
+ assistant_output.append_text(
895
+ str(event.data.get("content") or "")
896
+ )
897
+ snapshot_after_event = persist_assistant_progress()
898
+ if event.event == "thinking_delta":
899
+ response.active_output = "thinking"
900
+ assistant_output.append_thinking(
901
+ str(event.data.get("content") or "")
902
+ )
903
+ snapshot_after_event = persist_assistant_progress()
904
+ if event.event == "usage":
905
+ usage_data = event.data.get("usage")
906
+ if isinstance(usage_data, dict):
907
+ usage_info = append_token_usage(
908
+ self.store.read_usage_info(),
909
+ TokenUsage.model_validate(usage_data),
910
+ model_context_window=context_window_limit,
911
+ )
912
+ self.store.save_usage_info(usage_info)
913
+ turn_usage_info = usage_info
914
+ latest_usage_output_index = current_output_index
915
+ run_event_data = usage_event_data(usage_info)
916
+ should_append_run_event = True
917
+ snapshot_after_event = persist_assistant()
918
+ logger.log(
919
+ TRACE_LEVEL,
920
+ "Workspace stream event=%s data=%r",
921
+ event.event,
922
+ event.data,
923
+ )
924
+ if event.event == "done":
925
+ message = event.data.get("message")
926
+ if isinstance(message, dict):
927
+ response.active_output = None
928
+ assistant_output.apply_done_message(
929
+ message,
930
+ content_prefix=initial_assistant_content,
931
+ thinking_prefix=initial_assistant_thinking,
932
+ )
933
+ response_usage_info = self.store.read_usage_info()
934
+ final_usage_info = turn_usage_info
935
+ if (
936
+ final_usage_info is None
937
+ or latest_usage_output_index != current_output_index
938
+ ):
939
+ final_usage_info = update_context_usage_for_response(
940
+ final_usage_info or response_usage_info,
941
+ messages=context_usage_messages,
942
+ output_content=assistant_output.content,
943
+ output_tools=[
944
+ tool.model_dump(exclude_none=True)
945
+ for tool in assistant_output.tools.values()
946
+ ],
947
+ model_context_window=context_window_limit,
948
+ )
949
+ self.store.save_usage_info(final_usage_info)
950
+ snapshot_after_event = persist_assistant("completed")
951
+ if snapshot_after_event is not None:
952
+ run_event_data = {
953
+ "message": stream_message_data(snapshot_after_event)
954
+ }
955
+ if event.event == "done" and snapshot_after_event is not None:
956
+ await self.append_snapshot(response, snapshot_after_event)
957
+ await self.append_event(response, event.event, run_event_data)
958
+ else:
959
+ if should_append_run_event:
960
+ await self.append_event(
961
+ response, event.event, run_event_data
962
+ )
963
+ if snapshot_after_event is not None:
964
+ await self.append_snapshot(response, snapshot_after_event)
965
+ except asyncio.CancelledError:
966
+ logger.info("Workspace response stopped")
967
+ if not response.discard_on_cancel:
968
+ interrupted_snapshot = persist_assistant("interrupted")
969
+ if interrupted_snapshot is not None:
970
+ await self.append_snapshot(response, interrupted_snapshot)
971
+ await self.append_event(
972
+ response,
973
+ "error",
974
+ {"message": "Response stopped."},
975
+ )
976
+ raise
977
+ except Exception as error:
978
+ logger.exception("Workspace response failed")
979
+ if (
980
+ current_tool_id is not None
981
+ and current_tool_id in assistant_output.tools
982
+ and assistant_output.tools[current_tool_id].status == "running"
983
+ ):
984
+ assistant_output.update_tool(
985
+ current_tool_id,
986
+ {"content": str(error) or "Tool failed.", "status": "failed"},
987
+ )
988
+ error_item = assistant_output.append_error(
989
+ run_error_output_item(
990
+ assistant_message.id,
991
+ str(error) or EMPTY_MODEL_RESPONSE_DETAIL,
992
+ )
993
+ )
994
+ failed_snapshot = persist_assistant("failed")
995
+ if failed_snapshot is not None:
996
+ await self.append_snapshot(response, failed_snapshot)
997
+ await self.append_event(
998
+ response, "error", run_error_event_data(error_item)
999
+ )
1000
+ finally:
1001
+ response.is_done = True
1002
+ async with response.condition:
1003
+ response.condition.notify_all()
1004
+ if self.active_response is response:
1005
+ self.active_response = None
1006
+
1007
+ response.task = asyncio.create_task(response_task())
1008
+ return response
1009
+
1010
async def response_stream(
    self,
    response: WorkspaceResponse,
    after: int = 0,
    include_snapshots: bool = True,
) -> AsyncIterator[str]:
    """Replay and then follow the events recorded on *response*.

    Args:
        response: Object exposing ``events`` (an iterable of
            ``(index, event_name, data)`` triples), an ``is_done`` flag and
            an awaitable ``condition`` used to wait for new events.
        after: Index of the last event already delivered; only events with
            a strictly greater index are emitted. When positive, a
            reconnect snapshot is looked up for that index first.
        include_snapshots: When false, ``"snapshot"`` events (including the
            reconnect snapshot) are skipped.

    Yields:
        One chunk per emitted event, as produced by ``stream_event``.
        Iteration ends after a ``"done"`` or ``"error"`` event, or once the
        response is finished and nothing is left to deliver.
    """
    cursor = after + 1

    if after > 0:
        resume_snapshot = response_snapshot_data_at(response, after)
    else:
        resume_snapshot = None
    if include_snapshots and resume_snapshot is not None:
        yield stream_event(
            "snapshot",
            {"message": resume_snapshot},
            event_id=after,
        )

    terminal_events = {"done", "error"}
    while True:
        async with response.condition:

            # The threshold is bound as a default so the predicate keeps the
            # cursor value of this iteration.
            def has_pending(threshold: int = cursor) -> bool:
                if response.is_done:
                    return True
                return any(
                    entry_index >= threshold
                    for entry_index, _, _ in response.events
                )

            await response.condition.wait_for(has_pending)
            pending = [entry for entry in response.events if entry[0] >= cursor]

        for entry_index, event_name, payload in pending:
            cursor = entry_index + 1
            if include_snapshots or event_name != "snapshot":
                yield stream_event(event_name, payload, event_id=entry_index)
                if event_name in terminal_events:
                    return

        # Woken only because the response finished: nothing more will come.
        if not pending and response.is_done:
            return
1049
+
1050
def stream_current_response(self) -> WorkspaceResponse:
    """Return the value of ``self.current_response()``.

    Raises:
        HTTPException: With status 404 when ``current_response()`` returns
            ``None``.
    """
    active = self.current_response()
    if active is not None:
        return active
    raise HTTPException(status_code=404, detail="Response not found.")
1055
+
1056
def stop_response(self) -> None:
    """Cancel the task of the current response if it is still running.

    Does nothing when ``current_response()`` returns ``None``, when that
    response has no task, or when its task has already finished.
    """
    active = self.current_response()
    if active is None:
        return
    task = active.task
    if task is None or task.done():
        return
    task.cancel()
1064
+
1065
def compact_stream(self) -> AsyncIterator[str]:
    """Start or join a manual context compaction and stream its outcome.

    If ``self.active_compact_task`` holds an unfinished task, that task is
    reused. Otherwise any finished task reference is dropped and a new
    compaction task is created from the current stored state.

    Returns:
        An async iterator. On success it yields a ``"usage"`` event, a
        ``"context_optimized"`` event and a ``"done"`` event. If the task
        raises an ``Exception`` it yields a single ``"error"`` event.

    Raises:
        HTTPException: With status 409 when a new compaction would have to
            be started while ``current_response()`` is not ``None``.
    """

    async def run_manual_compact(
        *,
        checkpoint: StoredCompactionCheckpoint | None,
        connection: ProviderConnection,
        context_window_limit: int,
        state: StoredState,
    ) -> tuple[StoredMessage, TokenUsageInfo]:
        # Build the model history, save a checkpoint and append its marker
        # to the stored messages.
        logger.info("Workspace compact requested")
        try:
            model_history: list[ChatMessage | Mapping[str, object]] = [
                *runtime_context_messages(self.cwd, state.settings.agent_prompt),
                *workspace_chat_messages(
                    state.messages,
                    self.store.read_compacted_context(),
                    checkpoint,
                ),
            ]
            (
                marker,
                _replacement_history,
                usage_info,
            ) = await self.save_context_checkpoint(
                connection=connection,
                context_window_limit=context_window_limit,
                marker_content=COMPACTED_CONTEXT_MARKER,
                messages=state.messages,
                model_history=model_history,
                source_message_id=None,
                trigger="manual",
            )
            self.store.save_messages([*state.messages, marker])
            logger.info("Workspace compact completed")
        except Exception:
            logger.exception("Workspace compact failed")
            raise
        else:
            return marker, usage_info
        finally:
            # The compacting flag is cleared on every exit path.
            self.store.save_is_compacting(False)

    def clear_active_compact_task(
        task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]],
    ) -> None:
        # Done-callback: forget the task if it is still the registered one.
        registered = self.active_compact_task
        if registered is not None and registered.task is task:
            self.active_compact_task = None
        # Fetch the task's exception here; a cancelled task raises
        # CancelledError from exception(), which is ignored.
        try:
            task.exception()
        except asyncio.CancelledError:
            pass

    compact_task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]]

    in_flight = self.active_compact_task
    if in_flight is not None and in_flight.task.done():
        # A finished task cannot be joined; drop it and start over below.
        self.active_compact_task = None
        in_flight = None

    if in_flight is not None:
        compact_task = in_flight.task
    else:
        if self.current_response() is not None:
            raise HTTPException(
                status_code=409,
                detail="Compact is unavailable while Flowent is responding.",
            )
        state = self.store.read_state()
        connection = selected_connection(state)
        context_window_limit = context_window_for_settings(state.settings)
        checkpoint = self.store.read_active_compaction_checkpoint()
        self.store.save_is_compacting(True)
        compact_task = asyncio.create_task(
            run_manual_compact(
                checkpoint=checkpoint,
                connection=connection,
                context_window_limit=context_window_limit,
                state=state,
            )
        )
        compact_task.add_done_callback(clear_active_compact_task)
        self.active_compact_task = WorkspaceCompactTask(task=compact_task)

    async def compact_events() -> AsyncIterator[str]:
        # The task is awaited through shield().
        try:
            marker, usage_info = await asyncio.shield(compact_task)
        except Exception:
            failure_payload = {"message": "Context could not be compacted."}
            yield stream_event("error", failure_payload)
            return

        marker_data = marker.model_dump()
        yield stream_event("usage", usage_event_data(usage_info))
        optimized_payload = {
            "message": marker_data,
            **usage_event_data(usage_info),
        }
        yield stream_event("context_optimized", optimized_payload)
        yield stream_event("done", {"message": marker_data})

    return compact_events()