flowent 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,14 @@
1
1
  import asyncio
2
+ import copy
2
3
  import json
3
4
  import logging
4
5
  import os
5
- from collections.abc import AsyncIterator, Mapping, Sequence
6
+ import time
7
+ from collections.abc import AsyncIterator, Awaitable, Mapping, Sequence
6
8
  from contextlib import asynccontextmanager, suppress
7
9
  from dataclasses import dataclass, field
8
10
  from pathlib import Path
9
- from typing import Literal
11
+ from typing import Any, Literal
10
12
  from uuid import uuid4
11
13
 
12
14
  from fastapi import FastAPI, HTTPException, Query
@@ -85,10 +87,11 @@ logger = logging.getLogger("flowent.main")
85
87
  DEFAULT_STATIC_DIR = Path(__file__).parent / "static"
86
88
  COMPACTED_CONTEXT_MARKER = "Context compacted"
87
89
  OPTIMIZED_CONTEXT_MARKER = "Context optimized"
88
- DEFAULT_AUTO_COMPACT_TOKEN_LIMIT = 120_000
90
+ DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO = 0.95
89
91
  AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
90
92
  APPROVAL_TRANSCRIPT_MESSAGE_LIMIT = 12
91
93
  APPROVAL_TRANSCRIPT_TEXT_LIMIT = 2_000
94
+ WORKSPACE_PROGRESS_FLUSH_INTERVAL_SECONDS = 0.5
92
95
 
93
96
 
94
97
  class ProviderModelsRequest(BaseModel):
@@ -121,6 +124,14 @@ class WorkspaceRunResponse(BaseModel):
121
124
  run_id: str
122
125
 
123
126
 
127
+ class WorkspaceClearResponse(BaseModel):
128
+ model_config = ConfigDict(extra="forbid")
129
+
130
+ active_run_id: str | None = None
131
+ messages: list[StoredMessage]
132
+ usage_info: TokenUsageInfo | None = None
133
+
134
+
124
135
  @dataclass
125
136
  class WorkspaceCompactTask:
126
137
  task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]]
@@ -172,10 +183,13 @@ class WritablePathListResponse(BaseModel):
172
183
  @dataclass
173
184
  class WorkspaceRun:
174
185
  condition: asyncio.Condition
186
+ active_output: Literal["text", "thinking"] | None = None
175
187
  discard_on_cancel: bool = False
176
188
  events: list[tuple[int, str, dict[str, object]]] = field(default_factory=list)
189
+ generation: int = 0
177
190
  id: str = field(default_factory=lambda: str(uuid4()))
178
191
  is_done: bool = False
192
+ latest_snapshot: StoredMessage | None = None
179
193
  task: asyncio.Task[None] | None = None
180
194
 
181
195
  @property
@@ -183,8 +197,20 @@ class WorkspaceRun:
183
197
  return self.events[-1][0] if self.events else 0
184
198
 
185
199
 
186
- def stream_event(event: str, data: dict[str, object]) -> str:
187
- return f"event: {event}\ndata: {json.dumps(data)}\n\n"
200
+ def stream_event(
201
+ event: str, data: dict[str, object], event_id: int | None = None
202
+ ) -> str:
203
+ id_line = f"id: {event_id}\n" if event_id is not None else ""
204
+ return f"{id_line}event: {event}\ndata: {json.dumps(data)}\n\n"
205
+
206
+
207
+ def stream_message_data(
208
+ message: StoredMessage, active_output: Literal["text", "thinking"] | None = None
209
+ ) -> dict[str, object]:
210
+ data = {**message.model_dump(), "status": message.status}
211
+ if active_output is not None:
212
+ data["active_output"] = active_output
213
+ return data
188
214
 
189
215
 
190
216
  def append_or_replace_message(
@@ -196,6 +222,136 @@ def append_or_replace_message(
196
222
  ]
197
223
 
198
224
 
225
+ def run_snapshot_data_at(
226
+ run: WorkspaceRun, event_index: int
227
+ ) -> dict[str, object] | None:
228
+ snapshot_event_index = 0
229
+ snapshot: dict[str, object] | None = None
230
+ for current_event_index, event, data in run.events:
231
+ if current_event_index > event_index:
232
+ break
233
+ if event != "snapshot":
234
+ if event == "start" and snapshot is None:
235
+ assistant_id = data.get("id")
236
+ if isinstance(assistant_id, str):
237
+ snapshot_event_index = current_event_index
238
+ snapshot = {
239
+ "author": "assistant",
240
+ "content": "",
241
+ "groups": [],
242
+ "id": assistant_id,
243
+ "status": "running",
244
+ "tools": [],
245
+ }
246
+ continue
247
+ message = data.get("message")
248
+ if isinstance(message, dict):
249
+ snapshot_event_index = current_event_index
250
+ snapshot = copy.deepcopy(message)
251
+ if snapshot is None:
252
+ return None
253
+ for current_event_index, event, data in run.events:
254
+ if current_event_index <= snapshot_event_index:
255
+ continue
256
+ if current_event_index > event_index:
257
+ break
258
+ apply_stream_event_to_snapshot(snapshot, event, data)
259
+ return snapshot
260
+
261
+
262
+ def apply_stream_event_to_snapshot(
263
+ snapshot: dict[str, object], event: str, data: dict[str, object]
264
+ ) -> None:
265
+ if event == "output_start":
266
+ snapshot.pop("active_output", None)
267
+ index = data.get("index")
268
+ if isinstance(index, int):
269
+ append_snapshot_group(snapshot, index)
270
+ if event == "delta":
271
+ append_snapshot_text(snapshot, str(data.get("content") or ""))
272
+ if event == "thinking_delta":
273
+ append_snapshot_thinking(snapshot, str(data.get("content") or ""))
274
+ if event == "output_done":
275
+ snapshot.pop("active_output", None)
276
+
277
+
278
+ def snapshot_groups(snapshot: dict[str, object]) -> list[dict[str, object]]:
279
+ groups = snapshot.get("groups")
280
+ if not isinstance(groups, list):
281
+ groups = []
282
+ snapshot["groups"] = groups
283
+ return groups
284
+
285
+
286
+ def append_snapshot_group(
287
+ snapshot: dict[str, object], index: int | None = None
288
+ ) -> None:
289
+ groups = snapshot_groups(snapshot)
290
+ assistant_id = str(snapshot.get("id") or "assistant")
291
+ group_index = index if index is not None else len(groups) + 1
292
+ group_id = f"{assistant_id}-group-{group_index}"
293
+ if groups and groups[-1].get("id") == group_id:
294
+ return
295
+ groups.append({"id": group_id, "items": []})
296
+
297
+
298
+ def append_snapshot_text(snapshot: dict[str, object], content: str) -> None:
299
+ if not content:
300
+ return
301
+ snapshot["active_output"] = "text"
302
+ snapshot["content"] = f"{snapshot.get('content') or ''}{content}"
303
+ append_snapshot_item_content(snapshot, content, "text")
304
+
305
+
306
+ def append_snapshot_thinking(snapshot: dict[str, object], content: str) -> None:
307
+ if not content:
308
+ return
309
+ snapshot["active_output"] = "thinking"
310
+ snapshot["thinking"] = f"{snapshot.get('thinking') or ''}{content}"
311
+ append_snapshot_item_content(snapshot, content, "thinking")
312
+
313
+
314
+ def append_snapshot_item_content(
315
+ snapshot: dict[str, object], content: str, item_type: Literal["text", "thinking"]
316
+ ) -> None:
317
+ groups = snapshot_groups(snapshot)
318
+ if not groups:
319
+ append_snapshot_group(snapshot)
320
+ group = groups[-1]
321
+ items = group.get("items")
322
+ if not isinstance(items, list):
323
+ items = []
324
+ group["items"] = items
325
+ item = next(
326
+ (
327
+ current
328
+ for current in reversed(items)
329
+ if isinstance(current, dict) and current.get("type") == item_type
330
+ ),
331
+ None,
332
+ )
333
+ if item is None:
334
+ assistant_id = str(snapshot.get("id") or "assistant")
335
+ snapshot_item_count = 0
336
+ for current_group in groups:
337
+ current_items = current_group.get("items")
338
+ if not isinstance(current_items, list):
339
+ continue
340
+ snapshot_item_count += sum(
341
+ 1
342
+ for current_item in current_items
343
+ if isinstance(current_item, dict)
344
+ and current_item.get("type") == item_type
345
+ )
346
+ item = {
347
+ "content": "",
348
+ "id": f"{assistant_id}-{item_type}-{snapshot_item_count + 1}",
349
+ "type": item_type,
350
+ }
351
+ items.append(item)
352
+ item["content"] = f"{item.get('content') or ''}{content}"
353
+
354
+
199
355
  USER_VISIBLE_RUN_ERROR_TITLE = "Request failed"
200
356
  USER_VISIBLE_RUN_ERROR_MESSAGE = "Check the model connection settings and try again."
201
357
  USER_VISIBLE_CONTEXT_OPTIMIZATION_ERROR_MESSAGE = "Context could not be optimized."
@@ -511,16 +667,22 @@ def is_context_marker(message: StoredMessage) -> bool:
511
667
  return message.content in {COMPACTED_CONTEXT_MARKER, OPTIMIZED_CONTEXT_MARKER}
512
668
 
513
669
 
514
- def auto_compact_token_limit() -> int:
670
+ def auto_compact_token_limit(context_window: int) -> int:
515
671
  raw_limit = os.environ.get("FLOWENT_AUTO_COMPACT_TOKEN_LIMIT", "")
672
+ if not raw_limit:
673
+ return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
516
674
  try:
517
675
  return max(0, int(raw_limit))
518
676
  except ValueError:
519
- return DEFAULT_AUTO_COMPACT_TOKEN_LIMIT
677
+ return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
520
678
 
521
679
 
522
- def should_auto_compact(messages: list[ChatMessage]) -> bool:
523
- token_limit = auto_compact_token_limit()
680
+ def should_auto_compact(
681
+ messages: list[ChatMessage],
682
+ *,
683
+ context_window: int,
684
+ ) -> bool:
685
+ token_limit = auto_compact_token_limit(context_window)
524
686
  if token_limit <= 0:
525
687
  return False
526
688
  return (
@@ -543,19 +705,40 @@ def usage_event_data(usage_info: TokenUsageInfo) -> dict[str, object]:
543
705
  return {"usage_info": usage_info.model_dump()}
544
706
 
545
707
 
708
+ def update_context_usage_for_response(
709
+ usage_info: TokenUsageInfo | None,
710
+ *,
711
+ messages: Sequence[Mapping[str, object]],
712
+ output_content: str,
713
+ model_context_window: int,
714
+ ) -> TokenUsageInfo:
715
+ return recompute_context_usage(
716
+ usage_info,
717
+ estimated_token_usage_for_messages(
718
+ model_visible_messages_for_usage(messages),
719
+ output_content=output_content,
720
+ ).total_tokens,
721
+ model_context_window=model_context_window,
722
+ )
723
+
724
+
546
725
  def usage_info_for_model(
547
726
  usage_info: TokenUsageInfo | None,
548
- model_name: str | None,
727
+ model_context_window: int,
549
728
  ) -> TokenUsageInfo | None:
550
729
  if usage_info is None:
551
730
  return None
552
- return usage_info.model_copy(
553
- update={"model_context_window": current_model_context_window(model_name)}
554
- )
731
+ return usage_info.model_copy(update={"model_context_window": model_context_window})
732
+
733
+
734
+ def context_window_for_settings(settings: StoredSettings) -> int:
735
+ if settings.context_window_limit is not None:
736
+ return settings.context_window_limit
737
+ return current_model_context_window(settings.selected_model)
555
738
 
556
739
 
557
740
  def state_with_current_model_context_window(state: StoredState) -> StoredState:
558
- selected_model = state.settings.selected_model
741
+ model_context_window = context_window_for_settings(state.settings)
559
742
  return state.model_copy(
560
743
  update={
561
744
  "messages": [
@@ -563,7 +746,7 @@ def state_with_current_model_context_window(state: StoredState) -> StoredState:
563
746
  update={
564
747
  "usage_info": usage_info_for_model(
565
748
  message.usage_info,
566
- selected_model,
749
+ model_context_window,
567
750
  )
568
751
  }
569
752
  )
@@ -571,7 +754,10 @@ def state_with_current_model_context_window(state: StoredState) -> StoredState:
571
754
  else message
572
755
  for message in state.messages
573
756
  ],
574
- "usage_info": usage_info_for_model(state.usage_info, selected_model),
757
+ "usage_info": usage_info_for_model(
758
+ state.usage_info,
759
+ model_context_window,
760
+ ),
575
761
  }
576
762
  )
577
763
 
@@ -671,6 +857,7 @@ def create_app(
671
857
  telegram_bot_manager: TelegramBotManager | None = None
672
858
  workspace_runs: dict[str, WorkspaceRun] = {}
673
859
  active_workspace_run_id: str | None = None
860
+ workspace_generation = 0
674
861
  active_compact_task: WorkspaceCompactTask | None = None
675
862
 
676
863
  static_dir = frontend_static_directory().resolve(strict=False)
@@ -702,6 +889,7 @@ def create_app(
702
889
  async def save_context_checkpoint(
703
890
  *,
704
891
  connection: ProviderConnection,
892
+ context_window_limit: int,
705
893
  messages: list[StoredMessage],
706
894
  model_history: list[ChatMessage],
707
895
  marker_content: str,
@@ -723,12 +911,12 @@ def create_app(
723
911
  usage_info = append_token_usage(
724
912
  usage_info,
725
913
  compact_result.summary_usage,
726
- model_context_window=current_model_context_window(connection.model),
914
+ model_context_window=context_window_limit,
727
915
  )
728
916
  usage_info = recompute_context_usage(
729
917
  usage_info,
730
918
  compact_result.token_after,
731
- model_context_window=current_model_context_window(connection.model),
919
+ model_context_window=context_window_limit,
732
920
  )
733
921
  store.save_usage_info(usage_info)
734
922
  marker = StoredMessage(
@@ -767,16 +955,21 @@ def create_app(
767
955
  async def auto_compact_workspace_messages(
768
956
  *,
769
957
  connection: ProviderConnection,
958
+ context_window_limit: int,
770
959
  messages: list[StoredMessage],
771
960
  model_history: list[ChatMessage],
772
961
  source_message_id: str | None = None,
773
962
  ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
774
- if not should_auto_compact(model_history):
963
+ if not should_auto_compact(
964
+ model_history,
965
+ context_window=context_window_limit,
966
+ ):
775
967
  return None
776
968
  logger.info("Workspace auto compact requested")
777
969
  try:
778
970
  return await save_context_checkpoint(
779
971
  connection=connection,
972
+ context_window_limit=context_window_limit,
780
973
  marker_content=OPTIMIZED_CONTEXT_MARKER,
781
974
  messages=messages,
782
975
  model_history=model_history,
@@ -790,6 +983,7 @@ def create_app(
790
983
  async def run_workspace_turn(content: str) -> StoredMessage:
791
984
  state = store.read_state()
792
985
  connection = selected_connection(state)
986
+ context_window_limit = context_window_for_settings(state.settings)
793
987
  user_message = StoredMessage(
794
988
  author="user",
795
989
  content=content,
@@ -807,6 +1001,7 @@ def create_app(
807
1001
  ]
808
1002
  auto_compaction = await auto_compact_workspace_messages(
809
1003
  connection=connection,
1004
+ context_window_limit=context_window_limit,
810
1005
  messages=state.messages,
811
1006
  model_history=model_history,
812
1007
  source_message_id=None,
@@ -873,12 +1068,15 @@ def create_app(
873
1068
  if event.event == "usage":
874
1069
  usage_data = event.data.get("usage")
875
1070
  if isinstance(usage_data, dict):
876
- usage_info = append_token_usage(
877
- store.read_usage_info(),
878
- TokenUsage.model_validate(usage_data),
879
- model_context_window=current_model_context_window(
880
- connection.model
1071
+ usage_info = update_context_usage_for_response(
1072
+ append_token_usage(
1073
+ store.read_usage_info(),
1074
+ TokenUsage.model_validate(usage_data),
1075
+ model_context_window=context_window_limit,
881
1076
  ),
1077
+ messages=request_messages,
1078
+ output_content=assistant_output.content,
1079
+ model_context_window=context_window_limit,
882
1080
  )
883
1081
  store.save_usage_info(usage_info)
884
1082
  turn_usage_info = usage_info
@@ -899,15 +1097,20 @@ def create_app(
899
1097
 
900
1098
  final_usage_info = turn_usage_info
901
1099
  if final_usage_info is None:
902
- final_usage_info = recompute_context_usage(
1100
+ final_usage_info = update_context_usage_for_response(
903
1101
  store.read_usage_info(),
904
- estimated_token_usage_for_messages(
905
- model_visible_messages_for_usage(request_messages),
906
- output_content=assistant_output.content,
907
- ).total_tokens,
908
- model_context_window=current_model_context_window(connection.model),
1102
+ messages=request_messages,
1103
+ output_content=assistant_output.content,
1104
+ model_context_window=context_window_limit,
909
1105
  )
910
- store.save_usage_info(final_usage_info)
1106
+ else:
1107
+ final_usage_info = update_context_usage_for_response(
1108
+ final_usage_info,
1109
+ messages=request_messages,
1110
+ output_content=assistant_output.content,
1111
+ model_context_window=context_window_limit,
1112
+ )
1113
+ store.save_usage_info(final_usage_info)
911
1114
 
912
1115
  assistant_message = StoredMessage(
913
1116
  author="assistant",
@@ -931,6 +1134,58 @@ def create_app(
931
1134
  telegram_transport=telegram_transport,
932
1135
  )
933
1136
 
1137
+ async def gather_shutdown_tasks(
1138
+ label: str, tasks: Sequence[asyncio.Task[Any]]
1139
+ ) -> None:
1140
+ if not tasks:
1141
+ return
1142
+ results = await asyncio.gather(*tasks, return_exceptions=True)
1143
+ for result in results:
1144
+ if result is None or isinstance(result, asyncio.CancelledError):
1145
+ continue
1146
+ if isinstance(result, BaseException):
1147
+ logger.error(
1148
+ "%s cleanup task failed",
1149
+ label,
1150
+ exc_info=(type(result), result, result.__traceback__),
1151
+ )
1152
+
1153
+ async def stop_workspace_runs_for_shutdown() -> None:
1154
+ tasks: list[asyncio.Task[None]] = []
1155
+ for run in workspace_runs.values():
1156
+ if run.task is None or run.task.done():
1157
+ continue
1158
+ run.task.cancel()
1159
+ tasks.append(run.task)
1160
+ await gather_shutdown_tasks("Workspace run", tasks)
1161
+
1162
+ async def stop_workspace_compact_for_shutdown() -> None:
1163
+ nonlocal active_compact_task
1164
+ if active_compact_task is None:
1165
+ store.save_is_compacting(False)
1166
+ return
1167
+ task = active_compact_task.task
1168
+ active_compact_task = None
1169
+ if not task.done():
1170
+ task.cancel()
1171
+ await gather_shutdown_tasks("Workspace compact", [task])
1172
+ store.save_is_compacting(False)
1173
+
1174
+ async def run_shutdown_step(label: str, cleanup: Awaitable[object]) -> None:
1175
+ try:
1176
+ await cleanup
1177
+ except Exception:
1178
+ logger.exception("%s cleanup failed during shutdown", label)
1179
+
1180
+ async def graceful_shutdown() -> None:
1181
+ await run_shutdown_step("Workspace run", stop_workspace_runs_for_shutdown())
1182
+ await run_shutdown_step(
1183
+ "Workspace compact", stop_workspace_compact_for_shutdown()
1184
+ )
1185
+ if telegram_bot_manager is not None:
1186
+ await run_shutdown_step("Telegram", telegram_bot_manager.stop_all())
1187
+ await run_shutdown_step("MCP", mcp_manager.stop_all())
1188
+
934
1189
  @asynccontextmanager
935
1190
  async def lifespan(app: FastAPI) -> AsyncIterator[None]:
936
1191
  app.state.mcp_manager = mcp_manager
@@ -941,9 +1196,7 @@ def create_app(
941
1196
  try:
942
1197
  yield
943
1198
  finally:
944
- if telegram_bot_manager is not None:
945
- await telegram_bot_manager.stop_all()
946
- await mcp_manager.stop_all()
1199
+ await graceful_shutdown()
947
1200
 
948
1201
  app = FastAPI(title="Flowent", lifespan=lifespan)
949
1202
  app.state.mcp_manager = mcp_manager
@@ -1096,14 +1349,23 @@ def create_app(
1096
1349
  async def save_workspace_messages(
1097
1350
  request: WorkspaceMessagesRequest,
1098
1351
  ) -> WorkspaceMessagesRequest:
1352
+ return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
1353
+
1354
+ @app.post("/api/workspace/clear")
1355
+ async def clear_workspace() -> WorkspaceClearResponse:
1099
1356
  nonlocal active_workspace_run_id
1100
- if not request.messages:
1101
- run = active_workspace_run()
1102
- if run is not None and run.task is not None and not run.task.done():
1357
+ nonlocal workspace_generation
1358
+ workspace_generation += 1
1359
+ for run in workspace_runs.values():
1360
+ run.is_done = True
1361
+ if run.task is not None and not run.task.done():
1103
1362
  run.discard_on_cancel = True
1104
1363
  run.task.cancel()
1105
- active_workspace_run_id = None
1106
- return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
1364
+ async with run.condition:
1365
+ run.condition.notify_all()
1366
+ active_workspace_run_id = None
1367
+ messages = store.save_messages([])
1368
+ return WorkspaceClearResponse(messages=messages)
1107
1369
 
1108
1370
  async def append_run_event(
1109
1371
  run: WorkspaceRun, event: str, data: dict[str, object]
@@ -1112,15 +1374,42 @@ def create_app(
1112
1374
  run.events.append((run.latest_event_index + 1, event, data))
1113
1375
  run.condition.notify_all()
1114
1376
 
1377
+ async def append_run_snapshot(run: WorkspaceRun, message: StoredMessage) -> None:
1378
+ if message.author != "assistant":
1379
+ return
1380
+ run.latest_snapshot = message
1381
+ await append_run_event(
1382
+ run,
1383
+ "snapshot",
1384
+ {"message": stream_message_data(message, run.active_output)},
1385
+ )
1386
+
1115
1387
  def active_workspace_run() -> WorkspaceRun | None:
1116
1388
  if active_workspace_run_id is None:
1117
1389
  return None
1118
- return workspace_runs.get(active_workspace_run_id)
1390
+ run = workspace_runs.get(active_workspace_run_id)
1391
+ if run is None or run.is_done:
1392
+ return None
1393
+ return run
1394
+
1395
+ def has_active_workspace_run() -> bool:
1396
+ return any(
1397
+ not run.is_done and run.task is not None and not run.task.done()
1398
+ for run in workspace_runs.values()
1399
+ )
1119
1400
 
1120
1401
  def create_workspace_run(content: str) -> WorkspaceRun:
1121
1402
  nonlocal active_workspace_run_id
1403
+ if has_active_workspace_run():
1404
+ active_run = active_workspace_run()
1405
+ raise HTTPException(
1406
+ status_code=409,
1407
+ detail="Response in progress",
1408
+ headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
1409
+ )
1122
1410
  state = store.read_state()
1123
1411
  connection = selected_connection(state)
1412
+ context_window_limit = context_window_for_settings(state.settings)
1124
1413
 
1125
1414
  user_message = StoredMessage(
1126
1415
  author="user",
@@ -1129,7 +1418,10 @@ def create_app(
1129
1418
  )
1130
1419
  next_messages = [*state.messages, user_message]
1131
1420
  store.save_messages(next_messages)
1132
- run = WorkspaceRun(condition=asyncio.Condition())
1421
+ run = WorkspaceRun(
1422
+ condition=asyncio.Condition(),
1423
+ generation=workspace_generation,
1424
+ )
1133
1425
  workspace_runs[run.id] = run
1134
1426
  active_workspace_run_id = run.id
1135
1427
 
@@ -1143,9 +1435,17 @@ def create_app(
1143
1435
  status="running",
1144
1436
  )
1145
1437
  assistant_output = AssistantOutputBuilder(assistant_message.id)
1438
+ last_progress_flush_at = 0.0
1146
1439
 
1147
- def persist_assistant(status: str = "running") -> None:
1440
+ def is_current_generation() -> bool:
1441
+ return run.generation == workspace_generation
1442
+
1443
+ def update_assistant_message(
1444
+ status: str = "running", *, persist: bool
1445
+ ) -> StoredMessage | None:
1148
1446
  nonlocal next_messages, assistant_message
1447
+ if not is_current_generation() or run.discard_on_cancel:
1448
+ return None
1149
1449
  assistant_message = StoredMessage(
1150
1450
  author="assistant",
1151
1451
  content=assistant_output.content,
@@ -1159,7 +1459,32 @@ def create_app(
1159
1459
  next_messages = append_or_replace_message(
1160
1460
  next_messages, assistant_message
1161
1461
  )
1162
- store.upsert_message(assistant_message)
1462
+ if persist:
1463
+ store.upsert_message(assistant_message)
1464
+ return assistant_message
1465
+
1466
+ def persist_assistant(status: str = "running") -> StoredMessage | None:
1467
+ nonlocal last_progress_flush_at
1468
+ message = update_assistant_message(status, persist=True)
1469
+ if status == "running" and message is not None:
1470
+ last_progress_flush_at = time.monotonic()
1471
+ return message
1472
+
1473
+ def refresh_assistant(status: str = "running") -> StoredMessage | None:
1474
+ return update_assistant_message(status, persist=False)
1475
+
1476
+ def persist_assistant_progress() -> StoredMessage | None:
1477
+ nonlocal last_progress_flush_at
1478
+ now = time.monotonic()
1479
+ if (
1480
+ last_progress_flush_at > 0
1481
+ and now - last_progress_flush_at
1482
+ < WORKSPACE_PROGRESS_FLUSH_INTERVAL_SECONDS
1483
+ ):
1484
+ refresh_assistant()
1485
+ return None
1486
+ last_progress_flush_at = now
1487
+ return update_assistant_message("running", persist=True)
1163
1488
 
1164
1489
  try:
1165
1490
  current_tool_id: str | None = None
@@ -1176,6 +1501,7 @@ def create_app(
1176
1501
  )
1177
1502
  auto_compaction = await auto_compact_workspace_messages(
1178
1503
  connection=connection,
1504
+ context_window_limit=context_window_limit,
1179
1505
  messages=state.messages,
1180
1506
  model_history=[
1181
1507
  ChatMessage.model_validate(message)
@@ -1232,6 +1558,8 @@ def create_app(
1232
1558
  conversation: Sequence[Mapping[str, object]],
1233
1559
  ) -> AgentContextUpdate | None:
1234
1560
  nonlocal next_messages
1561
+ if not is_current_generation() or run.discard_on_cancel:
1562
+ return None
1235
1563
  assistant_snapshot = StoredMessage(
1236
1564
  author="assistant",
1237
1565
  content=assistant_output.content,
@@ -1267,6 +1595,7 @@ def create_app(
1267
1595
  )
1268
1596
  auto_result = await auto_compact_workspace_messages(
1269
1597
  connection=connection,
1598
+ context_window_limit=context_window_limit,
1270
1599
  messages=next_messages,
1271
1600
  model_history=model_history,
1272
1601
  source_message_id=assistant_snapshot.id,
@@ -1304,6 +1633,11 @@ def create_app(
1304
1633
  messages=current_request_messages,
1305
1634
  tool_runner=tool_runner,
1306
1635
  ):
1636
+ if not is_current_generation() or run.discard_on_cancel:
1637
+ raise asyncio.CancelledError
1638
+ run_event_data = event.data
1639
+ should_append_run_event = event.event != "usage"
1640
+ snapshot_after_event: StoredMessage | None = None
1307
1641
  if event.event == "start":
1308
1642
  event_id = event.data.get("id")
1309
1643
  if isinstance(event_id, str):
@@ -1311,20 +1645,24 @@ def create_app(
1311
1645
  update={"id": event_id}
1312
1646
  )
1313
1647
  assistant_output.set_assistant_id(event_id)
1314
- persist_assistant()
1648
+ snapshot_after_event = persist_assistant()
1315
1649
  if event.event == "output_start":
1316
1650
  index = event.data.get("index")
1317
1651
  if isinstance(index, int):
1652
+ run.active_output = None
1318
1653
  assistant_output.start_group(index)
1319
- persist_assistant()
1654
+ snapshot_after_event = persist_assistant()
1655
+ if event.event == "output_done":
1656
+ run.active_output = None
1320
1657
  if event.event == "tool_start":
1321
1658
  tool = event.data.get("tool")
1322
1659
  if isinstance(tool, dict) and isinstance(tool.get("id"), str):
1660
+ run.active_output = None
1323
1661
  current_tool_id = tool["id"]
1324
1662
  assistant_output.start_tool(
1325
1663
  StoredToolItem.model_validate(tool)
1326
1664
  )
1327
- persist_assistant()
1665
+ snapshot_after_event = persist_assistant()
1328
1666
  if event.event in {"tool_done", "tool_error"}:
1329
1667
  tool_id = event.data.get("id")
1330
1668
  if (
@@ -1335,34 +1673,37 @@ def create_app(
1335
1673
  None if current_tool_id == tool_id else current_tool_id
1336
1674
  )
1337
1675
  assistant_output.update_tool(tool_id, event.data)
1338
- persist_assistant()
1676
+ snapshot_after_event = persist_assistant()
1339
1677
  if event.event == "delta":
1678
+ run.active_output = "text"
1340
1679
  assistant_output.append_text(
1341
1680
  str(event.data.get("content") or "")
1342
1681
  )
1343
- persist_assistant()
1682
+ snapshot_after_event = persist_assistant_progress()
1344
1683
  if event.event == "thinking_delta":
1684
+ run.active_output = "thinking"
1345
1685
  assistant_output.append_thinking(
1346
1686
  str(event.data.get("content") or "")
1347
1687
  )
1348
- persist_assistant()
1688
+ snapshot_after_event = persist_assistant_progress()
1349
1689
  if event.event == "usage":
1350
1690
  usage_data = event.data.get("usage")
1351
1691
  if isinstance(usage_data, dict):
1352
- usage_info = append_token_usage(
1353
- store.read_usage_info(),
1354
- TokenUsage.model_validate(usage_data),
1355
- model_context_window=current_model_context_window(
1356
- connection.model
1692
+ usage_info = update_context_usage_for_response(
1693
+ append_token_usage(
1694
+ store.read_usage_info(),
1695
+ TokenUsage.model_validate(usage_data),
1696
+ model_context_window=context_window_limit,
1357
1697
  ),
1698
+ messages=current_request_messages,
1699
+ output_content=assistant_output.content,
1700
+ model_context_window=context_window_limit,
1358
1701
  )
1359
1702
  store.save_usage_info(usage_info)
1360
1703
  turn_usage_info = usage_info
1361
- await append_run_event(
1362
- run,
1363
- "usage",
1364
- usage_event_data(usage_info),
1365
- )
1704
+ run_event_data = usage_event_data(usage_info)
1705
+ should_append_run_event = True
1706
+ snapshot_after_event = persist_assistant()
1366
1707
  logger.log(
1367
1708
  TRACE_LEVEL,
1368
1709
  "Workspace stream event=%s data=%r",
@@ -1372,34 +1713,44 @@ def create_app(
1372
1713
  if event.event == "done":
1373
1714
  message = event.data.get("message")
1374
1715
  if isinstance(message, dict):
1716
+ run.active_output = None
1375
1717
  assistant_output.apply_done_message(message)
1376
1718
  response_usage_info = store.read_usage_info()
1377
1719
  final_usage_info = turn_usage_info
1378
1720
  if final_usage_info is None:
1379
- final_usage_info = recompute_context_usage(
1721
+ final_usage_info = update_context_usage_for_response(
1380
1722
  response_usage_info,
1381
- estimated_token_usage_for_messages(
1382
- model_visible_messages_for_usage(
1383
- current_request_messages
1384
- ),
1385
- output_content=assistant_output.content,
1386
- ).total_tokens,
1387
- model_context_window=current_model_context_window(
1388
- connection.model
1389
- ),
1723
+ messages=current_request_messages,
1724
+ output_content=assistant_output.content,
1725
+ model_context_window=context_window_limit,
1390
1726
  )
1391
- store.save_usage_info(final_usage_info)
1392
- if final_usage_info == response_usage_info:
1393
- assistant_message = assistant_message.model_copy(
1394
- update={"usage_info": final_usage_info}
1727
+ else:
1728
+ final_usage_info = update_context_usage_for_response(
1729
+ final_usage_info,
1730
+ messages=current_request_messages,
1731
+ output_content=assistant_output.content,
1732
+ model_context_window=context_window_limit,
1395
1733
  )
1396
- persist_assistant("completed")
1397
- if event.event != "usage":
1398
- await append_run_event(run, event.event, event.data)
1734
+ store.save_usage_info(final_usage_info)
1735
+ snapshot_after_event = persist_assistant("completed")
1736
+ if snapshot_after_event is not None:
1737
+ run_event_data = {
1738
+ "message": stream_message_data(snapshot_after_event)
1739
+ }
1740
+ if event.event == "done" and snapshot_after_event is not None:
1741
+ await append_run_snapshot(run, snapshot_after_event)
1742
+ await append_run_event(run, event.event, run_event_data)
1743
+ else:
1744
+ if should_append_run_event:
1745
+ await append_run_event(run, event.event, run_event_data)
1746
+ if snapshot_after_event is not None:
1747
+ await append_run_snapshot(run, snapshot_after_event)
1399
1748
  except asyncio.CancelledError:
1400
1749
  logger.info("Workspace run stopped")
1401
1750
  if not run.discard_on_cancel:
1402
- persist_assistant("interrupted")
1751
+ interrupted_snapshot = persist_assistant("interrupted")
1752
+ if interrupted_snapshot is not None:
1753
+ await append_run_snapshot(run, interrupted_snapshot)
1403
1754
  await append_run_event(
1404
1755
  run,
1405
1756
  "error",
@@ -1423,7 +1774,9 @@ def create_app(
1423
1774
  str(error) or EMPTY_MODEL_RESPONSE_DETAIL,
1424
1775
  )
1425
1776
  )
1426
- persist_assistant("failed")
1777
+ failed_snapshot = persist_assistant("failed")
1778
+ if failed_snapshot is not None:
1779
+ await append_run_snapshot(run, failed_snapshot)
1427
1780
  await append_run_event(run, "error", run_error_event_data(error_item))
1428
1781
  finally:
1429
1782
  run.is_done = True
@@ -1436,9 +1789,16 @@ def create_app(
1436
1789
  return run
1437
1790
 
1438
1791
  async def workspace_run_stream(
1439
- run: WorkspaceRun, after: int = 0
1792
+ run: WorkspaceRun, after: int = 0, include_snapshots: bool = True
1440
1793
  ) -> AsyncIterator[str]:
1441
1794
  next_event_index = after + 1
1795
+ reconnect_snapshot = run_snapshot_data_at(run, after) if after > 0 else None
1796
+ if include_snapshots and reconnect_snapshot is not None:
1797
+ yield stream_event(
1798
+ "snapshot",
1799
+ {"message": reconnect_snapshot},
1800
+ event_id=after,
1801
+ )
1442
1802
  while True:
1443
1803
  async with run.condition:
1444
1804
 
@@ -1452,7 +1812,9 @@ def create_app(
1452
1812
 
1453
1813
  for index, event, data in events:
1454
1814
  next_event_index = index + 1
1455
- yield stream_event(event, data)
1815
+ if event == "snapshot" and not include_snapshots:
1816
+ continue
1817
+ yield stream_event(event, data, event_id=index)
1456
1818
  if event in {"done", "error"}:
1457
1819
  return
1458
1820
 
@@ -1498,6 +1860,7 @@ def create_app(
1498
1860
  *,
1499
1861
  checkpoint: StoredCompactionCheckpoint | None,
1500
1862
  connection: ProviderConnection,
1863
+ context_window_limit: int,
1501
1864
  state: StoredState,
1502
1865
  ) -> tuple[StoredMessage, TokenUsageInfo]:
1503
1866
  logger.info("Workspace compact requested")
@@ -1513,6 +1876,7 @@ def create_app(
1513
1876
 
1514
1877
  marker, _, usage_info = await save_context_checkpoint(
1515
1878
  connection=connection,
1879
+ context_window_limit=context_window_limit,
1516
1880
  marker_content=COMPACTED_CONTEXT_MARKER,
1517
1881
  messages=state.messages,
1518
1882
  model_history=model_history,
@@ -1551,12 +1915,14 @@ def create_app(
1551
1915
  )
1552
1916
  state = store.read_state()
1553
1917
  connection = selected_connection(state)
1918
+ context_window_limit = context_window_for_settings(state.settings)
1554
1919
  checkpoint = store.read_active_compaction_checkpoint()
1555
1920
  store.save_is_compacting(True)
1556
1921
  compact_task = asyncio.create_task(
1557
1922
  run_manual_compact(
1558
1923
  checkpoint=checkpoint,
1559
1924
  connection=connection,
1925
+ context_window_limit=context_window_limit,
1560
1926
  state=state,
1561
1927
  )
1562
1928
  )
@@ -1596,7 +1962,7 @@ def create_app(
1596
1962
  logger.log(TRACE_LEVEL, "Workspace user content=%r", request.content)
1597
1963
  run = create_workspace_run(request.content)
1598
1964
  return StreamingResponse(
1599
- workspace_run_stream(run),
1965
+ workspace_run_stream(run, include_snapshots=False),
1600
1966
  media_type="text/event-stream",
1601
1967
  )
1602
1968