flowent 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "flowent"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  description = "A workflow orchestration platform for multi-agent collaboration"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -12,7 +12,7 @@ from flowent.llm import (
12
12
  ChatMessage,
13
13
  CompletionCallable,
14
14
  ProviderConnection,
15
- complete_chat,
15
+ stream_chat,
16
16
  )
17
17
 
18
18
  logger = logging.getLogger("flowent.approval")
@@ -128,7 +128,8 @@ async def review_approval_request(
128
128
  completion: CompletionCallable | None = None,
129
129
  ) -> ApprovalReviewDecision:
130
130
  try:
131
- message = await complete_chat(
131
+ content = ""
132
+ async for delta in stream_chat(
132
133
  connection,
133
134
  [
134
135
  ChatMessage(role="system", content=APPROVAL_REVIEWER_PROMPT),
@@ -138,8 +139,9 @@ async def review_approval_request(
138
139
  ),
139
140
  ],
140
141
  completion=completion,
141
- )
142
- return parse_review_decision(message.content)
142
+ ):
143
+ content += delta
144
+ return parse_review_decision(content)
143
145
  except Exception as error:
144
146
  logger.warning("Approval reviewer denied request after failure: %s", error)
145
147
  return ApprovalReviewDecision(
@@ -85,7 +85,7 @@ logger = logging.getLogger("flowent.main")
85
85
  DEFAULT_STATIC_DIR = Path(__file__).parent / "static"
86
86
  COMPACTED_CONTEXT_MARKER = "Context compacted"
87
87
  OPTIMIZED_CONTEXT_MARKER = "Context optimized"
88
- DEFAULT_AUTO_COMPACT_TOKEN_LIMIT = 120_000
88
+ DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO = 0.95
89
89
  AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
90
90
  APPROVAL_TRANSCRIPT_MESSAGE_LIMIT = 12
91
91
  APPROVAL_TRANSCRIPT_TEXT_LIMIT = 2_000
@@ -121,6 +121,14 @@ class WorkspaceRunResponse(BaseModel):
121
121
  run_id: str
122
122
 
123
123
 
124
+ class WorkspaceClearResponse(BaseModel):
125
+ model_config = ConfigDict(extra="forbid")
126
+
127
+ active_run_id: str | None = None
128
+ messages: list[StoredMessage]
129
+ usage_info: TokenUsageInfo | None = None
130
+
131
+
124
132
  @dataclass
125
133
  class WorkspaceCompactTask:
126
134
  task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]]
@@ -174,8 +182,10 @@ class WorkspaceRun:
174
182
  condition: asyncio.Condition
175
183
  discard_on_cancel: bool = False
176
184
  events: list[tuple[int, str, dict[str, object]]] = field(default_factory=list)
185
+ generation: int = 0
177
186
  id: str = field(default_factory=lambda: str(uuid4()))
178
187
  is_done: bool = False
188
+ latest_snapshot: StoredMessage | None = None
179
189
  task: asyncio.Task[None] | None = None
180
190
 
181
191
  @property
@@ -183,8 +193,15 @@ class WorkspaceRun:
183
193
  return self.events[-1][0] if self.events else 0
184
194
 
185
195
 
186
- def stream_event(event: str, data: dict[str, object]) -> str:
187
- return f"event: {event}\ndata: {json.dumps(data)}\n\n"
196
+ def stream_event(
197
+ event: str, data: dict[str, object], event_id: int | None = None
198
+ ) -> str:
199
+ id_line = f"id: {event_id}\n" if event_id is not None else ""
200
+ return f"{id_line}event: {event}\ndata: {json.dumps(data)}\n\n"
201
+
202
+
203
+ def stream_message_data(message: StoredMessage) -> dict[str, object]:
204
+ return {**message.model_dump(), "status": message.status}
188
205
 
189
206
 
190
207
  def append_or_replace_message(
@@ -196,6 +213,18 @@ def append_or_replace_message(
196
213
  ]
197
214
 
198
215
 
216
+ def run_snapshot_data_at(
217
+ run: WorkspaceRun, event_index: int
218
+ ) -> dict[str, object] | None:
219
+ for current_event_index, event, data in reversed(run.events):
220
+ if current_event_index > event_index or event != "snapshot":
221
+ continue
222
+ message = data.get("message")
223
+ if isinstance(message, dict):
224
+ return message
225
+ return None
226
+
227
+
199
228
  USER_VISIBLE_RUN_ERROR_TITLE = "Request failed"
200
229
  USER_VISIBLE_RUN_ERROR_MESSAGE = "Check the model connection settings and try again."
201
230
  USER_VISIBLE_CONTEXT_OPTIMIZATION_ERROR_MESSAGE = "Context could not be optimized."
@@ -511,16 +540,22 @@ def is_context_marker(message: StoredMessage) -> bool:
511
540
  return message.content in {COMPACTED_CONTEXT_MARKER, OPTIMIZED_CONTEXT_MARKER}
512
541
 
513
542
 
514
- def auto_compact_token_limit() -> int:
543
+ def auto_compact_token_limit(context_window: int) -> int:
515
544
  raw_limit = os.environ.get("FLOWENT_AUTO_COMPACT_TOKEN_LIMIT", "")
545
+ if not raw_limit:
546
+ return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
516
547
  try:
517
548
  return max(0, int(raw_limit))
518
549
  except ValueError:
519
- return DEFAULT_AUTO_COMPACT_TOKEN_LIMIT
550
+ return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
520
551
 
521
552
 
522
- def should_auto_compact(messages: list[ChatMessage]) -> bool:
523
- token_limit = auto_compact_token_limit()
553
+ def should_auto_compact(
554
+ messages: list[ChatMessage],
555
+ *,
556
+ context_window: int,
557
+ ) -> bool:
558
+ token_limit = auto_compact_token_limit(context_window)
524
559
  if token_limit <= 0:
525
560
  return False
526
561
  return (
@@ -543,19 +578,40 @@ def usage_event_data(usage_info: TokenUsageInfo) -> dict[str, object]:
543
578
  return {"usage_info": usage_info.model_dump()}
544
579
 
545
580
 
581
+ def update_context_usage_for_response(
582
+ usage_info: TokenUsageInfo | None,
583
+ *,
584
+ messages: Sequence[Mapping[str, object]],
585
+ output_content: str,
586
+ model_context_window: int,
587
+ ) -> TokenUsageInfo:
588
+ return recompute_context_usage(
589
+ usage_info,
590
+ estimated_token_usage_for_messages(
591
+ model_visible_messages_for_usage(messages),
592
+ output_content=output_content,
593
+ ).total_tokens,
594
+ model_context_window=model_context_window,
595
+ )
596
+
597
+
546
598
  def usage_info_for_model(
547
599
  usage_info: TokenUsageInfo | None,
548
- model_name: str | None,
600
+ model_context_window: int,
549
601
  ) -> TokenUsageInfo | None:
550
602
  if usage_info is None:
551
603
  return None
552
- return usage_info.model_copy(
553
- update={"model_context_window": current_model_context_window(model_name)}
554
- )
604
+ return usage_info.model_copy(update={"model_context_window": model_context_window})
605
+
606
+
607
+ def context_window_for_settings(settings: StoredSettings) -> int:
608
+ if settings.context_window_limit is not None:
609
+ return settings.context_window_limit
610
+ return current_model_context_window(settings.selected_model)
555
611
 
556
612
 
557
613
  def state_with_current_model_context_window(state: StoredState) -> StoredState:
558
- selected_model = state.settings.selected_model
614
+ model_context_window = context_window_for_settings(state.settings)
559
615
  return state.model_copy(
560
616
  update={
561
617
  "messages": [
@@ -563,7 +619,7 @@ def state_with_current_model_context_window(state: StoredState) -> StoredState:
563
619
  update={
564
620
  "usage_info": usage_info_for_model(
565
621
  message.usage_info,
566
- selected_model,
622
+ model_context_window,
567
623
  )
568
624
  }
569
625
  )
@@ -571,7 +627,10 @@ def state_with_current_model_context_window(state: StoredState) -> StoredState:
571
627
  else message
572
628
  for message in state.messages
573
629
  ],
574
- "usage_info": usage_info_for_model(state.usage_info, selected_model),
630
+ "usage_info": usage_info_for_model(
631
+ state.usage_info,
632
+ model_context_window,
633
+ ),
575
634
  }
576
635
  )
577
636
 
@@ -671,6 +730,7 @@ def create_app(
671
730
  telegram_bot_manager: TelegramBotManager | None = None
672
731
  workspace_runs: dict[str, WorkspaceRun] = {}
673
732
  active_workspace_run_id: str | None = None
733
+ workspace_generation = 0
674
734
  active_compact_task: WorkspaceCompactTask | None = None
675
735
 
676
736
  static_dir = frontend_static_directory().resolve(strict=False)
@@ -702,6 +762,7 @@ def create_app(
702
762
  async def save_context_checkpoint(
703
763
  *,
704
764
  connection: ProviderConnection,
765
+ context_window_limit: int,
705
766
  messages: list[StoredMessage],
706
767
  model_history: list[ChatMessage],
707
768
  marker_content: str,
@@ -723,12 +784,12 @@ def create_app(
723
784
  usage_info = append_token_usage(
724
785
  usage_info,
725
786
  compact_result.summary_usage,
726
- model_context_window=current_model_context_window(connection.model),
787
+ model_context_window=context_window_limit,
727
788
  )
728
789
  usage_info = recompute_context_usage(
729
790
  usage_info,
730
791
  compact_result.token_after,
731
- model_context_window=current_model_context_window(connection.model),
792
+ model_context_window=context_window_limit,
732
793
  )
733
794
  store.save_usage_info(usage_info)
734
795
  marker = StoredMessage(
@@ -767,16 +828,21 @@ def create_app(
767
828
  async def auto_compact_workspace_messages(
768
829
  *,
769
830
  connection: ProviderConnection,
831
+ context_window_limit: int,
770
832
  messages: list[StoredMessage],
771
833
  model_history: list[ChatMessage],
772
834
  source_message_id: str | None = None,
773
835
  ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
774
- if not should_auto_compact(model_history):
836
+ if not should_auto_compact(
837
+ model_history,
838
+ context_window=context_window_limit,
839
+ ):
775
840
  return None
776
841
  logger.info("Workspace auto compact requested")
777
842
  try:
778
843
  return await save_context_checkpoint(
779
844
  connection=connection,
845
+ context_window_limit=context_window_limit,
780
846
  marker_content=OPTIMIZED_CONTEXT_MARKER,
781
847
  messages=messages,
782
848
  model_history=model_history,
@@ -790,6 +856,7 @@ def create_app(
790
856
  async def run_workspace_turn(content: str) -> StoredMessage:
791
857
  state = store.read_state()
792
858
  connection = selected_connection(state)
859
+ context_window_limit = context_window_for_settings(state.settings)
793
860
  user_message = StoredMessage(
794
861
  author="user",
795
862
  content=content,
@@ -807,6 +874,7 @@ def create_app(
807
874
  ]
808
875
  auto_compaction = await auto_compact_workspace_messages(
809
876
  connection=connection,
877
+ context_window_limit=context_window_limit,
810
878
  messages=state.messages,
811
879
  model_history=model_history,
812
880
  source_message_id=None,
@@ -873,12 +941,15 @@ def create_app(
873
941
  if event.event == "usage":
874
942
  usage_data = event.data.get("usage")
875
943
  if isinstance(usage_data, dict):
876
- usage_info = append_token_usage(
877
- store.read_usage_info(),
878
- TokenUsage.model_validate(usage_data),
879
- model_context_window=current_model_context_window(
880
- connection.model
944
+ usage_info = update_context_usage_for_response(
945
+ append_token_usage(
946
+ store.read_usage_info(),
947
+ TokenUsage.model_validate(usage_data),
948
+ model_context_window=context_window_limit,
881
949
  ),
950
+ messages=request_messages,
951
+ output_content=assistant_output.content,
952
+ model_context_window=context_window_limit,
882
953
  )
883
954
  store.save_usage_info(usage_info)
884
955
  turn_usage_info = usage_info
@@ -899,15 +970,20 @@ def create_app(
899
970
 
900
971
  final_usage_info = turn_usage_info
901
972
  if final_usage_info is None:
902
- final_usage_info = recompute_context_usage(
973
+ final_usage_info = update_context_usage_for_response(
903
974
  store.read_usage_info(),
904
- estimated_token_usage_for_messages(
905
- model_visible_messages_for_usage(request_messages),
906
- output_content=assistant_output.content,
907
- ).total_tokens,
908
- model_context_window=current_model_context_window(connection.model),
975
+ messages=request_messages,
976
+ output_content=assistant_output.content,
977
+ model_context_window=context_window_limit,
909
978
  )
910
- store.save_usage_info(final_usage_info)
979
+ else:
980
+ final_usage_info = update_context_usage_for_response(
981
+ final_usage_info,
982
+ messages=request_messages,
983
+ output_content=assistant_output.content,
984
+ model_context_window=context_window_limit,
985
+ )
986
+ store.save_usage_info(final_usage_info)
911
987
 
912
988
  assistant_message = StoredMessage(
913
989
  author="assistant",
@@ -1096,14 +1172,23 @@ def create_app(
1096
1172
  async def save_workspace_messages(
1097
1173
  request: WorkspaceMessagesRequest,
1098
1174
  ) -> WorkspaceMessagesRequest:
1175
+ return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
1176
+
1177
+ @app.post("/api/workspace/clear")
1178
+ async def clear_workspace() -> WorkspaceClearResponse:
1099
1179
  nonlocal active_workspace_run_id
1100
- if not request.messages:
1101
- run = active_workspace_run()
1102
- if run is not None and run.task is not None and not run.task.done():
1180
+ nonlocal workspace_generation
1181
+ workspace_generation += 1
1182
+ for run in workspace_runs.values():
1183
+ run.is_done = True
1184
+ if run.task is not None and not run.task.done():
1103
1185
  run.discard_on_cancel = True
1104
1186
  run.task.cancel()
1105
- active_workspace_run_id = None
1106
- return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
1187
+ async with run.condition:
1188
+ run.condition.notify_all()
1189
+ active_workspace_run_id = None
1190
+ messages = store.save_messages([])
1191
+ return WorkspaceClearResponse(messages=messages)
1107
1192
 
1108
1193
  async def append_run_event(
1109
1194
  run: WorkspaceRun, event: str, data: dict[str, object]
@@ -1112,15 +1197,42 @@ def create_app(
1112
1197
  run.events.append((run.latest_event_index + 1, event, data))
1113
1198
  run.condition.notify_all()
1114
1199
 
1200
+ async def append_run_snapshot(run: WorkspaceRun, message: StoredMessage) -> None:
1201
+ if message.author != "assistant":
1202
+ return
1203
+ run.latest_snapshot = message
1204
+ await append_run_event(
1205
+ run,
1206
+ "snapshot",
1207
+ {"message": stream_message_data(message)},
1208
+ )
1209
+
1115
1210
  def active_workspace_run() -> WorkspaceRun | None:
1116
1211
  if active_workspace_run_id is None:
1117
1212
  return None
1118
- return workspace_runs.get(active_workspace_run_id)
1213
+ run = workspace_runs.get(active_workspace_run_id)
1214
+ if run is None or run.is_done:
1215
+ return None
1216
+ return run
1217
+
1218
+ def has_active_workspace_run() -> bool:
1219
+ return any(
1220
+ not run.is_done and run.task is not None and not run.task.done()
1221
+ for run in workspace_runs.values()
1222
+ )
1119
1223
 
1120
1224
  def create_workspace_run(content: str) -> WorkspaceRun:
1121
1225
  nonlocal active_workspace_run_id
1226
+ if has_active_workspace_run():
1227
+ active_run = active_workspace_run()
1228
+ raise HTTPException(
1229
+ status_code=409,
1230
+ detail="Response in progress",
1231
+ headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
1232
+ )
1122
1233
  state = store.read_state()
1123
1234
  connection = selected_connection(state)
1235
+ context_window_limit = context_window_for_settings(state.settings)
1124
1236
 
1125
1237
  user_message = StoredMessage(
1126
1238
  author="user",
@@ -1129,7 +1241,10 @@ def create_app(
1129
1241
  )
1130
1242
  next_messages = [*state.messages, user_message]
1131
1243
  store.save_messages(next_messages)
1132
- run = WorkspaceRun(condition=asyncio.Condition())
1244
+ run = WorkspaceRun(
1245
+ condition=asyncio.Condition(),
1246
+ generation=workspace_generation,
1247
+ )
1133
1248
  workspace_runs[run.id] = run
1134
1249
  active_workspace_run_id = run.id
1135
1250
 
@@ -1144,8 +1259,13 @@ def create_app(
1144
1259
  )
1145
1260
  assistant_output = AssistantOutputBuilder(assistant_message.id)
1146
1261
 
1147
- def persist_assistant(status: str = "running") -> None:
1262
+ def is_current_generation() -> bool:
1263
+ return run.generation == workspace_generation
1264
+
1265
+ def persist_assistant(status: str = "running") -> StoredMessage | None:
1148
1266
  nonlocal next_messages, assistant_message
1267
+ if not is_current_generation() or run.discard_on_cancel:
1268
+ return None
1149
1269
  assistant_message = StoredMessage(
1150
1270
  author="assistant",
1151
1271
  content=assistant_output.content,
@@ -1160,6 +1280,7 @@ def create_app(
1160
1280
  next_messages, assistant_message
1161
1281
  )
1162
1282
  store.upsert_message(assistant_message)
1283
+ return assistant_message
1163
1284
 
1164
1285
  try:
1165
1286
  current_tool_id: str | None = None
@@ -1176,6 +1297,7 @@ def create_app(
1176
1297
  )
1177
1298
  auto_compaction = await auto_compact_workspace_messages(
1178
1299
  connection=connection,
1300
+ context_window_limit=context_window_limit,
1179
1301
  messages=state.messages,
1180
1302
  model_history=[
1181
1303
  ChatMessage.model_validate(message)
@@ -1232,6 +1354,8 @@ def create_app(
1232
1354
  conversation: Sequence[Mapping[str, object]],
1233
1355
  ) -> AgentContextUpdate | None:
1234
1356
  nonlocal next_messages
1357
+ if not is_current_generation() or run.discard_on_cancel:
1358
+ return None
1235
1359
  assistant_snapshot = StoredMessage(
1236
1360
  author="assistant",
1237
1361
  content=assistant_output.content,
@@ -1267,6 +1391,7 @@ def create_app(
1267
1391
  )
1268
1392
  auto_result = await auto_compact_workspace_messages(
1269
1393
  connection=connection,
1394
+ context_window_limit=context_window_limit,
1270
1395
  messages=next_messages,
1271
1396
  model_history=model_history,
1272
1397
  source_message_id=assistant_snapshot.id,
@@ -1304,6 +1429,11 @@ def create_app(
1304
1429
  messages=current_request_messages,
1305
1430
  tool_runner=tool_runner,
1306
1431
  ):
1432
+ if not is_current_generation() or run.discard_on_cancel:
1433
+ raise asyncio.CancelledError
1434
+ run_event_data = event.data
1435
+ should_append_run_event = event.event != "usage"
1436
+ snapshot_after_event: StoredMessage | None = None
1307
1437
  if event.event == "start":
1308
1438
  event_id = event.data.get("id")
1309
1439
  if isinstance(event_id, str):
@@ -1311,12 +1441,12 @@ def create_app(
1311
1441
  update={"id": event_id}
1312
1442
  )
1313
1443
  assistant_output.set_assistant_id(event_id)
1314
- persist_assistant()
1444
+ snapshot_after_event = persist_assistant()
1315
1445
  if event.event == "output_start":
1316
1446
  index = event.data.get("index")
1317
1447
  if isinstance(index, int):
1318
1448
  assistant_output.start_group(index)
1319
- persist_assistant()
1449
+ snapshot_after_event = persist_assistant()
1320
1450
  if event.event == "tool_start":
1321
1451
  tool = event.data.get("tool")
1322
1452
  if isinstance(tool, dict) and isinstance(tool.get("id"), str):
@@ -1324,7 +1454,7 @@ def create_app(
1324
1454
  assistant_output.start_tool(
1325
1455
  StoredToolItem.model_validate(tool)
1326
1456
  )
1327
- persist_assistant()
1457
+ snapshot_after_event = persist_assistant()
1328
1458
  if event.event in {"tool_done", "tool_error"}:
1329
1459
  tool_id = event.data.get("id")
1330
1460
  if (
@@ -1335,34 +1465,35 @@ def create_app(
1335
1465
  None if current_tool_id == tool_id else current_tool_id
1336
1466
  )
1337
1467
  assistant_output.update_tool(tool_id, event.data)
1338
- persist_assistant()
1468
+ snapshot_after_event = persist_assistant()
1339
1469
  if event.event == "delta":
1340
1470
  assistant_output.append_text(
1341
1471
  str(event.data.get("content") or "")
1342
1472
  )
1343
- persist_assistant()
1473
+ snapshot_after_event = persist_assistant()
1344
1474
  if event.event == "thinking_delta":
1345
1475
  assistant_output.append_thinking(
1346
1476
  str(event.data.get("content") or "")
1347
1477
  )
1348
- persist_assistant()
1478
+ snapshot_after_event = persist_assistant()
1349
1479
  if event.event == "usage":
1350
1480
  usage_data = event.data.get("usage")
1351
1481
  if isinstance(usage_data, dict):
1352
- usage_info = append_token_usage(
1353
- store.read_usage_info(),
1354
- TokenUsage.model_validate(usage_data),
1355
- model_context_window=current_model_context_window(
1356
- connection.model
1482
+ usage_info = update_context_usage_for_response(
1483
+ append_token_usage(
1484
+ store.read_usage_info(),
1485
+ TokenUsage.model_validate(usage_data),
1486
+ model_context_window=context_window_limit,
1357
1487
  ),
1488
+ messages=current_request_messages,
1489
+ output_content=assistant_output.content,
1490
+ model_context_window=context_window_limit,
1358
1491
  )
1359
1492
  store.save_usage_info(usage_info)
1360
1493
  turn_usage_info = usage_info
1361
- await append_run_event(
1362
- run,
1363
- "usage",
1364
- usage_event_data(usage_info),
1365
- )
1494
+ run_event_data = usage_event_data(usage_info)
1495
+ should_append_run_event = True
1496
+ snapshot_after_event = persist_assistant()
1366
1497
  logger.log(
1367
1498
  TRACE_LEVEL,
1368
1499
  "Workspace stream event=%s data=%r",
@@ -1376,30 +1507,39 @@ def create_app(
1376
1507
  response_usage_info = store.read_usage_info()
1377
1508
  final_usage_info = turn_usage_info
1378
1509
  if final_usage_info is None:
1379
- final_usage_info = recompute_context_usage(
1510
+ final_usage_info = update_context_usage_for_response(
1380
1511
  response_usage_info,
1381
- estimated_token_usage_for_messages(
1382
- model_visible_messages_for_usage(
1383
- current_request_messages
1384
- ),
1385
- output_content=assistant_output.content,
1386
- ).total_tokens,
1387
- model_context_window=current_model_context_window(
1388
- connection.model
1389
- ),
1512
+ messages=current_request_messages,
1513
+ output_content=assistant_output.content,
1514
+ model_context_window=context_window_limit,
1390
1515
  )
1391
- store.save_usage_info(final_usage_info)
1392
- if final_usage_info == response_usage_info:
1393
- assistant_message = assistant_message.model_copy(
1394
- update={"usage_info": final_usage_info}
1516
+ else:
1517
+ final_usage_info = update_context_usage_for_response(
1518
+ final_usage_info,
1519
+ messages=current_request_messages,
1520
+ output_content=assistant_output.content,
1521
+ model_context_window=context_window_limit,
1395
1522
  )
1396
- persist_assistant("completed")
1397
- if event.event != "usage":
1398
- await append_run_event(run, event.event, event.data)
1523
+ store.save_usage_info(final_usage_info)
1524
+ snapshot_after_event = persist_assistant("completed")
1525
+ if snapshot_after_event is not None:
1526
+ run_event_data = {
1527
+ "message": stream_message_data(snapshot_after_event)
1528
+ }
1529
+ if event.event == "done" and snapshot_after_event is not None:
1530
+ await append_run_snapshot(run, snapshot_after_event)
1531
+ await append_run_event(run, event.event, run_event_data)
1532
+ else:
1533
+ if should_append_run_event:
1534
+ await append_run_event(run, event.event, run_event_data)
1535
+ if snapshot_after_event is not None:
1536
+ await append_run_snapshot(run, snapshot_after_event)
1399
1537
  except asyncio.CancelledError:
1400
1538
  logger.info("Workspace run stopped")
1401
1539
  if not run.discard_on_cancel:
1402
- persist_assistant("interrupted")
1540
+ interrupted_snapshot = persist_assistant("interrupted")
1541
+ if interrupted_snapshot is not None:
1542
+ await append_run_snapshot(run, interrupted_snapshot)
1403
1543
  await append_run_event(
1404
1544
  run,
1405
1545
  "error",
@@ -1423,7 +1563,9 @@ def create_app(
1423
1563
  str(error) or EMPTY_MODEL_RESPONSE_DETAIL,
1424
1564
  )
1425
1565
  )
1426
- persist_assistant("failed")
1566
+ failed_snapshot = persist_assistant("failed")
1567
+ if failed_snapshot is not None:
1568
+ await append_run_snapshot(run, failed_snapshot)
1427
1569
  await append_run_event(run, "error", run_error_event_data(error_item))
1428
1570
  finally:
1429
1571
  run.is_done = True
@@ -1436,9 +1578,16 @@ def create_app(
1436
1578
  return run
1437
1579
 
1438
1580
  async def workspace_run_stream(
1439
- run: WorkspaceRun, after: int = 0
1581
+ run: WorkspaceRun, after: int = 0, include_snapshots: bool = True
1440
1582
  ) -> AsyncIterator[str]:
1441
1583
  next_event_index = after + 1
1584
+ reconnect_snapshot = run_snapshot_data_at(run, after) if after > 0 else None
1585
+ if include_snapshots and reconnect_snapshot is not None:
1586
+ yield stream_event(
1587
+ "snapshot",
1588
+ {"message": reconnect_snapshot},
1589
+ event_id=after,
1590
+ )
1442
1591
  while True:
1443
1592
  async with run.condition:
1444
1593
 
@@ -1452,7 +1601,9 @@ def create_app(
1452
1601
 
1453
1602
  for index, event, data in events:
1454
1603
  next_event_index = index + 1
1455
- yield stream_event(event, data)
1604
+ if event == "snapshot" and not include_snapshots:
1605
+ continue
1606
+ yield stream_event(event, data, event_id=index)
1456
1607
  if event in {"done", "error"}:
1457
1608
  return
1458
1609
 
@@ -1498,6 +1649,7 @@ def create_app(
1498
1649
  *,
1499
1650
  checkpoint: StoredCompactionCheckpoint | None,
1500
1651
  connection: ProviderConnection,
1652
+ context_window_limit: int,
1501
1653
  state: StoredState,
1502
1654
  ) -> tuple[StoredMessage, TokenUsageInfo]:
1503
1655
  logger.info("Workspace compact requested")
@@ -1513,6 +1665,7 @@ def create_app(
1513
1665
 
1514
1666
  marker, _, usage_info = await save_context_checkpoint(
1515
1667
  connection=connection,
1668
+ context_window_limit=context_window_limit,
1516
1669
  marker_content=COMPACTED_CONTEXT_MARKER,
1517
1670
  messages=state.messages,
1518
1671
  model_history=model_history,
@@ -1551,12 +1704,14 @@ def create_app(
1551
1704
  )
1552
1705
  state = store.read_state()
1553
1706
  connection = selected_connection(state)
1707
+ context_window_limit = context_window_for_settings(state.settings)
1554
1708
  checkpoint = store.read_active_compaction_checkpoint()
1555
1709
  store.save_is_compacting(True)
1556
1710
  compact_task = asyncio.create_task(
1557
1711
  run_manual_compact(
1558
1712
  checkpoint=checkpoint,
1559
1713
  connection=connection,
1714
+ context_window_limit=context_window_limit,
1560
1715
  state=state,
1561
1716
  )
1562
1717
  )
@@ -1596,7 +1751,7 @@ def create_app(
1596
1751
  logger.log(TRACE_LEVEL, "Workspace user content=%r", request.content)
1597
1752
  run = create_workspace_run(request.content)
1598
1753
  return StreamingResponse(
1599
- workspace_run_stream(run),
1754
+ workspace_run_stream(run, include_snapshots=False),
1600
1755
  media_type="text/event-stream",
1601
1756
  )
1602
1757