flowent 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/backend/pyproject.toml +31 -5
  2. package/backend/src/flowent/agent.py +13 -4
  3. package/backend/src/flowent/approval.py +6 -4
  4. package/backend/src/flowent/compact.py +35 -14
  5. package/backend/src/flowent/llm.py +73 -7
  6. package/backend/src/flowent/main.py +441 -85
  7. package/backend/src/flowent/static/assets/index-Bz76A4EJ.js +82 -0
  8. package/backend/src/flowent/static/assets/index-DufpDl8x.css +2 -0
  9. package/backend/src/flowent/static/index.html +2 -2
  10. package/backend/src/flowent/storage.py +151 -7
  11. package/backend/src/flowent/usage.py +315 -0
  12. package/backend/uv.lock +971 -3
  13. package/dist/frontend/assets/index-Bz76A4EJ.js +82 -0
  14. package/dist/frontend/assets/index-DufpDl8x.css +2 -0
  15. package/dist/frontend/index.html +2 -2
  16. package/package.json +24 -3
  17. package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
  18. package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
  19. package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
  20. package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
  21. package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
  22. package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
  23. package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
  24. package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
  25. package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
  26. package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
  27. package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
  28. package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
  29. package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
  30. package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
  31. package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
  32. package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
  33. package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
  34. package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
  35. package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
  36. package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
  37. package/backend/src/flowent/static/assets/index-BlaCigkZ.js +0 -82
  38. package/backend/src/flowent/static/assets/index-CRvbsH4K.css +0 -2
  39. package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
  40. package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
  41. package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
  42. package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
  43. package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
  44. package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
  45. package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
  46. package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
  47. package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
  48. package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
  49. package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
  50. package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
  51. package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
  52. package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
  53. package/backend/tests/conftest.py +0 -60
  54. package/backend/tests/test_agent_tools.py +0 -1124
  55. package/backend/tests/test_approval.py +0 -283
  56. package/backend/tests/test_channels.py +0 -360
  57. package/backend/tests/test_health.py +0 -12
  58. package/backend/tests/test_llm_providers.py +0 -548
  59. package/backend/tests/test_logging.py +0 -212
  60. package/backend/tests/test_mcp.py +0 -788
  61. package/backend/tests/test_patch.py +0 -112
  62. package/backend/tests/test_permissions.py +0 -588
  63. package/backend/tests/test_persistence.py +0 -249
  64. package/backend/tests/test_skills.py +0 -462
  65. package/backend/tests/test_startup_requirements.py +0 -144
  66. package/backend/tests/test_workspace_chat.py +0 -2174
  67. package/dist/frontend/assets/index-BlaCigkZ.js +0 -82
  68. package/dist/frontend/assets/index-CRvbsH4K.css +0 -2
@@ -3,7 +3,7 @@ import json
3
3
  import logging
4
4
  import os
5
5
  from collections.abc import AsyncIterator, Mapping, Sequence
6
- from contextlib import asynccontextmanager
6
+ from contextlib import asynccontextmanager, suppress
7
7
  from dataclasses import dataclass, field
8
8
  from pathlib import Path
9
9
  from typing import Literal
@@ -70,6 +70,14 @@ from flowent.storage import (
70
70
  StoredWritablePath,
71
71
  )
72
72
  from flowent.tools import ToolContext
73
+ from flowent.usage import (
74
+ TokenUsage,
75
+ TokenUsageInfo,
76
+ append_token_usage,
77
+ current_model_context_window,
78
+ estimated_token_usage_for_messages,
79
+ recompute_context_usage,
80
+ )
73
81
 
74
82
  logger = logging.getLogger("flowent.main")
75
83
 
@@ -77,7 +85,7 @@ logger = logging.getLogger("flowent.main")
77
85
  DEFAULT_STATIC_DIR = Path(__file__).parent / "static"
78
86
  COMPACTED_CONTEXT_MARKER = "Context compacted"
79
87
  OPTIMIZED_CONTEXT_MARKER = "Context optimized"
80
- DEFAULT_AUTO_COMPACT_TOKEN_LIMIT = 120_000
88
+ DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO = 0.95
81
89
  AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
82
90
  APPROVAL_TRANSCRIPT_MESSAGE_LIMIT = 12
83
91
  APPROVAL_TRANSCRIPT_TEXT_LIMIT = 2_000
@@ -113,10 +121,17 @@ class WorkspaceRunResponse(BaseModel):
113
121
  run_id: str
114
122
 
115
123
 
116
- class WorkspaceCompactResponse(BaseModel):
124
+ class WorkspaceClearResponse(BaseModel):
117
125
  model_config = ConfigDict(extra="forbid")
118
126
 
119
- message: StoredMessage
127
+ active_run_id: str | None = None
128
+ messages: list[StoredMessage]
129
+ usage_info: TokenUsageInfo | None = None
130
+
131
+
132
+ @dataclass
133
+ class WorkspaceCompactTask:
134
+ task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]]
120
135
 
121
136
 
122
137
  class AboutResponse(BaseModel):
@@ -167,8 +182,10 @@ class WorkspaceRun:
167
182
  condition: asyncio.Condition
168
183
  discard_on_cancel: bool = False
169
184
  events: list[tuple[int, str, dict[str, object]]] = field(default_factory=list)
185
+ generation: int = 0
170
186
  id: str = field(default_factory=lambda: str(uuid4()))
171
187
  is_done: bool = False
188
+ latest_snapshot: StoredMessage | None = None
172
189
  task: asyncio.Task[None] | None = None
173
190
 
174
191
  @property
@@ -176,8 +193,15 @@ class WorkspaceRun:
176
193
  return self.events[-1][0] if self.events else 0
177
194
 
178
195
 
179
- def stream_event(event: str, data: dict[str, object]) -> str:
180
- return f"event: {event}\ndata: {json.dumps(data)}\n\n"
196
+ def stream_event(
197
+ event: str, data: dict[str, object], event_id: int | None = None
198
+ ) -> str:
199
+ id_line = f"id: {event_id}\n" if event_id is not None else ""
200
+ return f"{id_line}event: {event}\ndata: {json.dumps(data)}\n\n"
201
+
202
+
203
+ def stream_message_data(message: StoredMessage) -> dict[str, object]:
204
+ return {**message.model_dump(), "status": message.status}
181
205
 
182
206
 
183
207
  def append_or_replace_message(
@@ -189,6 +213,18 @@ def append_or_replace_message(
189
213
  ]
190
214
 
191
215
 
216
+ def run_snapshot_data_at(
217
+ run: WorkspaceRun, event_index: int
218
+ ) -> dict[str, object] | None:
219
+ for current_event_index, event, data in reversed(run.events):
220
+ if current_event_index > event_index or event != "snapshot":
221
+ continue
222
+ message = data.get("message")
223
+ if isinstance(message, dict):
224
+ return message
225
+ return None
226
+
227
+
192
228
  USER_VISIBLE_RUN_ERROR_TITLE = "Request failed"
193
229
  USER_VISIBLE_RUN_ERROR_MESSAGE = "Check the model connection settings and try again."
194
230
  USER_VISIBLE_CONTEXT_OPTIMIZATION_ERROR_MESSAGE = "Context could not be optimized."
@@ -504,16 +540,22 @@ def is_context_marker(message: StoredMessage) -> bool:
504
540
  return message.content in {COMPACTED_CONTEXT_MARKER, OPTIMIZED_CONTEXT_MARKER}
505
541
 
506
542
 
507
- def auto_compact_token_limit() -> int:
543
+ def auto_compact_token_limit(context_window: int) -> int:
508
544
  raw_limit = os.environ.get("FLOWENT_AUTO_COMPACT_TOKEN_LIMIT", "")
545
+ if not raw_limit:
546
+ return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
509
547
  try:
510
548
  return max(0, int(raw_limit))
511
549
  except ValueError:
512
- return DEFAULT_AUTO_COMPACT_TOKEN_LIMIT
550
+ return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
513
551
 
514
552
 
515
- def should_auto_compact(messages: list[ChatMessage]) -> bool:
516
- token_limit = auto_compact_token_limit()
553
+ def should_auto_compact(
554
+ messages: list[ChatMessage],
555
+ *,
556
+ context_window: int,
557
+ ) -> bool:
558
+ token_limit = auto_compact_token_limit(context_window)
517
559
  if token_limit <= 0:
518
560
  return False
519
561
  return (
@@ -522,6 +564,77 @@ def should_auto_compact(messages: list[ChatMessage]) -> bool:
522
564
  )
523
565
 
524
566
 
567
+ def model_visible_messages_for_usage(
568
+ messages: Sequence[Mapping[str, object]],
569
+ ) -> list[dict[str, object]]:
570
+ return [
571
+ dict(message)
572
+ for message in messages
573
+ if message.get("role") in {"system", "user", "assistant", "tool"}
574
+ ]
575
+
576
+
577
+ def usage_event_data(usage_info: TokenUsageInfo) -> dict[str, object]:
578
+ return {"usage_info": usage_info.model_dump()}
579
+
580
+
581
+ def update_context_usage_for_response(
582
+ usage_info: TokenUsageInfo | None,
583
+ *,
584
+ messages: Sequence[Mapping[str, object]],
585
+ output_content: str,
586
+ model_context_window: int,
587
+ ) -> TokenUsageInfo:
588
+ return recompute_context_usage(
589
+ usage_info,
590
+ estimated_token_usage_for_messages(
591
+ model_visible_messages_for_usage(messages),
592
+ output_content=output_content,
593
+ ).total_tokens,
594
+ model_context_window=model_context_window,
595
+ )
596
+
597
+
598
+ def usage_info_for_model(
599
+ usage_info: TokenUsageInfo | None,
600
+ model_context_window: int,
601
+ ) -> TokenUsageInfo | None:
602
+ if usage_info is None:
603
+ return None
604
+ return usage_info.model_copy(update={"model_context_window": model_context_window})
605
+
606
+
607
+ def context_window_for_settings(settings: StoredSettings) -> int:
608
+ if settings.context_window_limit is not None:
609
+ return settings.context_window_limit
610
+ return current_model_context_window(settings.selected_model)
611
+
612
+
613
+ def state_with_current_model_context_window(state: StoredState) -> StoredState:
614
+ model_context_window = context_window_for_settings(state.settings)
615
+ return state.model_copy(
616
+ update={
617
+ "messages": [
618
+ message.model_copy(
619
+ update={
620
+ "usage_info": usage_info_for_model(
621
+ message.usage_info,
622
+ model_context_window,
623
+ )
624
+ }
625
+ )
626
+ if message.usage_info is not None
627
+ else message
628
+ for message in state.messages
629
+ ],
630
+ "usage_info": usage_info_for_model(
631
+ state.usage_info,
632
+ model_context_window,
633
+ ),
634
+ }
635
+ )
636
+
637
+
525
638
  def workspace_chat_messages(
526
639
  messages: list[StoredMessage],
527
640
  compacted_context: str = "",
@@ -617,6 +730,8 @@ def create_app(
617
730
  telegram_bot_manager: TelegramBotManager | None = None
618
731
  workspace_runs: dict[str, WorkspaceRun] = {}
619
732
  active_workspace_run_id: str | None = None
733
+ workspace_generation = 0
734
+ active_compact_task: WorkspaceCompactTask | None = None
620
735
 
621
736
  static_dir = frontend_static_directory().resolve(strict=False)
622
737
  logger.debug("Flowent app created serve_frontend=%s", serve_frontend)
@@ -647,17 +762,13 @@ def create_app(
647
762
  async def save_context_checkpoint(
648
763
  *,
649
764
  connection: ProviderConnection,
765
+ context_window_limit: int,
650
766
  messages: list[StoredMessage],
651
767
  model_history: list[ChatMessage],
652
768
  marker_content: str,
653
769
  source_message_id: str | None = None,
654
770
  trigger: Literal["manual", "auto"],
655
- ) -> tuple[StoredMessage, list[dict[str, object]]]:
656
- marker = StoredMessage(
657
- author="system",
658
- content=marker_content,
659
- id=str(uuid4()),
660
- )
771
+ ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo]:
661
772
  compact_result = await compact_provider.compact(
662
773
  connection,
663
774
  CompactInput(
@@ -668,6 +779,25 @@ def create_app(
668
779
  ),
669
780
  completion=chat_completion,
670
781
  )
782
+ usage_info = store.read_usage_info()
783
+ if compact_result.summary_usage is not None:
784
+ usage_info = append_token_usage(
785
+ usage_info,
786
+ compact_result.summary_usage,
787
+ model_context_window=context_window_limit,
788
+ )
789
+ usage_info = recompute_context_usage(
790
+ usage_info,
791
+ compact_result.token_after,
792
+ model_context_window=context_window_limit,
793
+ )
794
+ store.save_usage_info(usage_info)
795
+ marker = StoredMessage(
796
+ author="system",
797
+ content=marker_content,
798
+ id=str(uuid4()),
799
+ usage_info=usage_info,
800
+ )
671
801
  store.save_compaction_checkpoint(
672
802
  StoredCompactionCheckpoint(
673
803
  id=str(uuid4()),
@@ -689,23 +819,30 @@ def create_app(
689
819
  compact_result.token_after,
690
820
  )
691
821
  logger.log(TRACE_LEVEL, "Workspace compact summary=%r", compact_result.summary)
692
- return marker, [
693
- message.model_dump() for message in compact_result.replacement_history
694
- ]
822
+ return (
823
+ marker,
824
+ [message.model_dump() for message in compact_result.replacement_history],
825
+ usage_info,
826
+ )
695
827
 
696
828
  async def auto_compact_workspace_messages(
697
829
  *,
698
830
  connection: ProviderConnection,
831
+ context_window_limit: int,
699
832
  messages: list[StoredMessage],
700
833
  model_history: list[ChatMessage],
701
834
  source_message_id: str | None = None,
702
- ) -> tuple[StoredMessage, list[dict[str, object]]] | None:
703
- if not should_auto_compact(model_history):
835
+ ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
836
+ if not should_auto_compact(
837
+ model_history,
838
+ context_window=context_window_limit,
839
+ ):
704
840
  return None
705
841
  logger.info("Workspace auto compact requested")
706
842
  try:
707
843
  return await save_context_checkpoint(
708
844
  connection=connection,
845
+ context_window_limit=context_window_limit,
709
846
  marker_content=OPTIMIZED_CONTEXT_MARKER,
710
847
  messages=messages,
711
848
  model_history=model_history,
@@ -719,6 +856,7 @@ def create_app(
719
856
  async def run_workspace_turn(content: str) -> StoredMessage:
720
857
  state = store.read_state()
721
858
  connection = selected_connection(state)
859
+ context_window_limit = context_window_for_settings(state.settings)
722
860
  user_message = StoredMessage(
723
861
  author="user",
724
862
  content=content,
@@ -736,17 +874,19 @@ def create_app(
736
874
  ]
737
875
  auto_compaction = await auto_compact_workspace_messages(
738
876
  connection=connection,
877
+ context_window_limit=context_window_limit,
739
878
  messages=state.messages,
740
879
  model_history=model_history,
741
880
  source_message_id=None,
742
881
  )
743
882
  if auto_compaction is not None:
744
- marker, _ = auto_compaction
883
+ marker, _, _ = auto_compaction
745
884
  next_messages = [*state.messages, marker, user_message]
746
885
  store.save_messages(next_messages)
747
886
  request_messages = request_messages_for_content(state, next_messages, content)
748
887
  assistant_id = str(uuid4())
749
888
  assistant_output = AssistantOutputBuilder(assistant_id)
889
+ turn_usage_info: TokenUsageInfo | None = None
750
890
 
751
891
  async def review_tool_approval(request: ApprovalReviewRequest):
752
892
  return await review_approval_request(
@@ -798,6 +938,21 @@ def create_app(
798
938
  assistant_output.append_text(str(event.data.get("content") or ""))
799
939
  if event.event == "thinking_delta":
800
940
  assistant_output.append_thinking(str(event.data.get("content") or ""))
941
+ if event.event == "usage":
942
+ usage_data = event.data.get("usage")
943
+ if isinstance(usage_data, dict):
944
+ usage_info = update_context_usage_for_response(
945
+ append_token_usage(
946
+ store.read_usage_info(),
947
+ TokenUsage.model_validate(usage_data),
948
+ model_context_window=context_window_limit,
949
+ ),
950
+ messages=request_messages,
951
+ output_content=assistant_output.content,
952
+ model_context_window=context_window_limit,
953
+ )
954
+ store.save_usage_info(usage_info)
955
+ turn_usage_info = usage_info
801
956
  if event.event == "tool_start":
802
957
  tool = event.data.get("tool")
803
958
  if isinstance(tool, dict) and isinstance(tool.get("id"), str):
@@ -813,6 +968,23 @@ def create_app(
813
968
  assistant_output.set_assistant_id(assistant_id)
814
969
  assistant_output.apply_done_message(message)
815
970
 
971
+ final_usage_info = turn_usage_info
972
+ if final_usage_info is None:
973
+ final_usage_info = update_context_usage_for_response(
974
+ store.read_usage_info(),
975
+ messages=request_messages,
976
+ output_content=assistant_output.content,
977
+ model_context_window=context_window_limit,
978
+ )
979
+ else:
980
+ final_usage_info = update_context_usage_for_response(
981
+ final_usage_info,
982
+ messages=request_messages,
983
+ output_content=assistant_output.content,
984
+ model_context_window=context_window_limit,
985
+ )
986
+ store.save_usage_info(final_usage_info)
987
+
816
988
  assistant_message = StoredMessage(
817
989
  author="assistant",
818
990
  content=assistant_output.content,
@@ -821,6 +993,7 @@ def create_app(
821
993
  status="completed",
822
994
  thinking=assistant_output.thinking,
823
995
  tools=list(assistant_output.tools.values()),
996
+ usage_info=final_usage_info,
824
997
  )
825
998
  store.save_messages([*next_messages, assistant_message])
826
999
  return assistant_message
@@ -858,7 +1031,7 @@ def create_app(
858
1031
 
859
1032
  @app.get("/api/state")
860
1033
  async def app_state() -> StoredState:
861
- state = store.read_state()
1034
+ state = state_with_current_model_context_window(store.read_state())
862
1035
  active_run = (
863
1036
  workspace_runs.get(active_workspace_run_id)
864
1037
  if active_workspace_run_id
@@ -999,14 +1172,23 @@ def create_app(
999
1172
  async def save_workspace_messages(
1000
1173
  request: WorkspaceMessagesRequest,
1001
1174
  ) -> WorkspaceMessagesRequest:
1175
+ return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
1176
+
1177
+ @app.post("/api/workspace/clear")
1178
+ async def clear_workspace() -> WorkspaceClearResponse:
1002
1179
  nonlocal active_workspace_run_id
1003
- if not request.messages:
1004
- run = active_workspace_run()
1005
- if run is not None and run.task is not None and not run.task.done():
1180
+ nonlocal workspace_generation
1181
+ workspace_generation += 1
1182
+ for run in workspace_runs.values():
1183
+ run.is_done = True
1184
+ if run.task is not None and not run.task.done():
1006
1185
  run.discard_on_cancel = True
1007
1186
  run.task.cancel()
1008
- active_workspace_run_id = None
1009
- return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
1187
+ async with run.condition:
1188
+ run.condition.notify_all()
1189
+ active_workspace_run_id = None
1190
+ messages = store.save_messages([])
1191
+ return WorkspaceClearResponse(messages=messages)
1010
1192
 
1011
1193
  async def append_run_event(
1012
1194
  run: WorkspaceRun, event: str, data: dict[str, object]
@@ -1015,15 +1197,42 @@ def create_app(
1015
1197
  run.events.append((run.latest_event_index + 1, event, data))
1016
1198
  run.condition.notify_all()
1017
1199
 
1200
+ async def append_run_snapshot(run: WorkspaceRun, message: StoredMessage) -> None:
1201
+ if message.author != "assistant":
1202
+ return
1203
+ run.latest_snapshot = message
1204
+ await append_run_event(
1205
+ run,
1206
+ "snapshot",
1207
+ {"message": stream_message_data(message)},
1208
+ )
1209
+
1018
1210
  def active_workspace_run() -> WorkspaceRun | None:
1019
1211
  if active_workspace_run_id is None:
1020
1212
  return None
1021
- return workspace_runs.get(active_workspace_run_id)
1213
+ run = workspace_runs.get(active_workspace_run_id)
1214
+ if run is None or run.is_done:
1215
+ return None
1216
+ return run
1217
+
1218
+ def has_active_workspace_run() -> bool:
1219
+ return any(
1220
+ not run.is_done and run.task is not None and not run.task.done()
1221
+ for run in workspace_runs.values()
1222
+ )
1022
1223
 
1023
1224
  def create_workspace_run(content: str) -> WorkspaceRun:
1024
1225
  nonlocal active_workspace_run_id
1226
+ if has_active_workspace_run():
1227
+ active_run = active_workspace_run()
1228
+ raise HTTPException(
1229
+ status_code=409,
1230
+ detail="Response in progress",
1231
+ headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
1232
+ )
1025
1233
  state = store.read_state()
1026
1234
  connection = selected_connection(state)
1235
+ context_window_limit = context_window_for_settings(state.settings)
1027
1236
 
1028
1237
  user_message = StoredMessage(
1029
1238
  author="user",
@@ -1032,7 +1241,10 @@ def create_app(
1032
1241
  )
1033
1242
  next_messages = [*state.messages, user_message]
1034
1243
  store.save_messages(next_messages)
1035
- run = WorkspaceRun(condition=asyncio.Condition())
1244
+ run = WorkspaceRun(
1245
+ condition=asyncio.Condition(),
1246
+ generation=workspace_generation,
1247
+ )
1036
1248
  workspace_runs[run.id] = run
1037
1249
  active_workspace_run_id = run.id
1038
1250
 
@@ -1047,8 +1259,13 @@ def create_app(
1047
1259
  )
1048
1260
  assistant_output = AssistantOutputBuilder(assistant_message.id)
1049
1261
 
1050
- def persist_assistant(status: str = "running") -> None:
1262
+ def is_current_generation() -> bool:
1263
+ return run.generation == workspace_generation
1264
+
1265
+ def persist_assistant(status: str = "running") -> StoredMessage | None:
1051
1266
  nonlocal next_messages, assistant_message
1267
+ if not is_current_generation() or run.discard_on_cancel:
1268
+ return None
1052
1269
  assistant_message = StoredMessage(
1053
1270
  author="assistant",
1054
1271
  content=assistant_output.content,
@@ -1057,14 +1274,17 @@ def create_app(
1057
1274
  status=status,
1058
1275
  thinking=assistant_output.thinking,
1059
1276
  tools=list(assistant_output.tools.values()),
1277
+ usage_info=store.read_usage_info(),
1060
1278
  )
1061
1279
  next_messages = append_or_replace_message(
1062
1280
  next_messages, assistant_message
1063
1281
  )
1064
1282
  store.upsert_message(assistant_message)
1283
+ return assistant_message
1065
1284
 
1066
1285
  try:
1067
1286
  current_tool_id: str | None = None
1287
+ turn_usage_info: TokenUsageInfo | None = None
1068
1288
  current_request_messages = request_messages_for_content(
1069
1289
  state,
1070
1290
  next_messages,
@@ -1077,6 +1297,7 @@ def create_app(
1077
1297
  )
1078
1298
  auto_compaction = await auto_compact_workspace_messages(
1079
1299
  connection=connection,
1300
+ context_window_limit=context_window_limit,
1080
1301
  messages=state.messages,
1081
1302
  model_history=[
1082
1303
  ChatMessage.model_validate(message)
@@ -1085,13 +1306,16 @@ def create_app(
1085
1306
  source_message_id=None,
1086
1307
  )
1087
1308
  if auto_compaction is not None:
1088
- marker, _ = auto_compaction
1309
+ marker, _, usage_info = auto_compaction
1089
1310
  next_messages = [*state.messages, marker, user_message]
1090
1311
  store.save_messages(next_messages)
1091
1312
  await append_run_event(
1092
1313
  run,
1093
1314
  "context_optimized",
1094
- {"message": marker.model_dump()},
1315
+ {
1316
+ "message": marker.model_dump(),
1317
+ **usage_event_data(usage_info),
1318
+ },
1095
1319
  )
1096
1320
  current_request_messages = request_messages_for_content(
1097
1321
  state,
@@ -1130,6 +1354,8 @@ def create_app(
1130
1354
  conversation: Sequence[Mapping[str, object]],
1131
1355
  ) -> AgentContextUpdate | None:
1132
1356
  nonlocal next_messages
1357
+ if not is_current_generation() or run.discard_on_cancel:
1358
+ return None
1133
1359
  assistant_snapshot = StoredMessage(
1134
1360
  author="assistant",
1135
1361
  content=assistant_output.content,
@@ -1138,6 +1364,7 @@ def create_app(
1138
1364
  status="running",
1139
1365
  thinking=assistant_output.thinking,
1140
1366
  tools=list(assistant_output.tools.values()),
1367
+ usage_info=store.read_usage_info(),
1141
1368
  )
1142
1369
  model_history: list[ChatMessage] = []
1143
1370
  for message in conversation:
@@ -1164,13 +1391,17 @@ def create_app(
1164
1391
  )
1165
1392
  auto_result = await auto_compact_workspace_messages(
1166
1393
  connection=connection,
1394
+ context_window_limit=context_window_limit,
1167
1395
  messages=next_messages,
1168
1396
  model_history=model_history,
1169
1397
  source_message_id=assistant_snapshot.id,
1170
1398
  )
1171
1399
  if auto_result is None:
1172
1400
  return None
1173
- marker, replacement_history = auto_result
1401
+ marker, replacement_history, usage_info = auto_result
1402
+ assistant_snapshot = assistant_snapshot.model_copy(
1403
+ update={"usage_info": usage_info}
1404
+ )
1174
1405
  next_messages = append_or_replace_message(
1175
1406
  [*next_messages, marker], assistant_snapshot
1176
1407
  )
@@ -1181,7 +1412,10 @@ def create_app(
1181
1412
  ]
1182
1413
  return AgentContextUpdate(
1183
1414
  conversation=compacted_conversation,
1184
- message=marker.model_dump(),
1415
+ message={
1416
+ **marker.model_dump(),
1417
+ "usage_info": usage_info.model_dump(),
1418
+ },
1185
1419
  )
1186
1420
 
1187
1421
  async for event in run_agent_stream(
@@ -1195,6 +1429,11 @@ def create_app(
1195
1429
  messages=current_request_messages,
1196
1430
  tool_runner=tool_runner,
1197
1431
  ):
1432
+ if not is_current_generation() or run.discard_on_cancel:
1433
+ raise asyncio.CancelledError
1434
+ run_event_data = event.data
1435
+ should_append_run_event = event.event != "usage"
1436
+ snapshot_after_event: StoredMessage | None = None
1198
1437
  if event.event == "start":
1199
1438
  event_id = event.data.get("id")
1200
1439
  if isinstance(event_id, str):
@@ -1202,12 +1441,12 @@ def create_app(
1202
1441
  update={"id": event_id}
1203
1442
  )
1204
1443
  assistant_output.set_assistant_id(event_id)
1205
- persist_assistant()
1444
+ snapshot_after_event = persist_assistant()
1206
1445
  if event.event == "output_start":
1207
1446
  index = event.data.get("index")
1208
1447
  if isinstance(index, int):
1209
1448
  assistant_output.start_group(index)
1210
- persist_assistant()
1449
+ snapshot_after_event = persist_assistant()
1211
1450
  if event.event == "tool_start":
1212
1451
  tool = event.data.get("tool")
1213
1452
  if isinstance(tool, dict) and isinstance(tool.get("id"), str):
@@ -1215,7 +1454,7 @@ def create_app(
1215
1454
  assistant_output.start_tool(
1216
1455
  StoredToolItem.model_validate(tool)
1217
1456
  )
1218
- persist_assistant()
1457
+ snapshot_after_event = persist_assistant()
1219
1458
  if event.event in {"tool_done", "tool_error"}:
1220
1459
  tool_id = event.data.get("id")
1221
1460
  if (
@@ -1226,17 +1465,35 @@ def create_app(
1226
1465
  None if current_tool_id == tool_id else current_tool_id
1227
1466
  )
1228
1467
  assistant_output.update_tool(tool_id, event.data)
1229
- persist_assistant()
1468
+ snapshot_after_event = persist_assistant()
1230
1469
  if event.event == "delta":
1231
1470
  assistant_output.append_text(
1232
1471
  str(event.data.get("content") or "")
1233
1472
  )
1234
- persist_assistant()
1473
+ snapshot_after_event = persist_assistant()
1235
1474
  if event.event == "thinking_delta":
1236
1475
  assistant_output.append_thinking(
1237
1476
  str(event.data.get("content") or "")
1238
1477
  )
1239
- persist_assistant()
1478
+ snapshot_after_event = persist_assistant()
1479
+ if event.event == "usage":
1480
+ usage_data = event.data.get("usage")
1481
+ if isinstance(usage_data, dict):
1482
+ usage_info = update_context_usage_for_response(
1483
+ append_token_usage(
1484
+ store.read_usage_info(),
1485
+ TokenUsage.model_validate(usage_data),
1486
+ model_context_window=context_window_limit,
1487
+ ),
1488
+ messages=current_request_messages,
1489
+ output_content=assistant_output.content,
1490
+ model_context_window=context_window_limit,
1491
+ )
1492
+ store.save_usage_info(usage_info)
1493
+ turn_usage_info = usage_info
1494
+ run_event_data = usage_event_data(usage_info)
1495
+ should_append_run_event = True
1496
+ snapshot_after_event = persist_assistant()
1240
1497
  logger.log(
1241
1498
  TRACE_LEVEL,
1242
1499
  "Workspace stream event=%s data=%r",
@@ -1247,12 +1504,42 @@ def create_app(
1247
1504
  message = event.data.get("message")
1248
1505
  if isinstance(message, dict):
1249
1506
  assistant_output.apply_done_message(message)
1250
- persist_assistant("completed")
1251
- await append_run_event(run, event.event, event.data)
1507
+ response_usage_info = store.read_usage_info()
1508
+ final_usage_info = turn_usage_info
1509
+ if final_usage_info is None:
1510
+ final_usage_info = update_context_usage_for_response(
1511
+ response_usage_info,
1512
+ messages=current_request_messages,
1513
+ output_content=assistant_output.content,
1514
+ model_context_window=context_window_limit,
1515
+ )
1516
+ else:
1517
+ final_usage_info = update_context_usage_for_response(
1518
+ final_usage_info,
1519
+ messages=current_request_messages,
1520
+ output_content=assistant_output.content,
1521
+ model_context_window=context_window_limit,
1522
+ )
1523
+ store.save_usage_info(final_usage_info)
1524
+ snapshot_after_event = persist_assistant("completed")
1525
+ if snapshot_after_event is not None:
1526
+ run_event_data = {
1527
+ "message": stream_message_data(snapshot_after_event)
1528
+ }
1529
+ if event.event == "done" and snapshot_after_event is not None:
1530
+ await append_run_snapshot(run, snapshot_after_event)
1531
+ await append_run_event(run, event.event, run_event_data)
1532
+ else:
1533
+ if should_append_run_event:
1534
+ await append_run_event(run, event.event, run_event_data)
1535
+ if snapshot_after_event is not None:
1536
+ await append_run_snapshot(run, snapshot_after_event)
1252
1537
  except asyncio.CancelledError:
1253
1538
  logger.info("Workspace run stopped")
1254
1539
  if not run.discard_on_cancel:
1255
- persist_assistant("interrupted")
1540
+ interrupted_snapshot = persist_assistant("interrupted")
1541
+ if interrupted_snapshot is not None:
1542
+ await append_run_snapshot(run, interrupted_snapshot)
1256
1543
  await append_run_event(
1257
1544
  run,
1258
1545
  "error",
@@ -1276,7 +1563,9 @@ def create_app(
1276
1563
  str(error) or EMPTY_MODEL_RESPONSE_DETAIL,
1277
1564
  )
1278
1565
  )
1279
- persist_assistant("failed")
1566
+ failed_snapshot = persist_assistant("failed")
1567
+ if failed_snapshot is not None:
1568
+ await append_run_snapshot(run, failed_snapshot)
1280
1569
  await append_run_event(run, "error", run_error_event_data(error_item))
1281
1570
  finally:
1282
1571
  run.is_done = True
@@ -1289,9 +1578,16 @@ def create_app(
1289
1578
  return run
1290
1579
 
1291
1580
  async def workspace_run_stream(
1292
- run: WorkspaceRun, after: int = 0
1581
+ run: WorkspaceRun, after: int = 0, include_snapshots: bool = True
1293
1582
  ) -> AsyncIterator[str]:
1294
1583
  next_event_index = after + 1
1584
+ reconnect_snapshot = run_snapshot_data_at(run, after) if after > 0 else None
1585
+ if include_snapshots and reconnect_snapshot is not None:
1586
+ yield stream_event(
1587
+ "snapshot",
1588
+ {"message": reconnect_snapshot},
1589
+ event_id=after,
1590
+ )
1295
1591
  while True:
1296
1592
  async with run.condition:
1297
1593
 
@@ -1305,7 +1601,9 @@ def create_app(
1305
1601
 
1306
1602
  for index, event, data in events:
1307
1603
  next_event_index = index + 1
1308
- yield stream_event(event, data)
1604
+ if event == "snapshot" and not include_snapshots:
1605
+ continue
1606
+ yield stream_event(event, data, event_id=index)
1309
1607
  if event in {"done", "error"}:
1310
1608
  return
1311
1609
 
@@ -1343,47 +1641,105 @@ def create_app(
1343
1641
  run.task.cancel()
1344
1642
  return {"ok": True}
1345
1643
 
1346
- @app.post("/api/workspace/compact")
1347
- async def compact_workspace() -> WorkspaceCompactResponse:
1348
- if active_workspace_run() is not None:
1349
- raise HTTPException(
1350
- status_code=409,
1351
- detail="Compact is unavailable while Flowent is responding.",
1644
+ @app.post("/api/workspace/compact", response_class=StreamingResponse)
1645
+ async def compact_workspace() -> StreamingResponse:
1646
+ nonlocal active_compact_task
1647
+
1648
+ async def run_manual_compact(
1649
+ *,
1650
+ checkpoint: StoredCompactionCheckpoint | None,
1651
+ connection: ProviderConnection,
1652
+ context_window_limit: int,
1653
+ state: StoredState,
1654
+ ) -> tuple[StoredMessage, TokenUsageInfo]:
1655
+ logger.info("Workspace compact requested")
1656
+ try:
1657
+ model_history = [
1658
+ *runtime_context_messages(cwd, state.settings.agent_prompt),
1659
+ *workspace_chat_messages(
1660
+ state.messages,
1661
+ store.read_compacted_context(),
1662
+ checkpoint,
1663
+ ),
1664
+ ]
1665
+
1666
+ marker, _, usage_info = await save_context_checkpoint(
1667
+ connection=connection,
1668
+ context_window_limit=context_window_limit,
1669
+ marker_content=COMPACTED_CONTEXT_MARKER,
1670
+ messages=state.messages,
1671
+ model_history=model_history,
1672
+ source_message_id=None,
1673
+ trigger="manual",
1674
+ )
1675
+ store.save_messages([*state.messages, marker])
1676
+ logger.info("Workspace compact completed")
1677
+ return marker, usage_info
1678
+ except Exception:
1679
+ logger.exception("Workspace compact failed")
1680
+ raise
1681
+ finally:
1682
+ store.save_is_compacting(False)
1683
+
1684
+ def clear_active_compact_task(
1685
+ task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]],
1686
+ ) -> None:
1687
+ nonlocal active_compact_task
1688
+ if active_compact_task is not None and active_compact_task.task is task:
1689
+ active_compact_task = None
1690
+ with suppress(asyncio.CancelledError):
1691
+ task.exception()
1692
+
1693
+ if active_compact_task is not None:
1694
+ if not active_compact_task.task.done():
1695
+ compact_task = active_compact_task.task
1696
+ else:
1697
+ active_compact_task = None
1698
+
1699
+ if active_compact_task is None:
1700
+ if active_workspace_run() is not None:
1701
+ raise HTTPException(
1702
+ status_code=409,
1703
+ detail="Compact is unavailable while Flowent is responding.",
1704
+ )
1705
+ state = store.read_state()
1706
+ connection = selected_connection(state)
1707
+ context_window_limit = context_window_for_settings(state.settings)
1708
+ checkpoint = store.read_active_compaction_checkpoint()
1709
+ store.save_is_compacting(True)
1710
+ compact_task = asyncio.create_task(
1711
+ run_manual_compact(
1712
+ checkpoint=checkpoint,
1713
+ connection=connection,
1714
+ context_window_limit=context_window_limit,
1715
+ state=state,
1716
+ )
1352
1717
  )
1353
- logger.info("Workspace compact requested")
1354
- state = store.read_state()
1355
- connection = selected_connection(state)
1356
- checkpoint = store.read_active_compaction_checkpoint()
1357
- model_history = [
1358
- *runtime_context_messages(cwd, state.settings.agent_prompt),
1359
- *workspace_chat_messages(
1360
- state.messages,
1361
- store.read_compacted_context(),
1362
- checkpoint,
1363
- ),
1364
- ]
1718
+ compact_task.add_done_callback(clear_active_compact_task)
1719
+ active_compact_task = WorkspaceCompactTask(task=compact_task)
1365
1720
 
1366
- try:
1367
- marker, _ = await save_context_checkpoint(
1368
- connection=connection,
1369
- marker_content=COMPACTED_CONTEXT_MARKER,
1370
- messages=state.messages,
1371
- model_history=model_history,
1372
- source_message_id=None,
1373
- trigger="manual",
1721
+ async def compact_workspace_stream() -> AsyncIterator[str]:
1722
+ try:
1723
+ marker, usage_info = await asyncio.shield(compact_task)
1724
+ except Exception:
1725
+ yield stream_event(
1726
+ "error",
1727
+ {"message": "Context could not be compacted."},
1728
+ )
1729
+ return
1730
+
1731
+ marker_data = marker.model_dump()
1732
+ yield stream_event("usage", usage_event_data(usage_info))
1733
+ yield stream_event(
1734
+ "context_optimized",
1735
+ {"message": marker_data, **usage_event_data(usage_info)},
1374
1736
  )
1375
- except HTTPException:
1376
- raise
1377
- except Exception as error:
1378
- logger.exception("Workspace compact failed")
1379
- raise HTTPException(
1380
- status_code=500,
1381
- detail="Context could not be compacted.",
1382
- ) from error
1737
+ yield stream_event("done", {"message": marker_data})
1383
1738
 
1384
- store.save_messages([*state.messages, marker])
1385
- logger.info("Workspace compact completed")
1386
- return WorkspaceCompactResponse(message=marker)
1739
+ return StreamingResponse(
1740
+ compact_workspace_stream(),
1741
+ media_type="text/event-stream",
1742
+ )
1387
1743
 
1388
1744
  @app.post("/api/workspace/respond")
1389
1745
  async def respond_to_workspace(
@@ -1395,7 +1751,7 @@ def create_app(
1395
1751
  logger.log(TRACE_LEVEL, "Workspace user content=%r", request.content)
1396
1752
  run = create_workspace_run(request.content)
1397
1753
  return StreamingResponse(
1398
- workspace_run_stream(run),
1754
+ workspace_run_stream(run, include_snapshots=False),
1399
1755
  media_type="text/event-stream",
1400
1756
  )
1401
1757