npm - flowent - Versions diffs - 0.3.0 → 0.3.2 - Mend

flowent 0.3.0 → 0.3.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (30) hide show

package/backend/pyproject.toml +1 -1
package/backend/src/flowent/agent.py +22 -15
package/backend/src/flowent/api_models.py +13 -8
package/backend/src/flowent/llm.py +50 -6
package/backend/src/flowent/mcp.py +4 -3
package/backend/src/flowent/permissions.py +51 -38
package/backend/src/flowent/routes/providers.py +33 -10
package/backend/src/flowent/routes/system.py +5 -6
package/backend/src/flowent/routes/workspace.py +33 -23
package/backend/src/flowent/state/models.py +4 -4
package/backend/src/flowent/state/schema.py +121 -0
package/backend/src/flowent/state/store.py +9 -3
package/backend/src/flowent/static/assets/index-BX18a4Jz.js +100 -0
package/backend/src/flowent/static/assets/index-EC37agAH.css +2 -0
package/backend/src/flowent/static/index.html +2 -2
package/backend/src/flowent/tools.py +84 -33
package/backend/src/flowent/usage.py +66 -0
package/backend/src/flowent/workspace/context.py +140 -47
package/backend/src/flowent/workspace/events.py +5 -7
package/backend/src/flowent/workspace/output.py +129 -4
package/backend/src/flowent/workspace/runtime.py +393 -185
package/backend/uv.lock +1 -1
package/dist/frontend/assets/index-BX18a4Jz.js +100 -0
package/dist/frontend/assets/index-EC37agAH.css +2 -0
package/dist/frontend/index.html +2 -2
package/package.json +8 -10
package/backend/src/flowent/static/assets/index-CvWZZMtK.css +0 -2
package/backend/src/flowent/static/assets/index-ma2v8oW7.js +0 -90
package/dist/frontend/assets/index-CvWZZMtK.css +0 -2
package/dist/frontend/assets/index-ma2v8oW7.js +0 -90

package/backend/src/flowent/workspace/runtime.py CHANGED Viewed

@@ -27,26 +27,31 @@ from flowent.storage import (
     StoredState,
     StoredToolItem,
 )
-from flowent.tools import ToolContext
+from flowent.tools import ToolContext, text_tool_result, tool_specs
 from flowent.usage import (
     TokenUsage,
     TokenUsageInfo,
     append_token_usage,
+    full_context_usage,
+    is_context_window_error,
     recompute_context_usage,
 )
 from flowent.workspace.context import (
     COMPACTED_CONTEXT_MARKER,
     OPTIMIZED_CONTEXT_MARKER,
+    compact_prompt_chat_messages,
     context_window_for_settings,
+    model_request_messages_data,
+    model_visible_assistant_output_messages,
     should_auto_compact,
     update_context_usage_for_response,
     usage_event_data,
     workspace_chat_messages,
 )
 from flowent.workspace.events import (
-    WorkspaceRun,
+    WorkspaceResponse,
     append_or_replace_message,
-    run_snapshot_data_at,
+    response_snapshot_data_at,
     stream_event,
     stream_message_data,
 )
@@ -54,14 +59,17 @@ from flowent.workspace.output import (
     EMPTY_MODEL_RESPONSE_DETAIL,
     AssistantOutputBuilder,
     approval_transcript,
+    assistant_retry_output_start_index,
     run_error_event_data,
     run_error_output_item,
+    trim_assistant_message_at_error,
 )
 logger = logging.getLogger("flowent.workspace.runtime")
 AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
 WORKSPACE_PROGRESS_FLUSH_INTERVAL_SECONDS = 0.5
+USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE = "Context could not be compacted."
 @dataclass
@@ -84,8 +92,7 @@ class WorkspaceRuntime:
         self.cwd = cwd
         self.mcp_manager = mcp_manager
         self.store = store
-        self.runs: dict[str, WorkspaceRun] = {}
-        self.active_run_id: str | None = None
+        self.active_response: WorkspaceResponse | None = None
         self.generation = 0
         self.active_compact_task: WorkspaceCompactTask | None = None
@@ -102,14 +109,13 @@ class WorkspaceRuntime:
             compacted_context,
             checkpoint,
         )
-        return [
-            message.model_dump()
-            for message in [
+        return model_request_messages_data(
+            [
                 *runtime_context_messages(self.cwd, state.settings.agent_prompt),
                 *explicit_skill_messages(self.cwd, self.store, content),
                 *chat_messages,
             ]
-        ]
+        )
     async def save_context_checkpoint(
         self,
@@ -117,16 +123,17 @@ class WorkspaceRuntime:
         connection: ProviderConnection,
         context_window_limit: int,
         messages: list[StoredMessage],
-        model_history: list[ChatMessage],
+        model_history: Sequence[ChatMessage | Mapping[str, object]],
         marker_content: str,
         source_message_id: str | None = None,
         trigger: Literal["manual", "auto"],
     ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo]:
+        compact_model_history = compact_prompt_chat_messages(model_history)
         compact_result = await self.compact_provider.compact(
             connection,
             CompactInput(
                 messages=messages,
-                model_history=model_history,
+                model_history=compact_model_history,
                 retained_message_token_budget=AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET,
                 trigger=trigger,
             ),
@@ -149,6 +156,7 @@ class WorkspaceRuntime:
             author="system",
             content=marker_content,
             id=str(uuid4()),
+            summary=compact_result.summary,
             usage_info=usage_info,
         )
         self.store.save_compaction_checkpoint(
@@ -183,13 +191,16 @@ class WorkspaceRuntime:
         *,
         connection: ProviderConnection,
         context_window_limit: int,
+        budget_messages: Sequence[ChatMessage | Mapping[str, object]] | None = None,
         messages: list[StoredMessage],
-        model_history: list[ChatMessage],
+        model_history: Sequence[ChatMessage | Mapping[str, object]],
         source_message_id: str | None = None,
+        tools: Sequence[Mapping[str, object]] = (),
     ) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
         if not should_auto_compact(
-            model_history,
+            budget_messages or model_history,
             context_window=context_window_limit,
+            tools=tools,
         ):
             return None
         logger.info("Workspace auto compact requested")
@@ -218,7 +229,11 @@ class WorkspaceRuntime:
         )
         next_messages = [*state.messages, user_message]
         self.store.save_messages(next_messages)
-        model_history = [
+        model_tool_specs = [
+            *tool_specs(),
+            *list(self.mcp_manager.tool_specs()),
+        ]
+        model_history: list[ChatMessage | Mapping[str, object]] = [
             *runtime_context_messages(self.cwd, state.settings.agent_prompt),
             *workspace_chat_messages(
                 state.messages,
@@ -229,9 +244,13 @@ class WorkspaceRuntime:
         auto_compaction = await self.auto_compact_messages(
             connection=connection,
             context_window_limit=context_window_limit,
+            budget_messages=self.request_messages_for_content(
+                state, next_messages, content
+            ),
             messages=state.messages,
             model_history=model_history,
             source_message_id=None,
+            tools=model_tool_specs,
         )
         if auto_compaction is not None:
             marker, _, _ = auto_compaction
@@ -336,6 +355,7 @@ class WorkspaceRuntime:
                     tool.model_dump(exclude_none=True)
                     for tool in assistant_output.tools.values()
                 ],
+                request_tools=model_tool_specs,
                 model_context_window=context_window_limit,
             )
         self.store.save_usage_info(final_usage_info)
@@ -372,14 +392,14 @@ class WorkspaceRuntime:
                     exc_info=(type(result), result, result.__traceback__),
                 )
-    async def stop_runs_for_shutdown(self) -> None:
+    async def stop_response_for_shutdown(self) -> None:
         tasks: list[asyncio.Task[None]] = []
-        for run in self.runs.values():
-            if run.task is None or run.task.done():
-                continue
-            run.task.cancel()
-            tasks.append(run.task)
-        await self.gather_shutdown_tasks("Workspace run", tasks)
+        response = self.active_response
+        if response is not None and response.task is not None:
+            if not response.task.done():
+                response.task.cancel()
+            tasks.append(response.task)
+        await self.gather_shutdown_tasks("Workspace response", tasks)
     async def stop_compact_for_shutdown(self) -> None:
         if self.active_compact_task is None:
@@ -393,64 +413,72 @@ class WorkspaceRuntime:
         self.store.save_is_compacting(False)
     async def stop_for_shutdown(self) -> None:
-        await self.stop_runs_for_shutdown()
+        await self.stop_response_for_shutdown()
         await self.stop_compact_for_shutdown()
-    def active_run(self) -> WorkspaceRun | None:
-        if self.active_run_id is None:
+    def current_response(self) -> WorkspaceResponse | None:
+        response = self.active_response
+        if response is None or response.is_done:
             return None
-        run = self.runs.get(self.active_run_id)
-        if run is None or run.is_done:
-            return None
-        return run
+        return response
-    def has_active_run(self) -> bool:
-        return any(
-            not run.is_done and run.task is not None and not run.task.done()
-            for run in self.runs.values()
+    def has_active_response(self) -> bool:
+        response = self.active_response
+        return (
+            response is not None
+            and not response.is_done
+            and response.task is not None
+            and not response.task.done()
         )
     def clear(self) -> list[StoredMessage]:
         self.generation += 1
-        for run in self.runs.values():
-            run.is_done = True
-            if run.task is not None and not run.task.done():
-                run.discard_on_cancel = True
-                run.task.cancel()
-        self.active_run_id = None
+        response = self.active_response
+        if response is not None:
+            response.is_done = True
+            if response.task is not None and not response.task.done():
+                response.discard_on_cancel = True
+                response.task.cancel()
         return self.store.save_messages([])
-    async def notify_cleared_runs(self) -> None:
-        for run in self.runs.values():
-            async with run.condition:
-                run.condition.notify_all()
+    async def notify_cleared_response(self) -> None:
+        response = self.active_response
+        if response is None:
+            return
+        async with response.condition:
+            response.condition.notify_all()
     async def append_event(
-        self, run: WorkspaceRun, event: str, data: dict[str, object]
+        self, response: WorkspaceResponse, event: str, data: dict[str, object]
     ) -> None:
-        async with run.condition:
-            run.events.append((run.latest_event_index + 1, event, data))
-            run.condition.notify_all()
+        async with response.condition:
+            response.events.append((response.latest_event_index + 1, event, data))
+            response.condition.notify_all()
-    async def append_snapshot(self, run: WorkspaceRun, message: StoredMessage) -> None:
+    async def append_snapshot(
+        self, response: WorkspaceResponse, message: StoredMessage
+    ) -> None:
         if message.author != "assistant":
             return
-        run.latest_snapshot = message
+        response.latest_snapshot = message
         await self.append_event(
-            run,
+            response,
             "snapshot",
-            {"message": stream_message_data(message, run.active_output)},
+            {"message": stream_message_data(message, response.active_output)},
         )
-    def create_run(
+    def start_response(
         self, content: str, *, message_id: str | None = None
-    ) -> WorkspaceRun:
-        if self.has_active_run():
-            active_run = self.active_run()
+    ) -> WorkspaceResponse:
+        if self.has_active_response():
             raise HTTPException(
                 status_code=409,
                 detail="Response in progress",
-                headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
+            )
+        if self.store.read_is_compacting():
+            raise HTTPException(
+                status_code=409,
+                detail="Context refining in progress. Please wait a moment.",
             )
         state = self.store.read_state()
         user_message_id = message_id or str(uuid4())
@@ -463,7 +491,7 @@ class WorkspaceRuntime:
         )
         next_messages = [*state.messages, user_message]
         self.store.save_messages(next_messages)
-        return self._create_run_from_messages(
+        return self._start_response_from_messages(
             content=content,
             next_messages=next_messages,
             state=state,
@@ -476,13 +504,16 @@ class WorkspaceRuntime:
         *,
         action: Literal["resend", "save"],
         content: str,
-    ) -> tuple[list[StoredMessage], WorkspaceRun | None]:
-        if self.has_active_run():
-            active_run = self.active_run()
+    ) -> tuple[list[StoredMessage], WorkspaceResponse | None]:
+        if self.has_active_response():
             raise HTTPException(
                 status_code=409,
                 detail="Response in progress",
-                headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
+            )
+        if self.store.read_is_compacting():
+            raise HTTPException(
+                status_code=409,
+                detail="Context refining in progress. Please wait a moment.",
             )
         state = self.store.read_state()
         message_index = next(
@@ -513,50 +544,140 @@ class WorkspaceRuntime:
         previous_messages = state.messages[:message_index]
         next_messages = [*previous_messages, updated_message]
         self.store.save_messages(next_messages)
-        run = self._create_run_from_messages(
+        response = self._start_response_from_messages(
             content=content,
             next_messages=next_messages,
             state=state.model_copy(update={"messages": previous_messages}),
             user_message=updated_message,
         )
-        return next_messages, run
+        return next_messages, response
+    def retry_error(
+        self,
+        message_id: str,
+        *,
+        error_id: str,
+    ) -> tuple[list[StoredMessage], WorkspaceResponse]:
+        if self.has_active_response():
+            raise HTTPException(
+                status_code=409,
+                detail="Response in progress",
+            )
+        if self.store.read_is_compacting():
+            raise HTTPException(
+                status_code=409,
+                detail="Context refining in progress. Please wait a moment.",
+            )
+        state = self.store.read_state()
+        message_index = next(
+            (
+                index
+                for index, message in enumerate(state.messages)
+                if message.id == message_id
+            ),
+            -1,
+        )
+        if message_index < 0:
+            raise HTTPException(status_code=404, detail="Message not found.")
+        message = state.messages[message_index]
+        if message.author != "assistant":
+            raise HTTPException(
+                status_code=400, detail="Only assistant errors can be retried."
+            )
+        previous_user_message = next(
+            (
+                current_message
+                for current_message in reversed(state.messages[:message_index])
+                if current_message.author == "user"
+            ),
+            None,
+        )
+        if previous_user_message is None:
+            raise HTTPException(status_code=400, detail="Message history is invalid.")
+        trimmed_message = trim_assistant_message_at_error(
+            message,
+            error_id,
+            status="running",
+        )
+        if trimmed_message is None:
+            raise HTTPException(status_code=404, detail="Error block not found.")
-    def _create_run_from_messages(
+        previous_messages = state.messages[:message_index]
+        next_messages = [*previous_messages, trimmed_message]
+        self.store.save_messages(next_messages)
+        state_before_assistant = state.model_copy(
+            update={"messages": previous_messages}
+        )
+        base_request_messages = self.request_messages_for_content(
+            state_before_assistant,
+            previous_messages,
+            previous_user_message.content,
+        )
+        request_messages = [
+            *base_request_messages,
+            *model_visible_assistant_output_messages(trimmed_message),
+        ]
+        response = self._start_response_from_messages(
+            content=previous_user_message.content,
+            initial_assistant_message=trimmed_message,
+            next_messages=next_messages,
+            output_start_index=assistant_retry_output_start_index(trimmed_message),
+            request_messages=request_messages,
+            state=state_before_assistant,
+            usage_request_messages=base_request_messages,
+            user_message=previous_user_message,
+        )
+        return next_messages, response
+    def _start_response_from_messages(
         self,
         *,
         content: str,
+        initial_assistant_message: StoredMessage | None = None,
         next_messages: list[StoredMessage],
+        output_start_index: int = 1,
+        request_messages: list[dict[str, object]] | None = None,
         state: StoredState,
+        usage_request_messages: list[dict[str, object]] | None = None,
         user_message: StoredMessage,
-    ) -> WorkspaceRun:
+    ) -> WorkspaceResponse:
         connection = selected_connection(state)
         context_window_limit = context_window_for_settings(state.settings)
-        run = WorkspaceRun(
+        response = WorkspaceResponse(
             condition=asyncio.Condition(),
             generation=self.generation,
         )
-        self.runs[run.id] = run
-        self.active_run_id = run.id
+        self.active_response = response
-        async def run_task() -> None:
+        async def response_task() -> None:
             nonlocal next_messages
-            assistant_message = StoredMessage(
-                author="assistant",
-                content="",
-                id=str(uuid4()),
-                status="running",
+            assistant_message = (
+                initial_assistant_message
+                if initial_assistant_message is not None
+                else StoredMessage(
+                    author="assistant",
+                    content="",
+                    id=str(uuid4()),
+                    status="running",
+                )
             )
-            assistant_output = AssistantOutputBuilder(assistant_message.id)
+            assistant_output = (
+                AssistantOutputBuilder.from_message(assistant_message)
+                if initial_assistant_message is not None
+                else AssistantOutputBuilder(assistant_message.id)
+            )
+            initial_assistant_content = assistant_output.content
+            initial_assistant_thinking = assistant_output.thinking
             last_progress_flush_at = 0.0
             def is_current_generation() -> bool:
-                return run.generation == self.generation
+                return response.generation == self.generation
             def update_assistant_message(
                 status: str = "running", *, persist: bool
             ) -> StoredMessage | None:
                 nonlocal next_messages, assistant_message
-                if not is_current_generation() or run.discard_on_cancel:
+                if not is_current_generation() or response.discard_on_cancel:
                     return None
                 assistant_message = StoredMessage(
                     author="assistant",
@@ -603,43 +724,90 @@ class WorkspaceRuntime:
                 turn_usage_info: TokenUsageInfo | None = None
                 current_output_index = 0
                 latest_usage_output_index: int | None = None
-                current_request_messages = self.request_messages_for_content(
-                    state,
-                    next_messages,
-                    content,
-                )
-                pre_turn_request_messages = self.request_messages_for_content(
-                    state,
-                    state.messages,
-                    content,
-                )
-                auto_compaction = await self.auto_compact_messages(
-                    connection=connection,
-                    context_window_limit=context_window_limit,
-                    messages=state.messages,
-                    model_history=[
-                        ChatMessage.model_validate(message)
-                        for message in pre_turn_request_messages
-                    ],
-                    source_message_id=None,
-                )
-                if auto_compaction is not None:
-                    marker, _, usage_info = auto_compaction
-                    next_messages = [*state.messages, marker, user_message]
-                    self.store.save_messages(next_messages)
-                    await self.append_event(
-                        run,
-                        "context_optimized",
-                        {
-                            "message": marker.model_dump(),
-                            **usage_event_data(usage_info),
-                        },
-                    )
+                model_tool_specs = [
+                    *tool_specs(),
+                    *list(self.mcp_manager.tool_specs()),
+                ]
+                if request_messages is None:
                     current_request_messages = self.request_messages_for_content(
                         state,
                         next_messages,
                         content,
                     )
+                    pre_turn_request_messages = self.request_messages_for_content(
+                        state,
+                        state.messages,
+                        content,
+                    )
+                    auto_compaction = await self.auto_compact_messages(
+                        connection=connection,
+                        context_window_limit=context_window_limit,
+                        budget_messages=current_request_messages,
+                        messages=state.messages,
+                        model_history=pre_turn_request_messages,
+                        source_message_id=None,
+                        tools=model_tool_specs,
+                    )
+                    if auto_compaction is not None:
+                        marker, _, usage_info = auto_compaction
+                        next_messages = [*state.messages, marker, user_message]
+                        self.store.save_messages(next_messages)
+                        await self.append_event(
+                            response,
+                            "context_optimized",
+                            {
+                                "message": marker.model_dump(),
+                                **usage_event_data(usage_info),
+                            },
+                        )
+                        current_request_messages = self.request_messages_for_content(
+                            state,
+                            next_messages,
+                            content,
+                        )
+                else:
+                    current_request_messages = request_messages
+                    auto_compaction = await self.auto_compact_messages(
+                        connection=connection,
+                        context_window_limit=context_window_limit,
+                        messages=next_messages,
+                        model_history=compact_prompt_chat_messages(
+                            current_request_messages
+                        ),
+                        source_message_id=assistant_message.id,
+                        tools=model_tool_specs,
+                    )
+                    if auto_compaction is not None:
+                        marker, replacement_history, usage_info = auto_compaction
+                        assistant_message = assistant_message.model_copy(
+                            update={"usage_info": usage_info}
+                        )
+                        next_messages = append_or_replace_message(
+                            [*next_messages, marker], assistant_message
+                        )
+                        self.store.save_messages(next_messages)
+                        await self.append_event(
+                            response,
+                            "context_optimized",
+                            {
+                                "message": marker.model_dump(),
+                                **usage_event_data(usage_info),
+                            },
+                        )
+                        current_request_messages = model_request_messages_data(
+                            [
+                                *runtime_context_messages(
+                                    self.cwd, state.settings.agent_prompt
+                                ),
+                                *explicit_skill_messages(self.cwd, self.store, content),
+                                *replacement_history,
+                            ]
+                        )
+                context_usage_messages = (
+                    usage_request_messages
+                    if usage_request_messages is not None
+                    else current_request_messages
+                )
                 async def review_tool_approval(request: ApprovalReviewRequest):
                     return await review_approval_request(
@@ -672,7 +840,7 @@ class WorkspaceRuntime:
                     conversation: Sequence[Mapping[str, object]],
                 ) -> AgentContextUpdate | None:
                     nonlocal next_messages
-                    if not is_current_generation() or run.discard_on_cancel:
+                    if not is_current_generation() or response.discard_on_cancel:
                         return None
                     assistant_snapshot = StoredMessage(
                         author="assistant",
@@ -684,35 +852,13 @@ class WorkspaceRuntime:
                         tools=list(assistant_output.tools.values()),
                         usage_info=self.store.read_usage_info(),
                     )
-                    model_history: list[ChatMessage] = []
-                    for message in conversation:
-                        role_value = message.get("role")
-                        content = str(message.get("content") or "")
-                        if role_value == "system":
-                            model_history.append(
-                                ChatMessage(role="system", content=content)
-                            )
-                        if role_value == "user":
-                            model_history.append(
-                                ChatMessage(role="user", content=content)
-                            )
-                        if role_value == "assistant":
-                            model_history.append(
-                                ChatMessage(role="assistant", content=content)
-                            )
-                        if role_value == "tool":
-                            model_history.append(
-                                ChatMessage(
-                                    role="user",
-                                    content=f"Tool result: {content}",
-                                )
-                            )
                     auto_result = await self.auto_compact_messages(
                         connection=connection,
                         context_window_limit=context_window_limit,
                         messages=next_messages,
-                        model_history=model_history,
+                        model_history=compact_prompt_chat_messages(conversation),
                         source_message_id=assistant_snapshot.id,
+                        tools=model_tool_specs,
                     )
                     if auto_result is None:
                         return None
@@ -747,14 +893,18 @@ class WorkspaceRuntime:
                     messages=current_request_messages,
                     tool_runner=tool_runner,
                 ):
-                    if not is_current_generation() or run.discard_on_cancel:
+                    if not is_current_generation() or response.discard_on_cancel:
                         raise asyncio.CancelledError
                     run_event_data = event.data
                     should_append_run_event = event.event != "usage"
                     snapshot_after_event: StoredMessage | None = None
                     if event.event == "start":
                         event_id = event.data.get("id")
-                        if isinstance(event_id, str):
+                        if initial_assistant_message is not None:
+                            assistant_output.set_assistant_id(assistant_message.id)
+                            run_event_data = {"id": assistant_message.id}
+                            snapshot_after_event = persist_assistant()
+                        elif isinstance(event_id, str):
                             assistant_message = assistant_message.model_copy(
                                 update={"id": event_id}
                             )
@@ -763,16 +913,24 @@ class WorkspaceRuntime:
                     if event.event == "output_start":
                         index = event.data.get("index")
                         if isinstance(index, int):
-                            current_output_index = index
-                            run.active_output = None
-                            assistant_output.start_group(index)
+                            output_index = index + output_start_index - 1
+                            current_output_index = output_index
+                            run_event_data = {**event.data, "index": output_index}
+                            response.active_output = None
+                            assistant_output.start_group(output_index)
                             snapshot_after_event = persist_assistant()
                     if event.event == "output_done":
-                        run.active_output = None
+                        index = event.data.get("index")
+                        if isinstance(index, int):
+                            run_event_data = {
+                                **event.data,
+                                "index": index + output_start_index - 1,
+                            }
+                        response.active_output = None
                     if event.event == "tool_start":
                         tool = event.data.get("tool")
                         if isinstance(tool, dict) and isinstance(tool.get("id"), str):
-                            run.active_output = None
+                            response.active_output = None
                             current_tool_id = tool["id"]
                             assistant_output.start_tool(
                                 StoredToolItem.model_validate(tool)
@@ -790,13 +948,13 @@ class WorkspaceRuntime:
                             assistant_output.update_tool(tool_id, event.data)
                             snapshot_after_event = persist_assistant()
                     if event.event == "delta":
-                        run.active_output = "text"
+                        response.active_output = "text"
                         assistant_output.append_text(
                             str(event.data.get("content") or "")
                         )
                         snapshot_after_event = persist_assistant_progress()
                     if event.event == "thinking_delta":
-                        run.active_output = "thinking"
+                        response.active_output = "thinking"
                         assistant_output.append_thinking(
                             str(event.data.get("content") or "")
                         )
@@ -824,8 +982,12 @@ class WorkspaceRuntime:
                     if event.event == "done":
                         message = event.data.get("message")
                         if isinstance(message, dict):
-                            run.active_output = None
-                            assistant_output.apply_done_message(message)
+                            response.active_output = None
+                            assistant_output.apply_done_message(
+                                message,
+                                content_prefix=initial_assistant_content,
+                                thinking_prefix=initial_assistant_thinking,
+                            )
                             response_usage_info = self.store.read_usage_info()
                             final_usage_info = turn_usage_info
                             if (
@@ -834,12 +996,13 @@ class WorkspaceRuntime:
                             ):
                                 final_usage_info = update_context_usage_for_response(
                                     final_usage_info or response_usage_info,
-                                    messages=current_request_messages,
+                                    messages=context_usage_messages,
                                     output_content=assistant_output.content,
                                     output_tools=[
                                         tool.model_dump(exclude_none=True)
                                         for tool in assistant_output.tools.values()
                                     ],
+                                    request_tools=model_tool_specs,
                                     model_context_window=context_window_limit,
                                 )
                             self.store.save_usage_info(final_usage_info)
@@ -849,27 +1012,35 @@ class WorkspaceRuntime:
                                     "message": stream_message_data(snapshot_after_event)
                                 }
                     if event.event == "done" and snapshot_after_event is not None:
-                        await self.append_snapshot(run, snapshot_after_event)
-                        await self.append_event(run, event.event, run_event_data)
+                        await self.append_snapshot(response, snapshot_after_event)
+                        await self.append_event(response, event.event, run_event_data)
                     else:
                         if should_append_run_event:
-                            await self.append_event(run, event.event, run_event_data)
+                            await self.append_event(
+                                response, event.event, run_event_data
+                            )
                         if snapshot_after_event is not None:
-                            await self.append_snapshot(run, snapshot_after_event)
+                            await self.append_snapshot(response, snapshot_after_event)
             except asyncio.CancelledError:
-                logger.info("Workspace run stopped")
-                if not run.discard_on_cancel:
+                logger.info("Workspace response stopped")
+                if not response.discard_on_cancel:
                     interrupted_snapshot = persist_assistant("interrupted")
                     if interrupted_snapshot is not None:
-                        await self.append_snapshot(run, interrupted_snapshot)
+                        await self.append_snapshot(response, interrupted_snapshot)
                     await self.append_event(
-                        run,
+                        response,
                         "error",
                         {"message": "Response stopped."},
                     )
                 raise
             except Exception as error:
                 logger.exception("Workspace response failed")
+                if is_context_window_error(error):
+                    usage_info = full_context_usage(
+                        self.store.read_usage_info(),
+                        model_context_window=context_window_limit,
+                    )
+                    self.store.save_usage_info(usage_info)
                 if (
                     current_tool_id is not None
                     and current_tool_id in assistant_output.tools
@@ -877,7 +1048,10 @@ class WorkspaceRuntime:
                 ):
                     assistant_output.update_tool(
                         current_tool_id,
-                        {"content": str(error) or "Tool failed.", "status": "failed"},
+                        {
+                            "result": text_tool_result(str(error) or "Tool failed."),
+                            "status": "failed",
+                        },
                     )
                 error_item = assistant_output.append_error(
                     run_error_output_item(
@@ -887,23 +1061,30 @@ class WorkspaceRuntime:
                 )
                 failed_snapshot = persist_assistant("failed")
                 if failed_snapshot is not None:
-                    await self.append_snapshot(run, failed_snapshot)
-                await self.append_event(run, "error", run_error_event_data(error_item))
+                    await self.append_snapshot(response, failed_snapshot)
+                await self.append_event(
+                    response, "error", run_error_event_data(error_item)
+                )
             finally:
-                run.is_done = True
-                async with run.condition:
-                    run.condition.notify_all()
-                if self.active_run_id == run.id:
-                    self.active_run_id = None
+                response.is_done = True
+                async with response.condition:
+                    response.condition.notify_all()
+                if self.active_response is response:
+                    self.active_response = None
-        run.task = asyncio.create_task(run_task())
-        return run
+        response.task = asyncio.create_task(response_task())
+        return response
-    async def run_stream(
-        self, run: WorkspaceRun, after: int = 0, include_snapshots: bool = True
+    async def response_stream(
+        self,
+        response: WorkspaceResponse,
+        after: int = 0,
+        include_snapshots: bool = True,
     ) -> AsyncIterator[str]:
         next_event_index = after + 1
-        reconnect_snapshot = run_snapshot_data_at(run, after) if after > 0 else None
+        reconnect_snapshot = (
+            response_snapshot_data_at(response, after) if after > 0 else None
+        )
         if include_snapshots and reconnect_snapshot is not None:
             yield stream_event(
                 "snapshot",
@@ -911,15 +1092,17 @@ class WorkspaceRuntime:
                 event_id=after,
             )
         while True:
-            async with run.condition:
+            async with response.condition:
                 def has_next_event(index: int = next_event_index) -> bool:
-                    return run.is_done or any(
-                        event_index >= index for event_index, _, _ in run.events
+                    return response.is_done or any(
+                        event_index >= index for event_index, _, _ in response.events
                     )
-                await run.condition.wait_for(has_next_event)
-                events = [event for event in run.events if event[0] >= next_event_index]
+                await response.condition.wait_for(has_next_event)
+                events = [
+                    event for event in response.events if event[0] >= next_event_index
+                ]
             for index, event, data in events:
                 next_event_index = index + 1
@@ -929,19 +1112,23 @@ class WorkspaceRuntime:
                 if event in {"done", "error"}:
                     return
-            if run.is_done and not events:
+            if response.is_done and not events:
                 return
-    def run_by_id(self, run_id: str) -> WorkspaceRun:
-        run = self.runs.get(run_id)
-        if run is None:
-            raise HTTPException(status_code=404, detail="Run not found.")
-        return run
+    def stream_current_response(self) -> WorkspaceResponse:
+        response = self.current_response()
+        if response is None:
+            raise HTTPException(status_code=404, detail="Response not found.")
+        return response
-    def stop_run(self, run_id: str) -> None:
-        run = self.run_by_id(run_id)
-        if run.task is not None and not run.task.done():
-            run.task.cancel()
+    def stop_response(self) -> None:
+        response = self.current_response()
+        if (
+            response is not None
+            and response.task is not None
+            and not response.task.done()
+        ):
+            response.task.cancel()
     def compact_stream(self) -> AsyncIterator[str]:
         async def run_manual_compact(
@@ -953,7 +1140,7 @@ class WorkspaceRuntime:
         ) -> tuple[StoredMessage, TokenUsageInfo]:
             logger.info("Workspace compact requested")
             try:
-                model_history = [
+                model_history: list[ChatMessage | Mapping[str, object]] = [
                     *runtime_context_messages(self.cwd, state.settings.agent_prompt),
                     *workspace_chat_messages(
                         state.messages,
@@ -999,7 +1186,7 @@ class WorkspaceRuntime:
                 self.active_compact_task = None
         if self.active_compact_task is None:
-            if self.active_run() is not None:
+            if self.current_response() is not None:
                 raise HTTPException(
                     status_code=409,
                     detail="Compact is unavailable while Flowent is responding.",
@@ -1023,10 +1210,31 @@ class WorkspaceRuntime:
         async def compact_events() -> AsyncIterator[str]:
             try:
                 marker, usage_info = await asyncio.shield(compact_task)
-            except Exception:
+            except Exception as error:
+                assistant_id = str(uuid4())
+                assistant_output = AssistantOutputBuilder(assistant_id)
+                error_item = run_error_output_item(assistant_id, str(error)).model_copy(
+                    update={"message": USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE}
+                )
+                assistant_output.append_error(error_item)
+                failed_message = StoredMessage(
+                    author="assistant",
+                    content="",
+                    groups=assistant_output.groups,
+                    id=assistant_id,
+                    status="failed",
+                )
+                self.store.save_messages(
+                    [*self.store.read_state().messages, failed_message]
+                )
+                failed_message_data = stream_message_data(failed_message)
+                yield stream_event("snapshot", {"message": failed_message_data})
                 yield stream_event(
                     "error",
-                    {"message": "Context could not be compacted."},
+                    {
+                        "error": error_item.model_dump(exclude_none=True),
+                        "message": USER_VISIBLE_MANUAL_COMPACT_ERROR_MESSAGE,
+                    },
                 )
                 return