flowent 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +31 -5
- package/backend/src/flowent/agent.py +13 -4
- package/backend/src/flowent/approval.py +6 -4
- package/backend/src/flowent/compact.py +35 -14
- package/backend/src/flowent/llm.py +73 -7
- package/backend/src/flowent/main.py +441 -85
- package/backend/src/flowent/static/assets/index-Bz76A4EJ.js +82 -0
- package/backend/src/flowent/static/assets/index-DufpDl8x.css +2 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/storage.py +151 -7
- package/backend/src/flowent/usage.py +315 -0
- package/backend/uv.lock +971 -3
- package/dist/frontend/assets/index-Bz76A4EJ.js +82 -0
- package/dist/frontend/assets/index-DufpDl8x.css +2 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +24 -3
- package/backend/src/flowent/__pycache__/__init__.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/_version.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/agent.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/approval.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/channels.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/cli.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/compact.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/context.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/llm.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/logging.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/main.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/mcp_import.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/patch.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/paths.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/permissions.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/sandbox.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/skills.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/storage.cpython-313.pyc +0 -0
- package/backend/src/flowent/__pycache__/tools.cpython-313.pyc +0 -0
- package/backend/src/flowent/static/assets/index-BlaCigkZ.js +0 -82
- package/backend/src/flowent/static/assets/index-CRvbsH4K.css +0 -2
- package/backend/tests/__pycache__/conftest.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_agent_tools.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_approval.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_channels.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_health.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_llm_providers.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_logging.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_mcp.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_patch.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_permissions.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_persistence.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_skills.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_startup_requirements.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/__pycache__/test_workspace_chat.cpython-313-pytest-9.0.3.pyc +0 -0
- package/backend/tests/conftest.py +0 -60
- package/backend/tests/test_agent_tools.py +0 -1124
- package/backend/tests/test_approval.py +0 -283
- package/backend/tests/test_channels.py +0 -360
- package/backend/tests/test_health.py +0 -12
- package/backend/tests/test_llm_providers.py +0 -548
- package/backend/tests/test_logging.py +0 -212
- package/backend/tests/test_mcp.py +0 -788
- package/backend/tests/test_patch.py +0 -112
- package/backend/tests/test_permissions.py +0 -588
- package/backend/tests/test_persistence.py +0 -249
- package/backend/tests/test_skills.py +0 -462
- package/backend/tests/test_startup_requirements.py +0 -144
- package/backend/tests/test_workspace_chat.py +0 -2174
- package/dist/frontend/assets/index-BlaCigkZ.js +0 -82
- package/dist/frontend/assets/index-CRvbsH4K.css +0 -2
|
@@ -3,7 +3,7 @@ import json
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
from collections.abc import AsyncIterator, Mapping, Sequence
|
|
6
|
-
from contextlib import asynccontextmanager
|
|
6
|
+
from contextlib import asynccontextmanager, suppress
|
|
7
7
|
from dataclasses import dataclass, field
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import Literal
|
|
@@ -70,6 +70,14 @@ from flowent.storage import (
|
|
|
70
70
|
StoredWritablePath,
|
|
71
71
|
)
|
|
72
72
|
from flowent.tools import ToolContext
|
|
73
|
+
from flowent.usage import (
|
|
74
|
+
TokenUsage,
|
|
75
|
+
TokenUsageInfo,
|
|
76
|
+
append_token_usage,
|
|
77
|
+
current_model_context_window,
|
|
78
|
+
estimated_token_usage_for_messages,
|
|
79
|
+
recompute_context_usage,
|
|
80
|
+
)
|
|
73
81
|
|
|
74
82
|
logger = logging.getLogger("flowent.main")
|
|
75
83
|
|
|
@@ -77,7 +85,7 @@ logger = logging.getLogger("flowent.main")
|
|
|
77
85
|
DEFAULT_STATIC_DIR = Path(__file__).parent / "static"
|
|
78
86
|
COMPACTED_CONTEXT_MARKER = "Context compacted"
|
|
79
87
|
OPTIMIZED_CONTEXT_MARKER = "Context optimized"
|
|
80
|
-
|
|
88
|
+
DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO = 0.95
|
|
81
89
|
AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
|
|
82
90
|
APPROVAL_TRANSCRIPT_MESSAGE_LIMIT = 12
|
|
83
91
|
APPROVAL_TRANSCRIPT_TEXT_LIMIT = 2_000
|
|
@@ -113,10 +121,17 @@ class WorkspaceRunResponse(BaseModel):
|
|
|
113
121
|
run_id: str
|
|
114
122
|
|
|
115
123
|
|
|
116
|
-
class
|
|
124
|
+
class WorkspaceClearResponse(BaseModel):
    """Response body returned by the ``/api/workspace/clear`` endpoint."""

    # Pydantic config: unknown fields are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Presumably the id of a run that is still active after the clear;
    # defaults to None. NOTE(review): confirm against the caller.
    active_run_id: str | None = None
    # Messages stored for the workspace once the clear has been applied.
    messages: list[StoredMessage]
    # Optional token usage summary; defaults to None.
    usage_info: TokenUsageInfo | None = None
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
class WorkspaceCompactTask:
    """Holder for an asyncio task that performs a workspace compaction."""

    # Task whose result is a (StoredMessage, TokenUsageInfo) pair.
    task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]]
|
|
120
135
|
|
|
121
136
|
|
|
122
137
|
class AboutResponse(BaseModel):
|
|
@@ -167,8 +182,10 @@ class WorkspaceRun:
|
|
|
167
182
|
condition: asyncio.Condition
|
|
168
183
|
discard_on_cancel: bool = False
|
|
169
184
|
events: list[tuple[int, str, dict[str, object]]] = field(default_factory=list)
|
|
185
|
+
generation: int = 0
|
|
170
186
|
id: str = field(default_factory=lambda: str(uuid4()))
|
|
171
187
|
is_done: bool = False
|
|
188
|
+
latest_snapshot: StoredMessage | None = None
|
|
172
189
|
task: asyncio.Task[None] | None = None
|
|
173
190
|
|
|
174
191
|
@property
|
|
@@ -176,8 +193,15 @@ class WorkspaceRun:
|
|
|
176
193
|
return self.events[-1][0] if self.events else 0
|
|
177
194
|
|
|
178
195
|
|
|
179
|
-
def stream_event(
|
|
180
|
-
|
|
196
|
+
def stream_event(
    event: str, data: dict[str, object], event_id: int | None = None
) -> str:
    """Render one event frame in the ``id:`` / ``event:`` / ``data:`` text format.

    Args:
        event: Event name written on the ``event:`` line.
        data: Payload, serialized with ``json.dumps`` onto the ``data:`` line.
        event_id: Optional identifier; when not None an ``id:`` line is
            emitted first.

    Returns:
        The frame text, terminated by a blank line.
    """
    frame_lines: list[str] = []
    # Only an explicit None suppresses the id line; 0 is a valid id.
    if event_id is not None:
        frame_lines.append(f"id: {event_id}")
    frame_lines.append(f"event: {event}")
    frame_lines.append(f"data: {json.dumps(data)}")
    return "\n".join(frame_lines) + "\n\n"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def stream_message_data(message: StoredMessage) -> dict[str, object]:
    """Return the message's dumped fields with ``status`` set explicitly.

    The dump comes from ``message.model_dump()``; the ``status`` key is then
    written from ``message.status``, overriding any dumped value.
    """
    payload: dict[str, object] = dict(message.model_dump())
    payload["status"] = message.status
    return payload
|
|
181
205
|
|
|
182
206
|
|
|
183
207
|
def append_or_replace_message(
|
|
@@ -189,6 +213,18 @@ def append_or_replace_message(
|
|
|
189
213
|
]
|
|
190
214
|
|
|
191
215
|
|
|
216
|
+
def run_snapshot_data_at(
    run: WorkspaceRun, event_index: int
) -> dict[str, object] | None:
    """Find the newest snapshot message recorded at or before ``event_index``.

    Events are scanned from newest to oldest. Only ``"snapshot"`` events whose
    index does not exceed ``event_index`` are considered, and a snapshot whose
    ``"message"`` entry is not a dict is passed over in favour of an older one.

    Returns:
        The ``"message"`` dict of the matching snapshot, or None if there is
        no such snapshot.
    """
    snapshot_messages = (
        data.get("message")
        for index, name, data in reversed(run.events)
        if index <= event_index and name == "snapshot"
    )
    return next(
        (candidate for candidate in snapshot_messages if isinstance(candidate, dict)),
        None,
    )
|
|
226
|
+
|
|
227
|
+
|
|
192
228
|
USER_VISIBLE_RUN_ERROR_TITLE = "Request failed"
|
|
193
229
|
USER_VISIBLE_RUN_ERROR_MESSAGE = "Check the model connection settings and try again."
|
|
194
230
|
USER_VISIBLE_CONTEXT_OPTIMIZATION_ERROR_MESSAGE = "Context could not be optimized."
|
|
@@ -504,16 +540,22 @@ def is_context_marker(message: StoredMessage) -> bool:
|
|
|
504
540
|
return message.content in {COMPACTED_CONTEXT_MARKER, OPTIMIZED_CONTEXT_MARKER}
|
|
505
541
|
|
|
506
542
|
|
|
507
|
-
def auto_compact_token_limit() -> int:
|
|
543
|
+
def auto_compact_token_limit(context_window: int) -> int:
    """Return the token count at which automatic compaction is triggered.

    The ``FLOWENT_AUTO_COMPACT_TOKEN_LIMIT`` environment variable wins when it
    holds an integer. If it is unset, empty, or not an integer, the limit is
    ``context_window`` scaled by ``DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO``.
    The result is never negative.
    """
    configured = os.environ.get("FLOWENT_AUTO_COMPACT_TOKEN_LIMIT", "")
    if configured:
        try:
            return max(0, int(configured))
        except ValueError:
            # An unparsable override is ignored; use the ratio-based default.
            pass
    return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
|
|
513
551
|
|
|
514
552
|
|
|
515
|
-
def should_auto_compact(
|
|
516
|
-
|
|
553
|
+
def should_auto_compact(
|
|
554
|
+
messages: list[ChatMessage],
|
|
555
|
+
*,
|
|
556
|
+
context_window: int,
|
|
557
|
+
) -> bool:
|
|
558
|
+
token_limit = auto_compact_token_limit(context_window)
|
|
517
559
|
if token_limit <= 0:
|
|
518
560
|
return False
|
|
519
561
|
return (
|
|
@@ -522,6 +564,77 @@ def should_auto_compact(messages: list[ChatMessage]) -> bool:
|
|
|
522
564
|
)
|
|
523
565
|
|
|
524
566
|
|
|
567
|
+
def model_visible_messages_for_usage(
    messages: Sequence[Mapping[str, object]],
) -> list[dict[str, object]]:
    """Copy the messages whose role is system, user, assistant or tool.

    Messages with any other role, or without a ``"role"`` key, are dropped.
    Each kept message is returned as a new shallow ``dict`` copy, in the
    original order.
    """
    visible_roles = {"system", "user", "assistant", "tool"}
    kept: list[dict[str, object]] = []
    for entry in messages:
        if entry.get("role") in visible_roles:
            kept.append(dict(entry))
    return kept
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
def usage_event_data(usage_info: TokenUsageInfo) -> dict[str, object]:
    """Wrap the dumped usage info under a single ``"usage_info"`` key."""
    dumped = usage_info.model_dump()
    payload: dict[str, object] = {"usage_info": dumped}
    return payload
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def update_context_usage_for_response(
    usage_info: TokenUsageInfo | None,
    *,
    messages: Sequence[Mapping[str, object]],
    output_content: str,
    model_context_window: int,
) -> TokenUsageInfo:
    """Recompute context usage from an estimate of the request plus its output.

    Args:
        usage_info: Existing usage info to base the result on, or None.
        messages: Request messages; only those kept by
            ``model_visible_messages_for_usage`` feed the estimate.
        output_content: Output text passed to the estimator alongside the
            messages.
        model_context_window: Context window size forwarded to
            ``recompute_context_usage``.

    Returns:
        The value returned by ``recompute_context_usage`` for the estimated
        total token count.
    """
    visible_messages = model_visible_messages_for_usage(messages)
    estimate = estimated_token_usage_for_messages(
        visible_messages,
        output_content=output_content,
    )
    return recompute_context_usage(
        usage_info,
        estimate.total_tokens,
        model_context_window=model_context_window,
    )
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def usage_info_for_model(
    usage_info: TokenUsageInfo | None,
    model_context_window: int,
) -> TokenUsageInfo | None:
    """Return a copy of ``usage_info`` with ``model_context_window`` replaced.

    None is passed through unchanged; otherwise the result comes from
    ``usage_info.model_copy`` with only ``model_context_window`` updated.
    """
    if usage_info is not None:
        return usage_info.model_copy(
            update={"model_context_window": model_context_window}
        )
    return None
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def context_window_for_settings(settings: StoredSettings) -> int:
    """Resolve the context window size for the given settings.

    An explicit ``settings.context_window_limit`` takes precedence (any value
    other than None, including 0). Otherwise the size is looked up with
    ``current_model_context_window`` for ``settings.selected_model``.
    """
    override = settings.context_window_limit
    if override is None:
        return current_model_context_window(settings.selected_model)
    return override
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def state_with_current_model_context_window(state: StoredState) -> StoredState:
    """Return a copy of ``state`` whose usage info uses the current window.

    The window size is resolved once from ``state.settings``. Every message
    that carries usage info is replaced by a copy with that usage info
    re-targeted through ``usage_info_for_model``; messages without usage info
    are reused as-is. The state's own ``usage_info`` is re-targeted the same
    way.
    """
    model_context_window = context_window_for_settings(state.settings)

    refreshed_messages = []
    for message in state.messages:
        if message.usage_info is None:
            # Nothing to re-target; keep the original object.
            refreshed_messages.append(message)
            continue
        refreshed_usage = usage_info_for_model(
            message.usage_info,
            model_context_window,
        )
        refreshed_messages.append(
            message.model_copy(update={"usage_info": refreshed_usage})
        )

    state_usage = usage_info_for_model(state.usage_info, model_context_window)
    return state.model_copy(
        update={
            "messages": refreshed_messages,
            "usage_info": state_usage,
        }
    )
|
|
636
|
+
|
|
637
|
+
|
|
525
638
|
def workspace_chat_messages(
|
|
526
639
|
messages: list[StoredMessage],
|
|
527
640
|
compacted_context: str = "",
|
|
@@ -617,6 +730,8 @@ def create_app(
|
|
|
617
730
|
telegram_bot_manager: TelegramBotManager | None = None
|
|
618
731
|
workspace_runs: dict[str, WorkspaceRun] = {}
|
|
619
732
|
active_workspace_run_id: str | None = None
|
|
733
|
+
workspace_generation = 0
|
|
734
|
+
active_compact_task: WorkspaceCompactTask | None = None
|
|
620
735
|
|
|
621
736
|
static_dir = frontend_static_directory().resolve(strict=False)
|
|
622
737
|
logger.debug("Flowent app created serve_frontend=%s", serve_frontend)
|
|
@@ -647,17 +762,13 @@ def create_app(
|
|
|
647
762
|
async def save_context_checkpoint(
|
|
648
763
|
*,
|
|
649
764
|
connection: ProviderConnection,
|
|
765
|
+
context_window_limit: int,
|
|
650
766
|
messages: list[StoredMessage],
|
|
651
767
|
model_history: list[ChatMessage],
|
|
652
768
|
marker_content: str,
|
|
653
769
|
source_message_id: str | None = None,
|
|
654
770
|
trigger: Literal["manual", "auto"],
|
|
655
|
-
) -> tuple[StoredMessage, list[dict[str, object]]]:
|
|
656
|
-
marker = StoredMessage(
|
|
657
|
-
author="system",
|
|
658
|
-
content=marker_content,
|
|
659
|
-
id=str(uuid4()),
|
|
660
|
-
)
|
|
771
|
+
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo]:
|
|
661
772
|
compact_result = await compact_provider.compact(
|
|
662
773
|
connection,
|
|
663
774
|
CompactInput(
|
|
@@ -668,6 +779,25 @@ def create_app(
|
|
|
668
779
|
),
|
|
669
780
|
completion=chat_completion,
|
|
670
781
|
)
|
|
782
|
+
usage_info = store.read_usage_info()
|
|
783
|
+
if compact_result.summary_usage is not None:
|
|
784
|
+
usage_info = append_token_usage(
|
|
785
|
+
usage_info,
|
|
786
|
+
compact_result.summary_usage,
|
|
787
|
+
model_context_window=context_window_limit,
|
|
788
|
+
)
|
|
789
|
+
usage_info = recompute_context_usage(
|
|
790
|
+
usage_info,
|
|
791
|
+
compact_result.token_after,
|
|
792
|
+
model_context_window=context_window_limit,
|
|
793
|
+
)
|
|
794
|
+
store.save_usage_info(usage_info)
|
|
795
|
+
marker = StoredMessage(
|
|
796
|
+
author="system",
|
|
797
|
+
content=marker_content,
|
|
798
|
+
id=str(uuid4()),
|
|
799
|
+
usage_info=usage_info,
|
|
800
|
+
)
|
|
671
801
|
store.save_compaction_checkpoint(
|
|
672
802
|
StoredCompactionCheckpoint(
|
|
673
803
|
id=str(uuid4()),
|
|
@@ -689,23 +819,30 @@ def create_app(
|
|
|
689
819
|
compact_result.token_after,
|
|
690
820
|
)
|
|
691
821
|
logger.log(TRACE_LEVEL, "Workspace compact summary=%r", compact_result.summary)
|
|
692
|
-
return
|
|
693
|
-
|
|
694
|
-
|
|
822
|
+
return (
|
|
823
|
+
marker,
|
|
824
|
+
[message.model_dump() for message in compact_result.replacement_history],
|
|
825
|
+
usage_info,
|
|
826
|
+
)
|
|
695
827
|
|
|
696
828
|
async def auto_compact_workspace_messages(
|
|
697
829
|
*,
|
|
698
830
|
connection: ProviderConnection,
|
|
831
|
+
context_window_limit: int,
|
|
699
832
|
messages: list[StoredMessage],
|
|
700
833
|
model_history: list[ChatMessage],
|
|
701
834
|
source_message_id: str | None = None,
|
|
702
|
-
) -> tuple[StoredMessage, list[dict[str, object]]] | None:
|
|
703
|
-
if not should_auto_compact(
|
|
835
|
+
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
|
|
836
|
+
if not should_auto_compact(
|
|
837
|
+
model_history,
|
|
838
|
+
context_window=context_window_limit,
|
|
839
|
+
):
|
|
704
840
|
return None
|
|
705
841
|
logger.info("Workspace auto compact requested")
|
|
706
842
|
try:
|
|
707
843
|
return await save_context_checkpoint(
|
|
708
844
|
connection=connection,
|
|
845
|
+
context_window_limit=context_window_limit,
|
|
709
846
|
marker_content=OPTIMIZED_CONTEXT_MARKER,
|
|
710
847
|
messages=messages,
|
|
711
848
|
model_history=model_history,
|
|
@@ -719,6 +856,7 @@ def create_app(
|
|
|
719
856
|
async def run_workspace_turn(content: str) -> StoredMessage:
|
|
720
857
|
state = store.read_state()
|
|
721
858
|
connection = selected_connection(state)
|
|
859
|
+
context_window_limit = context_window_for_settings(state.settings)
|
|
722
860
|
user_message = StoredMessage(
|
|
723
861
|
author="user",
|
|
724
862
|
content=content,
|
|
@@ -736,17 +874,19 @@ def create_app(
|
|
|
736
874
|
]
|
|
737
875
|
auto_compaction = await auto_compact_workspace_messages(
|
|
738
876
|
connection=connection,
|
|
877
|
+
context_window_limit=context_window_limit,
|
|
739
878
|
messages=state.messages,
|
|
740
879
|
model_history=model_history,
|
|
741
880
|
source_message_id=None,
|
|
742
881
|
)
|
|
743
882
|
if auto_compaction is not None:
|
|
744
|
-
marker, _ = auto_compaction
|
|
883
|
+
marker, _, _ = auto_compaction
|
|
745
884
|
next_messages = [*state.messages, marker, user_message]
|
|
746
885
|
store.save_messages(next_messages)
|
|
747
886
|
request_messages = request_messages_for_content(state, next_messages, content)
|
|
748
887
|
assistant_id = str(uuid4())
|
|
749
888
|
assistant_output = AssistantOutputBuilder(assistant_id)
|
|
889
|
+
turn_usage_info: TokenUsageInfo | None = None
|
|
750
890
|
|
|
751
891
|
async def review_tool_approval(request: ApprovalReviewRequest):
|
|
752
892
|
return await review_approval_request(
|
|
@@ -798,6 +938,21 @@ def create_app(
|
|
|
798
938
|
assistant_output.append_text(str(event.data.get("content") or ""))
|
|
799
939
|
if event.event == "thinking_delta":
|
|
800
940
|
assistant_output.append_thinking(str(event.data.get("content") or ""))
|
|
941
|
+
if event.event == "usage":
|
|
942
|
+
usage_data = event.data.get("usage")
|
|
943
|
+
if isinstance(usage_data, dict):
|
|
944
|
+
usage_info = update_context_usage_for_response(
|
|
945
|
+
append_token_usage(
|
|
946
|
+
store.read_usage_info(),
|
|
947
|
+
TokenUsage.model_validate(usage_data),
|
|
948
|
+
model_context_window=context_window_limit,
|
|
949
|
+
),
|
|
950
|
+
messages=request_messages,
|
|
951
|
+
output_content=assistant_output.content,
|
|
952
|
+
model_context_window=context_window_limit,
|
|
953
|
+
)
|
|
954
|
+
store.save_usage_info(usage_info)
|
|
955
|
+
turn_usage_info = usage_info
|
|
801
956
|
if event.event == "tool_start":
|
|
802
957
|
tool = event.data.get("tool")
|
|
803
958
|
if isinstance(tool, dict) and isinstance(tool.get("id"), str):
|
|
@@ -813,6 +968,23 @@ def create_app(
|
|
|
813
968
|
assistant_output.set_assistant_id(assistant_id)
|
|
814
969
|
assistant_output.apply_done_message(message)
|
|
815
970
|
|
|
971
|
+
final_usage_info = turn_usage_info
|
|
972
|
+
if final_usage_info is None:
|
|
973
|
+
final_usage_info = update_context_usage_for_response(
|
|
974
|
+
store.read_usage_info(),
|
|
975
|
+
messages=request_messages,
|
|
976
|
+
output_content=assistant_output.content,
|
|
977
|
+
model_context_window=context_window_limit,
|
|
978
|
+
)
|
|
979
|
+
else:
|
|
980
|
+
final_usage_info = update_context_usage_for_response(
|
|
981
|
+
final_usage_info,
|
|
982
|
+
messages=request_messages,
|
|
983
|
+
output_content=assistant_output.content,
|
|
984
|
+
model_context_window=context_window_limit,
|
|
985
|
+
)
|
|
986
|
+
store.save_usage_info(final_usage_info)
|
|
987
|
+
|
|
816
988
|
assistant_message = StoredMessage(
|
|
817
989
|
author="assistant",
|
|
818
990
|
content=assistant_output.content,
|
|
@@ -821,6 +993,7 @@ def create_app(
|
|
|
821
993
|
status="completed",
|
|
822
994
|
thinking=assistant_output.thinking,
|
|
823
995
|
tools=list(assistant_output.tools.values()),
|
|
996
|
+
usage_info=final_usage_info,
|
|
824
997
|
)
|
|
825
998
|
store.save_messages([*next_messages, assistant_message])
|
|
826
999
|
return assistant_message
|
|
@@ -858,7 +1031,7 @@ def create_app(
|
|
|
858
1031
|
|
|
859
1032
|
@app.get("/api/state")
|
|
860
1033
|
async def app_state() -> StoredState:
|
|
861
|
-
state = store.read_state()
|
|
1034
|
+
state = state_with_current_model_context_window(store.read_state())
|
|
862
1035
|
active_run = (
|
|
863
1036
|
workspace_runs.get(active_workspace_run_id)
|
|
864
1037
|
if active_workspace_run_id
|
|
@@ -999,14 +1172,23 @@ def create_app(
|
|
|
999
1172
|
async def save_workspace_messages(
|
|
1000
1173
|
request: WorkspaceMessagesRequest,
|
|
1001
1174
|
) -> WorkspaceMessagesRequest:
|
|
1175
|
+
return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
|
|
1176
|
+
|
|
1177
|
+
@app.post("/api/workspace/clear")
|
|
1178
|
+
async def clear_workspace() -> WorkspaceClearResponse:
|
|
1002
1179
|
nonlocal active_workspace_run_id
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1180
|
+
nonlocal workspace_generation
|
|
1181
|
+
workspace_generation += 1
|
|
1182
|
+
for run in workspace_runs.values():
|
|
1183
|
+
run.is_done = True
|
|
1184
|
+
if run.task is not None and not run.task.done():
|
|
1006
1185
|
run.discard_on_cancel = True
|
|
1007
1186
|
run.task.cancel()
|
|
1008
|
-
|
|
1009
|
-
|
|
1187
|
+
async with run.condition:
|
|
1188
|
+
run.condition.notify_all()
|
|
1189
|
+
active_workspace_run_id = None
|
|
1190
|
+
messages = store.save_messages([])
|
|
1191
|
+
return WorkspaceClearResponse(messages=messages)
|
|
1010
1192
|
|
|
1011
1193
|
async def append_run_event(
|
|
1012
1194
|
run: WorkspaceRun, event: str, data: dict[str, object]
|
|
@@ -1015,15 +1197,42 @@ def create_app(
|
|
|
1015
1197
|
run.events.append((run.latest_event_index + 1, event, data))
|
|
1016
1198
|
run.condition.notify_all()
|
|
1017
1199
|
|
|
1200
|
+
async def append_run_snapshot(run: WorkspaceRun, message: StoredMessage) -> None:
|
|
1201
|
+
if message.author != "assistant":
|
|
1202
|
+
return
|
|
1203
|
+
run.latest_snapshot = message
|
|
1204
|
+
await append_run_event(
|
|
1205
|
+
run,
|
|
1206
|
+
"snapshot",
|
|
1207
|
+
{"message": stream_message_data(message)},
|
|
1208
|
+
)
|
|
1209
|
+
|
|
1018
1210
|
def active_workspace_run() -> WorkspaceRun | None:
|
|
1019
1211
|
if active_workspace_run_id is None:
|
|
1020
1212
|
return None
|
|
1021
|
-
|
|
1213
|
+
run = workspace_runs.get(active_workspace_run_id)
|
|
1214
|
+
if run is None or run.is_done:
|
|
1215
|
+
return None
|
|
1216
|
+
return run
|
|
1217
|
+
|
|
1218
|
+
def has_active_workspace_run() -> bool:
|
|
1219
|
+
return any(
|
|
1220
|
+
not run.is_done and run.task is not None and not run.task.done()
|
|
1221
|
+
for run in workspace_runs.values()
|
|
1222
|
+
)
|
|
1022
1223
|
|
|
1023
1224
|
def create_workspace_run(content: str) -> WorkspaceRun:
|
|
1024
1225
|
nonlocal active_workspace_run_id
|
|
1226
|
+
if has_active_workspace_run():
|
|
1227
|
+
active_run = active_workspace_run()
|
|
1228
|
+
raise HTTPException(
|
|
1229
|
+
status_code=409,
|
|
1230
|
+
detail="Response in progress",
|
|
1231
|
+
headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
|
|
1232
|
+
)
|
|
1025
1233
|
state = store.read_state()
|
|
1026
1234
|
connection = selected_connection(state)
|
|
1235
|
+
context_window_limit = context_window_for_settings(state.settings)
|
|
1027
1236
|
|
|
1028
1237
|
user_message = StoredMessage(
|
|
1029
1238
|
author="user",
|
|
@@ -1032,7 +1241,10 @@ def create_app(
|
|
|
1032
1241
|
)
|
|
1033
1242
|
next_messages = [*state.messages, user_message]
|
|
1034
1243
|
store.save_messages(next_messages)
|
|
1035
|
-
run = WorkspaceRun(
|
|
1244
|
+
run = WorkspaceRun(
|
|
1245
|
+
condition=asyncio.Condition(),
|
|
1246
|
+
generation=workspace_generation,
|
|
1247
|
+
)
|
|
1036
1248
|
workspace_runs[run.id] = run
|
|
1037
1249
|
active_workspace_run_id = run.id
|
|
1038
1250
|
|
|
@@ -1047,8 +1259,13 @@ def create_app(
|
|
|
1047
1259
|
)
|
|
1048
1260
|
assistant_output = AssistantOutputBuilder(assistant_message.id)
|
|
1049
1261
|
|
|
1050
|
-
def
|
|
1262
|
+
def is_current_generation() -> bool:
|
|
1263
|
+
return run.generation == workspace_generation
|
|
1264
|
+
|
|
1265
|
+
def persist_assistant(status: str = "running") -> StoredMessage | None:
|
|
1051
1266
|
nonlocal next_messages, assistant_message
|
|
1267
|
+
if not is_current_generation() or run.discard_on_cancel:
|
|
1268
|
+
return None
|
|
1052
1269
|
assistant_message = StoredMessage(
|
|
1053
1270
|
author="assistant",
|
|
1054
1271
|
content=assistant_output.content,
|
|
@@ -1057,14 +1274,17 @@ def create_app(
|
|
|
1057
1274
|
status=status,
|
|
1058
1275
|
thinking=assistant_output.thinking,
|
|
1059
1276
|
tools=list(assistant_output.tools.values()),
|
|
1277
|
+
usage_info=store.read_usage_info(),
|
|
1060
1278
|
)
|
|
1061
1279
|
next_messages = append_or_replace_message(
|
|
1062
1280
|
next_messages, assistant_message
|
|
1063
1281
|
)
|
|
1064
1282
|
store.upsert_message(assistant_message)
|
|
1283
|
+
return assistant_message
|
|
1065
1284
|
|
|
1066
1285
|
try:
|
|
1067
1286
|
current_tool_id: str | None = None
|
|
1287
|
+
turn_usage_info: TokenUsageInfo | None = None
|
|
1068
1288
|
current_request_messages = request_messages_for_content(
|
|
1069
1289
|
state,
|
|
1070
1290
|
next_messages,
|
|
@@ -1077,6 +1297,7 @@ def create_app(
|
|
|
1077
1297
|
)
|
|
1078
1298
|
auto_compaction = await auto_compact_workspace_messages(
|
|
1079
1299
|
connection=connection,
|
|
1300
|
+
context_window_limit=context_window_limit,
|
|
1080
1301
|
messages=state.messages,
|
|
1081
1302
|
model_history=[
|
|
1082
1303
|
ChatMessage.model_validate(message)
|
|
@@ -1085,13 +1306,16 @@ def create_app(
|
|
|
1085
1306
|
source_message_id=None,
|
|
1086
1307
|
)
|
|
1087
1308
|
if auto_compaction is not None:
|
|
1088
|
-
marker, _ = auto_compaction
|
|
1309
|
+
marker, _, usage_info = auto_compaction
|
|
1089
1310
|
next_messages = [*state.messages, marker, user_message]
|
|
1090
1311
|
store.save_messages(next_messages)
|
|
1091
1312
|
await append_run_event(
|
|
1092
1313
|
run,
|
|
1093
1314
|
"context_optimized",
|
|
1094
|
-
{
|
|
1315
|
+
{
|
|
1316
|
+
"message": marker.model_dump(),
|
|
1317
|
+
**usage_event_data(usage_info),
|
|
1318
|
+
},
|
|
1095
1319
|
)
|
|
1096
1320
|
current_request_messages = request_messages_for_content(
|
|
1097
1321
|
state,
|
|
@@ -1130,6 +1354,8 @@ def create_app(
|
|
|
1130
1354
|
conversation: Sequence[Mapping[str, object]],
|
|
1131
1355
|
) -> AgentContextUpdate | None:
|
|
1132
1356
|
nonlocal next_messages
|
|
1357
|
+
if not is_current_generation() or run.discard_on_cancel:
|
|
1358
|
+
return None
|
|
1133
1359
|
assistant_snapshot = StoredMessage(
|
|
1134
1360
|
author="assistant",
|
|
1135
1361
|
content=assistant_output.content,
|
|
@@ -1138,6 +1364,7 @@ def create_app(
|
|
|
1138
1364
|
status="running",
|
|
1139
1365
|
thinking=assistant_output.thinking,
|
|
1140
1366
|
tools=list(assistant_output.tools.values()),
|
|
1367
|
+
usage_info=store.read_usage_info(),
|
|
1141
1368
|
)
|
|
1142
1369
|
model_history: list[ChatMessage] = []
|
|
1143
1370
|
for message in conversation:
|
|
@@ -1164,13 +1391,17 @@ def create_app(
|
|
|
1164
1391
|
)
|
|
1165
1392
|
auto_result = await auto_compact_workspace_messages(
|
|
1166
1393
|
connection=connection,
|
|
1394
|
+
context_window_limit=context_window_limit,
|
|
1167
1395
|
messages=next_messages,
|
|
1168
1396
|
model_history=model_history,
|
|
1169
1397
|
source_message_id=assistant_snapshot.id,
|
|
1170
1398
|
)
|
|
1171
1399
|
if auto_result is None:
|
|
1172
1400
|
return None
|
|
1173
|
-
marker, replacement_history = auto_result
|
|
1401
|
+
marker, replacement_history, usage_info = auto_result
|
|
1402
|
+
assistant_snapshot = assistant_snapshot.model_copy(
|
|
1403
|
+
update={"usage_info": usage_info}
|
|
1404
|
+
)
|
|
1174
1405
|
next_messages = append_or_replace_message(
|
|
1175
1406
|
[*next_messages, marker], assistant_snapshot
|
|
1176
1407
|
)
|
|
@@ -1181,7 +1412,10 @@ def create_app(
|
|
|
1181
1412
|
]
|
|
1182
1413
|
return AgentContextUpdate(
|
|
1183
1414
|
conversation=compacted_conversation,
|
|
1184
|
-
message=
|
|
1415
|
+
message={
|
|
1416
|
+
**marker.model_dump(),
|
|
1417
|
+
"usage_info": usage_info.model_dump(),
|
|
1418
|
+
},
|
|
1185
1419
|
)
|
|
1186
1420
|
|
|
1187
1421
|
async for event in run_agent_stream(
|
|
@@ -1195,6 +1429,11 @@ def create_app(
|
|
|
1195
1429
|
messages=current_request_messages,
|
|
1196
1430
|
tool_runner=tool_runner,
|
|
1197
1431
|
):
|
|
1432
|
+
if not is_current_generation() or run.discard_on_cancel:
|
|
1433
|
+
raise asyncio.CancelledError
|
|
1434
|
+
run_event_data = event.data
|
|
1435
|
+
should_append_run_event = event.event != "usage"
|
|
1436
|
+
snapshot_after_event: StoredMessage | None = None
|
|
1198
1437
|
if event.event == "start":
|
|
1199
1438
|
event_id = event.data.get("id")
|
|
1200
1439
|
if isinstance(event_id, str):
|
|
@@ -1202,12 +1441,12 @@ def create_app(
|
|
|
1202
1441
|
update={"id": event_id}
|
|
1203
1442
|
)
|
|
1204
1443
|
assistant_output.set_assistant_id(event_id)
|
|
1205
|
-
persist_assistant()
|
|
1444
|
+
snapshot_after_event = persist_assistant()
|
|
1206
1445
|
if event.event == "output_start":
|
|
1207
1446
|
index = event.data.get("index")
|
|
1208
1447
|
if isinstance(index, int):
|
|
1209
1448
|
assistant_output.start_group(index)
|
|
1210
|
-
persist_assistant()
|
|
1449
|
+
snapshot_after_event = persist_assistant()
|
|
1211
1450
|
if event.event == "tool_start":
|
|
1212
1451
|
tool = event.data.get("tool")
|
|
1213
1452
|
if isinstance(tool, dict) and isinstance(tool.get("id"), str):
|
|
@@ -1215,7 +1454,7 @@ def create_app(
|
|
|
1215
1454
|
assistant_output.start_tool(
|
|
1216
1455
|
StoredToolItem.model_validate(tool)
|
|
1217
1456
|
)
|
|
1218
|
-
persist_assistant()
|
|
1457
|
+
snapshot_after_event = persist_assistant()
|
|
1219
1458
|
if event.event in {"tool_done", "tool_error"}:
|
|
1220
1459
|
tool_id = event.data.get("id")
|
|
1221
1460
|
if (
|
|
@@ -1226,17 +1465,35 @@ def create_app(
|
|
|
1226
1465
|
None if current_tool_id == tool_id else current_tool_id
|
|
1227
1466
|
)
|
|
1228
1467
|
assistant_output.update_tool(tool_id, event.data)
|
|
1229
|
-
persist_assistant()
|
|
1468
|
+
snapshot_after_event = persist_assistant()
|
|
1230
1469
|
if event.event == "delta":
|
|
1231
1470
|
assistant_output.append_text(
|
|
1232
1471
|
str(event.data.get("content") or "")
|
|
1233
1472
|
)
|
|
1234
|
-
persist_assistant()
|
|
1473
|
+
snapshot_after_event = persist_assistant()
|
|
1235
1474
|
if event.event == "thinking_delta":
|
|
1236
1475
|
assistant_output.append_thinking(
|
|
1237
1476
|
str(event.data.get("content") or "")
|
|
1238
1477
|
)
|
|
1239
|
-
persist_assistant()
|
|
1478
|
+
snapshot_after_event = persist_assistant()
|
|
1479
|
+
if event.event == "usage":
|
|
1480
|
+
usage_data = event.data.get("usage")
|
|
1481
|
+
if isinstance(usage_data, dict):
|
|
1482
|
+
usage_info = update_context_usage_for_response(
|
|
1483
|
+
append_token_usage(
|
|
1484
|
+
store.read_usage_info(),
|
|
1485
|
+
TokenUsage.model_validate(usage_data),
|
|
1486
|
+
model_context_window=context_window_limit,
|
|
1487
|
+
),
|
|
1488
|
+
messages=current_request_messages,
|
|
1489
|
+
output_content=assistant_output.content,
|
|
1490
|
+
model_context_window=context_window_limit,
|
|
1491
|
+
)
|
|
1492
|
+
store.save_usage_info(usage_info)
|
|
1493
|
+
turn_usage_info = usage_info
|
|
1494
|
+
run_event_data = usage_event_data(usage_info)
|
|
1495
|
+
should_append_run_event = True
|
|
1496
|
+
snapshot_after_event = persist_assistant()
|
|
1240
1497
|
logger.log(
|
|
1241
1498
|
TRACE_LEVEL,
|
|
1242
1499
|
"Workspace stream event=%s data=%r",
|
|
@@ -1247,12 +1504,42 @@ def create_app(
|
|
|
1247
1504
|
message = event.data.get("message")
|
|
1248
1505
|
if isinstance(message, dict):
|
|
1249
1506
|
assistant_output.apply_done_message(message)
|
|
1250
|
-
|
|
1251
|
-
|
|
1507
|
+
response_usage_info = store.read_usage_info()
|
|
1508
|
+
final_usage_info = turn_usage_info
|
|
1509
|
+
if final_usage_info is None:
|
|
1510
|
+
final_usage_info = update_context_usage_for_response(
|
|
1511
|
+
response_usage_info,
|
|
1512
|
+
messages=current_request_messages,
|
|
1513
|
+
output_content=assistant_output.content,
|
|
1514
|
+
model_context_window=context_window_limit,
|
|
1515
|
+
)
|
|
1516
|
+
else:
|
|
1517
|
+
final_usage_info = update_context_usage_for_response(
|
|
1518
|
+
final_usage_info,
|
|
1519
|
+
messages=current_request_messages,
|
|
1520
|
+
output_content=assistant_output.content,
|
|
1521
|
+
model_context_window=context_window_limit,
|
|
1522
|
+
)
|
|
1523
|
+
store.save_usage_info(final_usage_info)
|
|
1524
|
+
snapshot_after_event = persist_assistant("completed")
|
|
1525
|
+
if snapshot_after_event is not None:
|
|
1526
|
+
run_event_data = {
|
|
1527
|
+
"message": stream_message_data(snapshot_after_event)
|
|
1528
|
+
}
|
|
1529
|
+
if event.event == "done" and snapshot_after_event is not None:
|
|
1530
|
+
await append_run_snapshot(run, snapshot_after_event)
|
|
1531
|
+
await append_run_event(run, event.event, run_event_data)
|
|
1532
|
+
else:
|
|
1533
|
+
if should_append_run_event:
|
|
1534
|
+
await append_run_event(run, event.event, run_event_data)
|
|
1535
|
+
if snapshot_after_event is not None:
|
|
1536
|
+
await append_run_snapshot(run, snapshot_after_event)
|
|
1252
1537
|
except asyncio.CancelledError:
|
|
1253
1538
|
logger.info("Workspace run stopped")
|
|
1254
1539
|
if not run.discard_on_cancel:
|
|
1255
|
-
persist_assistant("interrupted")
|
|
1540
|
+
interrupted_snapshot = persist_assistant("interrupted")
|
|
1541
|
+
if interrupted_snapshot is not None:
|
|
1542
|
+
await append_run_snapshot(run, interrupted_snapshot)
|
|
1256
1543
|
await append_run_event(
|
|
1257
1544
|
run,
|
|
1258
1545
|
"error",
|
|
@@ -1276,7 +1563,9 @@ def create_app(
|
|
|
1276
1563
|
str(error) or EMPTY_MODEL_RESPONSE_DETAIL,
|
|
1277
1564
|
)
|
|
1278
1565
|
)
|
|
1279
|
-
persist_assistant("failed")
|
|
1566
|
+
failed_snapshot = persist_assistant("failed")
|
|
1567
|
+
if failed_snapshot is not None:
|
|
1568
|
+
await append_run_snapshot(run, failed_snapshot)
|
|
1280
1569
|
await append_run_event(run, "error", run_error_event_data(error_item))
|
|
1281
1570
|
finally:
|
|
1282
1571
|
run.is_done = True
|
|
@@ -1289,9 +1578,16 @@ def create_app(
|
|
|
1289
1578
|
return run
|
|
1290
1579
|
|
|
1291
1580
|
async def workspace_run_stream(
|
|
1292
|
-
run: WorkspaceRun, after: int = 0
|
|
1581
|
+
run: WorkspaceRun, after: int = 0, include_snapshots: bool = True
|
|
1293
1582
|
) -> AsyncIterator[str]:
|
|
1294
1583
|
next_event_index = after + 1
|
|
1584
|
+
reconnect_snapshot = run_snapshot_data_at(run, after) if after > 0 else None
|
|
1585
|
+
if include_snapshots and reconnect_snapshot is not None:
|
|
1586
|
+
yield stream_event(
|
|
1587
|
+
"snapshot",
|
|
1588
|
+
{"message": reconnect_snapshot},
|
|
1589
|
+
event_id=after,
|
|
1590
|
+
)
|
|
1295
1591
|
while True:
|
|
1296
1592
|
async with run.condition:
|
|
1297
1593
|
|
|
@@ -1305,7 +1601,9 @@ def create_app(
|
|
|
1305
1601
|
|
|
1306
1602
|
for index, event, data in events:
|
|
1307
1603
|
next_event_index = index + 1
|
|
1308
|
-
|
|
1604
|
+
if event == "snapshot" and not include_snapshots:
|
|
1605
|
+
continue
|
|
1606
|
+
yield stream_event(event, data, event_id=index)
|
|
1309
1607
|
if event in {"done", "error"}:
|
|
1310
1608
|
return
|
|
1311
1609
|
|
|
@@ -1343,47 +1641,105 @@ def create_app(
|
|
|
1343
1641
|
run.task.cancel()
|
|
1344
1642
|
return {"ok": True}
|
|
1345
1643
|
|
|
1346
|
-
@app.post("/api/workspace/compact")
|
|
1347
|
-
async def compact_workspace() ->
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1644
|
+
@app.post("/api/workspace/compact", response_class=StreamingResponse)
|
|
1645
|
+
async def compact_workspace() -> StreamingResponse:
|
|
1646
|
+
nonlocal active_compact_task
|
|
1647
|
+
|
|
1648
|
+
async def run_manual_compact(
|
|
1649
|
+
*,
|
|
1650
|
+
checkpoint: StoredCompactionCheckpoint | None,
|
|
1651
|
+
connection: ProviderConnection,
|
|
1652
|
+
context_window_limit: int,
|
|
1653
|
+
state: StoredState,
|
|
1654
|
+
) -> tuple[StoredMessage, TokenUsageInfo]:
|
|
1655
|
+
logger.info("Workspace compact requested")
|
|
1656
|
+
try:
|
|
1657
|
+
model_history = [
|
|
1658
|
+
*runtime_context_messages(cwd, state.settings.agent_prompt),
|
|
1659
|
+
*workspace_chat_messages(
|
|
1660
|
+
state.messages,
|
|
1661
|
+
store.read_compacted_context(),
|
|
1662
|
+
checkpoint,
|
|
1663
|
+
),
|
|
1664
|
+
]
|
|
1665
|
+
|
|
1666
|
+
marker, _, usage_info = await save_context_checkpoint(
|
|
1667
|
+
connection=connection,
|
|
1668
|
+
context_window_limit=context_window_limit,
|
|
1669
|
+
marker_content=COMPACTED_CONTEXT_MARKER,
|
|
1670
|
+
messages=state.messages,
|
|
1671
|
+
model_history=model_history,
|
|
1672
|
+
source_message_id=None,
|
|
1673
|
+
trigger="manual",
|
|
1674
|
+
)
|
|
1675
|
+
store.save_messages([*state.messages, marker])
|
|
1676
|
+
logger.info("Workspace compact completed")
|
|
1677
|
+
return marker, usage_info
|
|
1678
|
+
except Exception:
|
|
1679
|
+
logger.exception("Workspace compact failed")
|
|
1680
|
+
raise
|
|
1681
|
+
finally:
|
|
1682
|
+
store.save_is_compacting(False)
|
|
1683
|
+
|
|
1684
|
+
def clear_active_compact_task(
|
|
1685
|
+
task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]],
|
|
1686
|
+
) -> None:
|
|
1687
|
+
nonlocal active_compact_task
|
|
1688
|
+
if active_compact_task is not None and active_compact_task.task is task:
|
|
1689
|
+
active_compact_task = None
|
|
1690
|
+
with suppress(asyncio.CancelledError):
|
|
1691
|
+
task.exception()
|
|
1692
|
+
|
|
1693
|
+
if active_compact_task is not None:
|
|
1694
|
+
if not active_compact_task.task.done():
|
|
1695
|
+
compact_task = active_compact_task.task
|
|
1696
|
+
else:
|
|
1697
|
+
active_compact_task = None
|
|
1698
|
+
|
|
1699
|
+
if active_compact_task is None:
|
|
1700
|
+
if active_workspace_run() is not None:
|
|
1701
|
+
raise HTTPException(
|
|
1702
|
+
status_code=409,
|
|
1703
|
+
detail="Compact is unavailable while Flowent is responding.",
|
|
1704
|
+
)
|
|
1705
|
+
state = store.read_state()
|
|
1706
|
+
connection = selected_connection(state)
|
|
1707
|
+
context_window_limit = context_window_for_settings(state.settings)
|
|
1708
|
+
checkpoint = store.read_active_compaction_checkpoint()
|
|
1709
|
+
store.save_is_compacting(True)
|
|
1710
|
+
compact_task = asyncio.create_task(
|
|
1711
|
+
run_manual_compact(
|
|
1712
|
+
checkpoint=checkpoint,
|
|
1713
|
+
connection=connection,
|
|
1714
|
+
context_window_limit=context_window_limit,
|
|
1715
|
+
state=state,
|
|
1716
|
+
)
|
|
1352
1717
|
)
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
connection = selected_connection(state)
|
|
1356
|
-
checkpoint = store.read_active_compaction_checkpoint()
|
|
1357
|
-
model_history = [
|
|
1358
|
-
*runtime_context_messages(cwd, state.settings.agent_prompt),
|
|
1359
|
-
*workspace_chat_messages(
|
|
1360
|
-
state.messages,
|
|
1361
|
-
store.read_compacted_context(),
|
|
1362
|
-
checkpoint,
|
|
1363
|
-
),
|
|
1364
|
-
]
|
|
1718
|
+
compact_task.add_done_callback(clear_active_compact_task)
|
|
1719
|
+
active_compact_task = WorkspaceCompactTask(task=compact_task)
|
|
1365
1720
|
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1721
|
+
async def compact_workspace_stream() -> AsyncIterator[str]:
|
|
1722
|
+
try:
|
|
1723
|
+
marker, usage_info = await asyncio.shield(compact_task)
|
|
1724
|
+
except Exception:
|
|
1725
|
+
yield stream_event(
|
|
1726
|
+
"error",
|
|
1727
|
+
{"message": "Context could not be compacted."},
|
|
1728
|
+
)
|
|
1729
|
+
return
|
|
1730
|
+
|
|
1731
|
+
marker_data = marker.model_dump()
|
|
1732
|
+
yield stream_event("usage", usage_event_data(usage_info))
|
|
1733
|
+
yield stream_event(
|
|
1734
|
+
"context_optimized",
|
|
1735
|
+
{"message": marker_data, **usage_event_data(usage_info)},
|
|
1374
1736
|
)
|
|
1375
|
-
|
|
1376
|
-
raise
|
|
1377
|
-
except Exception as error:
|
|
1378
|
-
logger.exception("Workspace compact failed")
|
|
1379
|
-
raise HTTPException(
|
|
1380
|
-
status_code=500,
|
|
1381
|
-
detail="Context could not be compacted.",
|
|
1382
|
-
) from error
|
|
1737
|
+
yield stream_event("done", {"message": marker_data})
|
|
1383
1738
|
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1739
|
+
return StreamingResponse(
|
|
1740
|
+
compact_workspace_stream(),
|
|
1741
|
+
media_type="text/event-stream",
|
|
1742
|
+
)
|
|
1387
1743
|
|
|
1388
1744
|
@app.post("/api/workspace/respond")
|
|
1389
1745
|
async def respond_to_workspace(
|
|
@@ -1395,7 +1751,7 @@ def create_app(
|
|
|
1395
1751
|
logger.log(TRACE_LEVEL, "Workspace user content=%r", request.content)
|
|
1396
1752
|
run = create_workspace_run(request.content)
|
|
1397
1753
|
return StreamingResponse(
|
|
1398
|
-
workspace_run_stream(run),
|
|
1754
|
+
workspace_run_stream(run, include_snapshots=False),
|
|
1399
1755
|
media_type="text/event-stream",
|
|
1400
1756
|
)
|
|
1401
1757
|
|