flowent 0.2.1 → 0.2.2
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/backend/pyproject.toml +1 -1
- package/backend/src/flowent/approval.py +6 -4
- package/backend/src/flowent/main.py +230 -75
- package/backend/src/flowent/static/assets/index-Bz76A4EJ.js +82 -0
- package/backend/src/flowent/static/assets/index-DufpDl8x.css +2 -0
- package/backend/src/flowent/static/index.html +2 -2
- package/backend/src/flowent/storage.py +16 -4
- package/backend/uv.lock +1 -1
- package/dist/frontend/assets/index-Bz76A4EJ.js +82 -0
- package/dist/frontend/assets/index-DufpDl8x.css +2 -0
- package/dist/frontend/index.html +2 -2
- package/package.json +1 -1
- package/backend/src/flowent/static/assets/index-CRSV2xu1.css +0 -2
- package/backend/src/flowent/static/assets/index-DUYj6rgD.js +0 -82
- package/dist/frontend/assets/index-CRSV2xu1.css +0 -2
- package/dist/frontend/assets/index-DUYj6rgD.js +0 -82
package/backend/pyproject.toml
CHANGED
|
@@ -12,7 +12,7 @@ from flowent.llm import (
|
|
|
12
12
|
ChatMessage,
|
|
13
13
|
CompletionCallable,
|
|
14
14
|
ProviderConnection,
|
|
15
|
-
|
|
15
|
+
stream_chat,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger("flowent.approval")
|
|
@@ -128,7 +128,8 @@ async def review_approval_request(
|
|
|
128
128
|
completion: CompletionCallable | None = None,
|
|
129
129
|
) -> ApprovalReviewDecision:
|
|
130
130
|
try:
|
|
131
|
-
|
|
131
|
+
content = ""
|
|
132
|
+
async for delta in stream_chat(
|
|
132
133
|
connection,
|
|
133
134
|
[
|
|
134
135
|
ChatMessage(role="system", content=APPROVAL_REVIEWER_PROMPT),
|
|
@@ -138,8 +139,9 @@ async def review_approval_request(
|
|
|
138
139
|
),
|
|
139
140
|
],
|
|
140
141
|
completion=completion,
|
|
141
|
-
)
|
|
142
|
-
|
|
142
|
+
):
|
|
143
|
+
content += delta
|
|
144
|
+
return parse_review_decision(content)
|
|
143
145
|
except Exception as error:
|
|
144
146
|
logger.warning("Approval reviewer denied request after failure: %s", error)
|
|
145
147
|
return ApprovalReviewDecision(
|
|
@@ -85,7 +85,7 @@ logger = logging.getLogger("flowent.main")
|
|
|
85
85
|
DEFAULT_STATIC_DIR = Path(__file__).parent / "static"
|
|
86
86
|
COMPACTED_CONTEXT_MARKER = "Context compacted"
|
|
87
87
|
OPTIMIZED_CONTEXT_MARKER = "Context optimized"
|
|
88
|
-
|
|
88
|
+
DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO = 0.95
|
|
89
89
|
AUTO_COMPACT_RETAINED_MESSAGE_TOKEN_BUDGET = 20_000
|
|
90
90
|
APPROVAL_TRANSCRIPT_MESSAGE_LIMIT = 12
|
|
91
91
|
APPROVAL_TRANSCRIPT_TEXT_LIMIT = 2_000
|
|
@@ -121,6 +121,14 @@ class WorkspaceRunResponse(BaseModel):
|
|
|
121
121
|
run_id: str
|
|
122
122
|
|
|
123
123
|
|
|
124
|
+
class WorkspaceClearResponse(BaseModel):
|
|
125
|
+
model_config = ConfigDict(extra="forbid")
|
|
126
|
+
|
|
127
|
+
active_run_id: str | None = None
|
|
128
|
+
messages: list[StoredMessage]
|
|
129
|
+
usage_info: TokenUsageInfo | None = None
|
|
130
|
+
|
|
131
|
+
|
|
124
132
|
@dataclass
|
|
125
133
|
class WorkspaceCompactTask:
|
|
126
134
|
task: asyncio.Task[tuple[StoredMessage, TokenUsageInfo]]
|
|
@@ -174,8 +182,10 @@ class WorkspaceRun:
|
|
|
174
182
|
condition: asyncio.Condition
|
|
175
183
|
discard_on_cancel: bool = False
|
|
176
184
|
events: list[tuple[int, str, dict[str, object]]] = field(default_factory=list)
|
|
185
|
+
generation: int = 0
|
|
177
186
|
id: str = field(default_factory=lambda: str(uuid4()))
|
|
178
187
|
is_done: bool = False
|
|
188
|
+
latest_snapshot: StoredMessage | None = None
|
|
179
189
|
task: asyncio.Task[None] | None = None
|
|
180
190
|
|
|
181
191
|
@property
|
|
@@ -183,8 +193,15 @@ class WorkspaceRun:
|
|
|
183
193
|
return self.events[-1][0] if self.events else 0
|
|
184
194
|
|
|
185
195
|
|
|
186
|
-
def stream_event(
|
|
187
|
-
|
|
196
|
+
def stream_event(
|
|
197
|
+
event: str, data: dict[str, object], event_id: int | None = None
|
|
198
|
+
) -> str:
|
|
199
|
+
id_line = f"id: {event_id}\n" if event_id is not None else ""
|
|
200
|
+
return f"{id_line}event: {event}\ndata: {json.dumps(data)}\n\n"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def stream_message_data(message: StoredMessage) -> dict[str, object]:
|
|
204
|
+
return {**message.model_dump(), "status": message.status}
|
|
188
205
|
|
|
189
206
|
|
|
190
207
|
def append_or_replace_message(
|
|
@@ -196,6 +213,18 @@ def append_or_replace_message(
|
|
|
196
213
|
]
|
|
197
214
|
|
|
198
215
|
|
|
216
|
+
def run_snapshot_data_at(
|
|
217
|
+
run: WorkspaceRun, event_index: int
|
|
218
|
+
) -> dict[str, object] | None:
|
|
219
|
+
for current_event_index, event, data in reversed(run.events):
|
|
220
|
+
if current_event_index > event_index or event != "snapshot":
|
|
221
|
+
continue
|
|
222
|
+
message = data.get("message")
|
|
223
|
+
if isinstance(message, dict):
|
|
224
|
+
return message
|
|
225
|
+
return None
|
|
226
|
+
|
|
227
|
+
|
|
199
228
|
USER_VISIBLE_RUN_ERROR_TITLE = "Request failed"
|
|
200
229
|
USER_VISIBLE_RUN_ERROR_MESSAGE = "Check the model connection settings and try again."
|
|
201
230
|
USER_VISIBLE_CONTEXT_OPTIMIZATION_ERROR_MESSAGE = "Context could not be optimized."
|
|
@@ -511,16 +540,22 @@ def is_context_marker(message: StoredMessage) -> bool:
|
|
|
511
540
|
return message.content in {COMPACTED_CONTEXT_MARKER, OPTIMIZED_CONTEXT_MARKER}
|
|
512
541
|
|
|
513
542
|
|
|
514
|
-
def auto_compact_token_limit() -> int:
|
|
543
|
+
def auto_compact_token_limit(context_window: int) -> int:
|
|
515
544
|
raw_limit = os.environ.get("FLOWENT_AUTO_COMPACT_TOKEN_LIMIT", "")
|
|
545
|
+
if not raw_limit:
|
|
546
|
+
return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
|
|
516
547
|
try:
|
|
517
548
|
return max(0, int(raw_limit))
|
|
518
549
|
except ValueError:
|
|
519
|
-
return
|
|
550
|
+
return max(0, int(context_window * DEFAULT_AUTO_COMPACT_CONTEXT_WINDOW_RATIO))
|
|
520
551
|
|
|
521
552
|
|
|
522
|
-
def should_auto_compact(
|
|
523
|
-
|
|
553
|
+
def should_auto_compact(
|
|
554
|
+
messages: list[ChatMessage],
|
|
555
|
+
*,
|
|
556
|
+
context_window: int,
|
|
557
|
+
) -> bool:
|
|
558
|
+
token_limit = auto_compact_token_limit(context_window)
|
|
524
559
|
if token_limit <= 0:
|
|
525
560
|
return False
|
|
526
561
|
return (
|
|
@@ -543,19 +578,40 @@ def usage_event_data(usage_info: TokenUsageInfo) -> dict[str, object]:
|
|
|
543
578
|
return {"usage_info": usage_info.model_dump()}
|
|
544
579
|
|
|
545
580
|
|
|
581
|
+
def update_context_usage_for_response(
|
|
582
|
+
usage_info: TokenUsageInfo | None,
|
|
583
|
+
*,
|
|
584
|
+
messages: Sequence[Mapping[str, object]],
|
|
585
|
+
output_content: str,
|
|
586
|
+
model_context_window: int,
|
|
587
|
+
) -> TokenUsageInfo:
|
|
588
|
+
return recompute_context_usage(
|
|
589
|
+
usage_info,
|
|
590
|
+
estimated_token_usage_for_messages(
|
|
591
|
+
model_visible_messages_for_usage(messages),
|
|
592
|
+
output_content=output_content,
|
|
593
|
+
).total_tokens,
|
|
594
|
+
model_context_window=model_context_window,
|
|
595
|
+
)
|
|
596
|
+
|
|
597
|
+
|
|
546
598
|
def usage_info_for_model(
|
|
547
599
|
usage_info: TokenUsageInfo | None,
|
|
548
|
-
|
|
600
|
+
model_context_window: int,
|
|
549
601
|
) -> TokenUsageInfo | None:
|
|
550
602
|
if usage_info is None:
|
|
551
603
|
return None
|
|
552
|
-
return usage_info.model_copy(
|
|
553
|
-
|
|
554
|
-
|
|
604
|
+
return usage_info.model_copy(update={"model_context_window": model_context_window})
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def context_window_for_settings(settings: StoredSettings) -> int:
|
|
608
|
+
if settings.context_window_limit is not None:
|
|
609
|
+
return settings.context_window_limit
|
|
610
|
+
return current_model_context_window(settings.selected_model)
|
|
555
611
|
|
|
556
612
|
|
|
557
613
|
def state_with_current_model_context_window(state: StoredState) -> StoredState:
|
|
558
|
-
|
|
614
|
+
model_context_window = context_window_for_settings(state.settings)
|
|
559
615
|
return state.model_copy(
|
|
560
616
|
update={
|
|
561
617
|
"messages": [
|
|
@@ -563,7 +619,7 @@ def state_with_current_model_context_window(state: StoredState) -> StoredState:
|
|
|
563
619
|
update={
|
|
564
620
|
"usage_info": usage_info_for_model(
|
|
565
621
|
message.usage_info,
|
|
566
|
-
|
|
622
|
+
model_context_window,
|
|
567
623
|
)
|
|
568
624
|
}
|
|
569
625
|
)
|
|
@@ -571,7 +627,10 @@ def state_with_current_model_context_window(state: StoredState) -> StoredState:
|
|
|
571
627
|
else message
|
|
572
628
|
for message in state.messages
|
|
573
629
|
],
|
|
574
|
-
"usage_info": usage_info_for_model(
|
|
630
|
+
"usage_info": usage_info_for_model(
|
|
631
|
+
state.usage_info,
|
|
632
|
+
model_context_window,
|
|
633
|
+
),
|
|
575
634
|
}
|
|
576
635
|
)
|
|
577
636
|
|
|
@@ -671,6 +730,7 @@ def create_app(
|
|
|
671
730
|
telegram_bot_manager: TelegramBotManager | None = None
|
|
672
731
|
workspace_runs: dict[str, WorkspaceRun] = {}
|
|
673
732
|
active_workspace_run_id: str | None = None
|
|
733
|
+
workspace_generation = 0
|
|
674
734
|
active_compact_task: WorkspaceCompactTask | None = None
|
|
675
735
|
|
|
676
736
|
static_dir = frontend_static_directory().resolve(strict=False)
|
|
@@ -702,6 +762,7 @@ def create_app(
|
|
|
702
762
|
async def save_context_checkpoint(
|
|
703
763
|
*,
|
|
704
764
|
connection: ProviderConnection,
|
|
765
|
+
context_window_limit: int,
|
|
705
766
|
messages: list[StoredMessage],
|
|
706
767
|
model_history: list[ChatMessage],
|
|
707
768
|
marker_content: str,
|
|
@@ -723,12 +784,12 @@ def create_app(
|
|
|
723
784
|
usage_info = append_token_usage(
|
|
724
785
|
usage_info,
|
|
725
786
|
compact_result.summary_usage,
|
|
726
|
-
model_context_window=
|
|
787
|
+
model_context_window=context_window_limit,
|
|
727
788
|
)
|
|
728
789
|
usage_info = recompute_context_usage(
|
|
729
790
|
usage_info,
|
|
730
791
|
compact_result.token_after,
|
|
731
|
-
model_context_window=
|
|
792
|
+
model_context_window=context_window_limit,
|
|
732
793
|
)
|
|
733
794
|
store.save_usage_info(usage_info)
|
|
734
795
|
marker = StoredMessage(
|
|
@@ -767,16 +828,21 @@ def create_app(
|
|
|
767
828
|
async def auto_compact_workspace_messages(
|
|
768
829
|
*,
|
|
769
830
|
connection: ProviderConnection,
|
|
831
|
+
context_window_limit: int,
|
|
770
832
|
messages: list[StoredMessage],
|
|
771
833
|
model_history: list[ChatMessage],
|
|
772
834
|
source_message_id: str | None = None,
|
|
773
835
|
) -> tuple[StoredMessage, list[dict[str, object]], TokenUsageInfo] | None:
|
|
774
|
-
if not should_auto_compact(
|
|
836
|
+
if not should_auto_compact(
|
|
837
|
+
model_history,
|
|
838
|
+
context_window=context_window_limit,
|
|
839
|
+
):
|
|
775
840
|
return None
|
|
776
841
|
logger.info("Workspace auto compact requested")
|
|
777
842
|
try:
|
|
778
843
|
return await save_context_checkpoint(
|
|
779
844
|
connection=connection,
|
|
845
|
+
context_window_limit=context_window_limit,
|
|
780
846
|
marker_content=OPTIMIZED_CONTEXT_MARKER,
|
|
781
847
|
messages=messages,
|
|
782
848
|
model_history=model_history,
|
|
@@ -790,6 +856,7 @@ def create_app(
|
|
|
790
856
|
async def run_workspace_turn(content: str) -> StoredMessage:
|
|
791
857
|
state = store.read_state()
|
|
792
858
|
connection = selected_connection(state)
|
|
859
|
+
context_window_limit = context_window_for_settings(state.settings)
|
|
793
860
|
user_message = StoredMessage(
|
|
794
861
|
author="user",
|
|
795
862
|
content=content,
|
|
@@ -807,6 +874,7 @@ def create_app(
|
|
|
807
874
|
]
|
|
808
875
|
auto_compaction = await auto_compact_workspace_messages(
|
|
809
876
|
connection=connection,
|
|
877
|
+
context_window_limit=context_window_limit,
|
|
810
878
|
messages=state.messages,
|
|
811
879
|
model_history=model_history,
|
|
812
880
|
source_message_id=None,
|
|
@@ -873,12 +941,15 @@ def create_app(
|
|
|
873
941
|
if event.event == "usage":
|
|
874
942
|
usage_data = event.data.get("usage")
|
|
875
943
|
if isinstance(usage_data, dict):
|
|
876
|
-
usage_info =
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
944
|
+
usage_info = update_context_usage_for_response(
|
|
945
|
+
append_token_usage(
|
|
946
|
+
store.read_usage_info(),
|
|
947
|
+
TokenUsage.model_validate(usage_data),
|
|
948
|
+
model_context_window=context_window_limit,
|
|
881
949
|
),
|
|
950
|
+
messages=request_messages,
|
|
951
|
+
output_content=assistant_output.content,
|
|
952
|
+
model_context_window=context_window_limit,
|
|
882
953
|
)
|
|
883
954
|
store.save_usage_info(usage_info)
|
|
884
955
|
turn_usage_info = usage_info
|
|
@@ -899,15 +970,20 @@ def create_app(
|
|
|
899
970
|
|
|
900
971
|
final_usage_info = turn_usage_info
|
|
901
972
|
if final_usage_info is None:
|
|
902
|
-
final_usage_info =
|
|
973
|
+
final_usage_info = update_context_usage_for_response(
|
|
903
974
|
store.read_usage_info(),
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
).total_tokens,
|
|
908
|
-
model_context_window=current_model_context_window(connection.model),
|
|
975
|
+
messages=request_messages,
|
|
976
|
+
output_content=assistant_output.content,
|
|
977
|
+
model_context_window=context_window_limit,
|
|
909
978
|
)
|
|
910
|
-
|
|
979
|
+
else:
|
|
980
|
+
final_usage_info = update_context_usage_for_response(
|
|
981
|
+
final_usage_info,
|
|
982
|
+
messages=request_messages,
|
|
983
|
+
output_content=assistant_output.content,
|
|
984
|
+
model_context_window=context_window_limit,
|
|
985
|
+
)
|
|
986
|
+
store.save_usage_info(final_usage_info)
|
|
911
987
|
|
|
912
988
|
assistant_message = StoredMessage(
|
|
913
989
|
author="assistant",
|
|
@@ -1096,14 +1172,23 @@ def create_app(
|
|
|
1096
1172
|
async def save_workspace_messages(
|
|
1097
1173
|
request: WorkspaceMessagesRequest,
|
|
1098
1174
|
) -> WorkspaceMessagesRequest:
|
|
1175
|
+
return WorkspaceMessagesRequest(messages=store.save_messages(request.messages))
|
|
1176
|
+
|
|
1177
|
+
@app.post("/api/workspace/clear")
|
|
1178
|
+
async def clear_workspace() -> WorkspaceClearResponse:
|
|
1099
1179
|
nonlocal active_workspace_run_id
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1180
|
+
nonlocal workspace_generation
|
|
1181
|
+
workspace_generation += 1
|
|
1182
|
+
for run in workspace_runs.values():
|
|
1183
|
+
run.is_done = True
|
|
1184
|
+
if run.task is not None and not run.task.done():
|
|
1103
1185
|
run.discard_on_cancel = True
|
|
1104
1186
|
run.task.cancel()
|
|
1105
|
-
|
|
1106
|
-
|
|
1187
|
+
async with run.condition:
|
|
1188
|
+
run.condition.notify_all()
|
|
1189
|
+
active_workspace_run_id = None
|
|
1190
|
+
messages = store.save_messages([])
|
|
1191
|
+
return WorkspaceClearResponse(messages=messages)
|
|
1107
1192
|
|
|
1108
1193
|
async def append_run_event(
|
|
1109
1194
|
run: WorkspaceRun, event: str, data: dict[str, object]
|
|
@@ -1112,15 +1197,42 @@ def create_app(
|
|
|
1112
1197
|
run.events.append((run.latest_event_index + 1, event, data))
|
|
1113
1198
|
run.condition.notify_all()
|
|
1114
1199
|
|
|
1200
|
+
async def append_run_snapshot(run: WorkspaceRun, message: StoredMessage) -> None:
|
|
1201
|
+
if message.author != "assistant":
|
|
1202
|
+
return
|
|
1203
|
+
run.latest_snapshot = message
|
|
1204
|
+
await append_run_event(
|
|
1205
|
+
run,
|
|
1206
|
+
"snapshot",
|
|
1207
|
+
{"message": stream_message_data(message)},
|
|
1208
|
+
)
|
|
1209
|
+
|
|
1115
1210
|
def active_workspace_run() -> WorkspaceRun | None:
|
|
1116
1211
|
if active_workspace_run_id is None:
|
|
1117
1212
|
return None
|
|
1118
|
-
|
|
1213
|
+
run = workspace_runs.get(active_workspace_run_id)
|
|
1214
|
+
if run is None or run.is_done:
|
|
1215
|
+
return None
|
|
1216
|
+
return run
|
|
1217
|
+
|
|
1218
|
+
def has_active_workspace_run() -> bool:
|
|
1219
|
+
return any(
|
|
1220
|
+
not run.is_done and run.task is not None and not run.task.done()
|
|
1221
|
+
for run in workspace_runs.values()
|
|
1222
|
+
)
|
|
1119
1223
|
|
|
1120
1224
|
def create_workspace_run(content: str) -> WorkspaceRun:
|
|
1121
1225
|
nonlocal active_workspace_run_id
|
|
1226
|
+
if has_active_workspace_run():
|
|
1227
|
+
active_run = active_workspace_run()
|
|
1228
|
+
raise HTTPException(
|
|
1229
|
+
status_code=409,
|
|
1230
|
+
detail="Response in progress",
|
|
1231
|
+
headers={"X-Flowent-Run-Id": active_run.id if active_run else ""},
|
|
1232
|
+
)
|
|
1122
1233
|
state = store.read_state()
|
|
1123
1234
|
connection = selected_connection(state)
|
|
1235
|
+
context_window_limit = context_window_for_settings(state.settings)
|
|
1124
1236
|
|
|
1125
1237
|
user_message = StoredMessage(
|
|
1126
1238
|
author="user",
|
|
@@ -1129,7 +1241,10 @@ def create_app(
|
|
|
1129
1241
|
)
|
|
1130
1242
|
next_messages = [*state.messages, user_message]
|
|
1131
1243
|
store.save_messages(next_messages)
|
|
1132
|
-
run = WorkspaceRun(
|
|
1244
|
+
run = WorkspaceRun(
|
|
1245
|
+
condition=asyncio.Condition(),
|
|
1246
|
+
generation=workspace_generation,
|
|
1247
|
+
)
|
|
1133
1248
|
workspace_runs[run.id] = run
|
|
1134
1249
|
active_workspace_run_id = run.id
|
|
1135
1250
|
|
|
@@ -1144,8 +1259,13 @@ def create_app(
|
|
|
1144
1259
|
)
|
|
1145
1260
|
assistant_output = AssistantOutputBuilder(assistant_message.id)
|
|
1146
1261
|
|
|
1147
|
-
def
|
|
1262
|
+
def is_current_generation() -> bool:
|
|
1263
|
+
return run.generation == workspace_generation
|
|
1264
|
+
|
|
1265
|
+
def persist_assistant(status: str = "running") -> StoredMessage | None:
|
|
1148
1266
|
nonlocal next_messages, assistant_message
|
|
1267
|
+
if not is_current_generation() or run.discard_on_cancel:
|
|
1268
|
+
return None
|
|
1149
1269
|
assistant_message = StoredMessage(
|
|
1150
1270
|
author="assistant",
|
|
1151
1271
|
content=assistant_output.content,
|
|
@@ -1160,6 +1280,7 @@ def create_app(
|
|
|
1160
1280
|
next_messages, assistant_message
|
|
1161
1281
|
)
|
|
1162
1282
|
store.upsert_message(assistant_message)
|
|
1283
|
+
return assistant_message
|
|
1163
1284
|
|
|
1164
1285
|
try:
|
|
1165
1286
|
current_tool_id: str | None = None
|
|
@@ -1176,6 +1297,7 @@ def create_app(
|
|
|
1176
1297
|
)
|
|
1177
1298
|
auto_compaction = await auto_compact_workspace_messages(
|
|
1178
1299
|
connection=connection,
|
|
1300
|
+
context_window_limit=context_window_limit,
|
|
1179
1301
|
messages=state.messages,
|
|
1180
1302
|
model_history=[
|
|
1181
1303
|
ChatMessage.model_validate(message)
|
|
@@ -1232,6 +1354,8 @@ def create_app(
|
|
|
1232
1354
|
conversation: Sequence[Mapping[str, object]],
|
|
1233
1355
|
) -> AgentContextUpdate | None:
|
|
1234
1356
|
nonlocal next_messages
|
|
1357
|
+
if not is_current_generation() or run.discard_on_cancel:
|
|
1358
|
+
return None
|
|
1235
1359
|
assistant_snapshot = StoredMessage(
|
|
1236
1360
|
author="assistant",
|
|
1237
1361
|
content=assistant_output.content,
|
|
@@ -1267,6 +1391,7 @@ def create_app(
|
|
|
1267
1391
|
)
|
|
1268
1392
|
auto_result = await auto_compact_workspace_messages(
|
|
1269
1393
|
connection=connection,
|
|
1394
|
+
context_window_limit=context_window_limit,
|
|
1270
1395
|
messages=next_messages,
|
|
1271
1396
|
model_history=model_history,
|
|
1272
1397
|
source_message_id=assistant_snapshot.id,
|
|
@@ -1304,6 +1429,11 @@ def create_app(
|
|
|
1304
1429
|
messages=current_request_messages,
|
|
1305
1430
|
tool_runner=tool_runner,
|
|
1306
1431
|
):
|
|
1432
|
+
if not is_current_generation() or run.discard_on_cancel:
|
|
1433
|
+
raise asyncio.CancelledError
|
|
1434
|
+
run_event_data = event.data
|
|
1435
|
+
should_append_run_event = event.event != "usage"
|
|
1436
|
+
snapshot_after_event: StoredMessage | None = None
|
|
1307
1437
|
if event.event == "start":
|
|
1308
1438
|
event_id = event.data.get("id")
|
|
1309
1439
|
if isinstance(event_id, str):
|
|
@@ -1311,12 +1441,12 @@ def create_app(
|
|
|
1311
1441
|
update={"id": event_id}
|
|
1312
1442
|
)
|
|
1313
1443
|
assistant_output.set_assistant_id(event_id)
|
|
1314
|
-
persist_assistant()
|
|
1444
|
+
snapshot_after_event = persist_assistant()
|
|
1315
1445
|
if event.event == "output_start":
|
|
1316
1446
|
index = event.data.get("index")
|
|
1317
1447
|
if isinstance(index, int):
|
|
1318
1448
|
assistant_output.start_group(index)
|
|
1319
|
-
persist_assistant()
|
|
1449
|
+
snapshot_after_event = persist_assistant()
|
|
1320
1450
|
if event.event == "tool_start":
|
|
1321
1451
|
tool = event.data.get("tool")
|
|
1322
1452
|
if isinstance(tool, dict) and isinstance(tool.get("id"), str):
|
|
@@ -1324,7 +1454,7 @@ def create_app(
|
|
|
1324
1454
|
assistant_output.start_tool(
|
|
1325
1455
|
StoredToolItem.model_validate(tool)
|
|
1326
1456
|
)
|
|
1327
|
-
persist_assistant()
|
|
1457
|
+
snapshot_after_event = persist_assistant()
|
|
1328
1458
|
if event.event in {"tool_done", "tool_error"}:
|
|
1329
1459
|
tool_id = event.data.get("id")
|
|
1330
1460
|
if (
|
|
@@ -1335,34 +1465,35 @@ def create_app(
|
|
|
1335
1465
|
None if current_tool_id == tool_id else current_tool_id
|
|
1336
1466
|
)
|
|
1337
1467
|
assistant_output.update_tool(tool_id, event.data)
|
|
1338
|
-
persist_assistant()
|
|
1468
|
+
snapshot_after_event = persist_assistant()
|
|
1339
1469
|
if event.event == "delta":
|
|
1340
1470
|
assistant_output.append_text(
|
|
1341
1471
|
str(event.data.get("content") or "")
|
|
1342
1472
|
)
|
|
1343
|
-
persist_assistant()
|
|
1473
|
+
snapshot_after_event = persist_assistant()
|
|
1344
1474
|
if event.event == "thinking_delta":
|
|
1345
1475
|
assistant_output.append_thinking(
|
|
1346
1476
|
str(event.data.get("content") or "")
|
|
1347
1477
|
)
|
|
1348
|
-
persist_assistant()
|
|
1478
|
+
snapshot_after_event = persist_assistant()
|
|
1349
1479
|
if event.event == "usage":
|
|
1350
1480
|
usage_data = event.data.get("usage")
|
|
1351
1481
|
if isinstance(usage_data, dict):
|
|
1352
|
-
usage_info =
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1482
|
+
usage_info = update_context_usage_for_response(
|
|
1483
|
+
append_token_usage(
|
|
1484
|
+
store.read_usage_info(),
|
|
1485
|
+
TokenUsage.model_validate(usage_data),
|
|
1486
|
+
model_context_window=context_window_limit,
|
|
1357
1487
|
),
|
|
1488
|
+
messages=current_request_messages,
|
|
1489
|
+
output_content=assistant_output.content,
|
|
1490
|
+
model_context_window=context_window_limit,
|
|
1358
1491
|
)
|
|
1359
1492
|
store.save_usage_info(usage_info)
|
|
1360
1493
|
turn_usage_info = usage_info
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
usage_event_data(usage_info),
|
|
1365
|
-
)
|
|
1494
|
+
run_event_data = usage_event_data(usage_info)
|
|
1495
|
+
should_append_run_event = True
|
|
1496
|
+
snapshot_after_event = persist_assistant()
|
|
1366
1497
|
logger.log(
|
|
1367
1498
|
TRACE_LEVEL,
|
|
1368
1499
|
"Workspace stream event=%s data=%r",
|
|
@@ -1376,30 +1507,39 @@ def create_app(
|
|
|
1376
1507
|
response_usage_info = store.read_usage_info()
|
|
1377
1508
|
final_usage_info = turn_usage_info
|
|
1378
1509
|
if final_usage_info is None:
|
|
1379
|
-
final_usage_info =
|
|
1510
|
+
final_usage_info = update_context_usage_for_response(
|
|
1380
1511
|
response_usage_info,
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
),
|
|
1385
|
-
output_content=assistant_output.content,
|
|
1386
|
-
).total_tokens,
|
|
1387
|
-
model_context_window=current_model_context_window(
|
|
1388
|
-
connection.model
|
|
1389
|
-
),
|
|
1512
|
+
messages=current_request_messages,
|
|
1513
|
+
output_content=assistant_output.content,
|
|
1514
|
+
model_context_window=context_window_limit,
|
|
1390
1515
|
)
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1516
|
+
else:
|
|
1517
|
+
final_usage_info = update_context_usage_for_response(
|
|
1518
|
+
final_usage_info,
|
|
1519
|
+
messages=current_request_messages,
|
|
1520
|
+
output_content=assistant_output.content,
|
|
1521
|
+
model_context_window=context_window_limit,
|
|
1395
1522
|
)
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1523
|
+
store.save_usage_info(final_usage_info)
|
|
1524
|
+
snapshot_after_event = persist_assistant("completed")
|
|
1525
|
+
if snapshot_after_event is not None:
|
|
1526
|
+
run_event_data = {
|
|
1527
|
+
"message": stream_message_data(snapshot_after_event)
|
|
1528
|
+
}
|
|
1529
|
+
if event.event == "done" and snapshot_after_event is not None:
|
|
1530
|
+
await append_run_snapshot(run, snapshot_after_event)
|
|
1531
|
+
await append_run_event(run, event.event, run_event_data)
|
|
1532
|
+
else:
|
|
1533
|
+
if should_append_run_event:
|
|
1534
|
+
await append_run_event(run, event.event, run_event_data)
|
|
1535
|
+
if snapshot_after_event is not None:
|
|
1536
|
+
await append_run_snapshot(run, snapshot_after_event)
|
|
1399
1537
|
except asyncio.CancelledError:
|
|
1400
1538
|
logger.info("Workspace run stopped")
|
|
1401
1539
|
if not run.discard_on_cancel:
|
|
1402
|
-
persist_assistant("interrupted")
|
|
1540
|
+
interrupted_snapshot = persist_assistant("interrupted")
|
|
1541
|
+
if interrupted_snapshot is not None:
|
|
1542
|
+
await append_run_snapshot(run, interrupted_snapshot)
|
|
1403
1543
|
await append_run_event(
|
|
1404
1544
|
run,
|
|
1405
1545
|
"error",
|
|
@@ -1423,7 +1563,9 @@ def create_app(
|
|
|
1423
1563
|
str(error) or EMPTY_MODEL_RESPONSE_DETAIL,
|
|
1424
1564
|
)
|
|
1425
1565
|
)
|
|
1426
|
-
persist_assistant("failed")
|
|
1566
|
+
failed_snapshot = persist_assistant("failed")
|
|
1567
|
+
if failed_snapshot is not None:
|
|
1568
|
+
await append_run_snapshot(run, failed_snapshot)
|
|
1427
1569
|
await append_run_event(run, "error", run_error_event_data(error_item))
|
|
1428
1570
|
finally:
|
|
1429
1571
|
run.is_done = True
|
|
@@ -1436,9 +1578,16 @@ def create_app(
|
|
|
1436
1578
|
return run
|
|
1437
1579
|
|
|
1438
1580
|
async def workspace_run_stream(
|
|
1439
|
-
run: WorkspaceRun, after: int = 0
|
|
1581
|
+
run: WorkspaceRun, after: int = 0, include_snapshots: bool = True
|
|
1440
1582
|
) -> AsyncIterator[str]:
|
|
1441
1583
|
next_event_index = after + 1
|
|
1584
|
+
reconnect_snapshot = run_snapshot_data_at(run, after) if after > 0 else None
|
|
1585
|
+
if include_snapshots and reconnect_snapshot is not None:
|
|
1586
|
+
yield stream_event(
|
|
1587
|
+
"snapshot",
|
|
1588
|
+
{"message": reconnect_snapshot},
|
|
1589
|
+
event_id=after,
|
|
1590
|
+
)
|
|
1442
1591
|
while True:
|
|
1443
1592
|
async with run.condition:
|
|
1444
1593
|
|
|
@@ -1452,7 +1601,9 @@ def create_app(
|
|
|
1452
1601
|
|
|
1453
1602
|
for index, event, data in events:
|
|
1454
1603
|
next_event_index = index + 1
|
|
1455
|
-
|
|
1604
|
+
if event == "snapshot" and not include_snapshots:
|
|
1605
|
+
continue
|
|
1606
|
+
yield stream_event(event, data, event_id=index)
|
|
1456
1607
|
if event in {"done", "error"}:
|
|
1457
1608
|
return
|
|
1458
1609
|
|
|
@@ -1498,6 +1649,7 @@ def create_app(
|
|
|
1498
1649
|
*,
|
|
1499
1650
|
checkpoint: StoredCompactionCheckpoint | None,
|
|
1500
1651
|
connection: ProviderConnection,
|
|
1652
|
+
context_window_limit: int,
|
|
1501
1653
|
state: StoredState,
|
|
1502
1654
|
) -> tuple[StoredMessage, TokenUsageInfo]:
|
|
1503
1655
|
logger.info("Workspace compact requested")
|
|
@@ -1513,6 +1665,7 @@ def create_app(
|
|
|
1513
1665
|
|
|
1514
1666
|
marker, _, usage_info = await save_context_checkpoint(
|
|
1515
1667
|
connection=connection,
|
|
1668
|
+
context_window_limit=context_window_limit,
|
|
1516
1669
|
marker_content=COMPACTED_CONTEXT_MARKER,
|
|
1517
1670
|
messages=state.messages,
|
|
1518
1671
|
model_history=model_history,
|
|
@@ -1551,12 +1704,14 @@ def create_app(
|
|
|
1551
1704
|
)
|
|
1552
1705
|
state = store.read_state()
|
|
1553
1706
|
connection = selected_connection(state)
|
|
1707
|
+
context_window_limit = context_window_for_settings(state.settings)
|
|
1554
1708
|
checkpoint = store.read_active_compaction_checkpoint()
|
|
1555
1709
|
store.save_is_compacting(True)
|
|
1556
1710
|
compact_task = asyncio.create_task(
|
|
1557
1711
|
run_manual_compact(
|
|
1558
1712
|
checkpoint=checkpoint,
|
|
1559
1713
|
connection=connection,
|
|
1714
|
+
context_window_limit=context_window_limit,
|
|
1560
1715
|
state=state,
|
|
1561
1716
|
)
|
|
1562
1717
|
)
|
|
@@ -1596,7 +1751,7 @@ def create_app(
|
|
|
1596
1751
|
logger.log(TRACE_LEVEL, "Workspace user content=%r", request.content)
|
|
1597
1752
|
run = create_workspace_run(request.content)
|
|
1598
1753
|
return StreamingResponse(
|
|
1599
|
-
workspace_run_stream(run),
|
|
1754
|
+
workspace_run_stream(run, include_snapshots=False),
|
|
1600
1755
|
media_type="text/event-stream",
|
|
1601
1756
|
)
|
|
1602
1757
|
|