@researai/deepscientist 1.5.11 → 1.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/bin/ds.js +358 -61
- package/docs/en/00_QUICK_START.md +35 -3
- package/docs/en/01_SETTINGS_REFERENCE.md +11 -0
- package/docs/en/02_START_RESEARCH_GUIDE.md +68 -4
- package/docs/en/09_DOCTOR.md +28 -3
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +21 -2
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +284 -0
- package/docs/en/README.md +4 -0
- package/docs/zh/00_QUICK_START.md +34 -2
- package/docs/zh/01_SETTINGS_REFERENCE.md +11 -0
- package/docs/zh/02_START_RESEARCH_GUIDE.md +69 -3
- package/docs/zh/09_DOCTOR.md +28 -1
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +21 -2
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +285 -0
- package/docs/zh/README.md +4 -1
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/bash_exec/monitor.py +7 -5
- package/src/deepscientist/bash_exec/service.py +84 -21
- package/src/deepscientist/channels/local.py +3 -3
- package/src/deepscientist/channels/qq.py +7 -7
- package/src/deepscientist/channels/relay.py +7 -7
- package/src/deepscientist/channels/weixin_ilink.py +90 -19
- package/src/deepscientist/config/models.py +1 -0
- package/src/deepscientist/config/service.py +121 -20
- package/src/deepscientist/daemon/app.py +314 -6
- package/src/deepscientist/doctor.py +1 -5
- package/src/deepscientist/mcp/server.py +124 -3
- package/src/deepscientist/prompts/builder.py +113 -11
- package/src/deepscientist/quest/service.py +247 -31
- package/src/deepscientist/runners/codex.py +121 -22
- package/src/deepscientist/runners/runtime_overrides.py +6 -0
- package/src/deepscientist/shared.py +33 -14
- package/src/prompts/connectors/qq.md +2 -1
- package/src/prompts/connectors/weixin.md +2 -1
- package/src/prompts/contracts/shared_interaction.md +4 -1
- package/src/prompts/system.md +59 -9
- package/src/skills/analysis-campaign/SKILL.md +46 -6
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +21 -8
- package/src/skills/baseline/SKILL.md +1 -1
- package/src/skills/decision/SKILL.md +1 -1
- package/src/skills/experiment/SKILL.md +1 -1
- package/src/skills/finalize/SKILL.md +1 -1
- package/src/skills/idea/SKILL.md +1 -1
- package/src/skills/intake-audit/SKILL.md +1 -1
- package/src/skills/rebuttal/SKILL.md +74 -1
- package/src/skills/rebuttal/references/response-letter-template.md +55 -11
- package/src/skills/review/SKILL.md +118 -1
- package/src/skills/review/references/experiment-todo-template.md +23 -0
- package/src/skills/review/references/review-report-template.md +16 -0
- package/src/skills/review/references/revision-log-template.md +4 -0
- package/src/skills/scout/SKILL.md +1 -1
- package/src/skills/write/SKILL.md +168 -7
- package/src/skills/write/references/paper-experiment-matrix-template.md +131 -0
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AiManusChatView-D0mTXG4-.js → AiManusChatView-CnJcXynW.js} +12 -12
- package/src/ui/dist/assets/{AnalysisPlugin-Db0cTXxm.js → AnalysisPlugin-DeyzPEhV.js} +1 -1
- package/src/ui/dist/assets/{CliPlugin-DrV8je02.js → CliPlugin-CB1YODQn.js} +9 -9
- package/src/ui/dist/assets/{CodeEditorPlugin-QXMSCH71.js → CodeEditorPlugin-B-xicq1e.js} +8 -8
- package/src/ui/dist/assets/{CodeViewerPlugin-7hhtWj_E.js → CodeViewerPlugin-DT54ysXa.js} +5 -5
- package/src/ui/dist/assets/{DocViewerPlugin-BWMSnRJe.js → DocViewerPlugin-DQtKT-VD.js} +3 -3
- package/src/ui/dist/assets/{GitDiffViewerPlugin-7J9h9Vy_.js → GitDiffViewerPlugin-hqHbCfnv.js} +20 -20
- package/src/ui/dist/assets/{ImageViewerPlugin-CHJl_0lr.js → ImageViewerPlugin-OcVo33jV.js} +5 -5
- package/src/ui/dist/assets/{LabCopilotPanel-1qSow1es.js → LabCopilotPanel-DdGwhEUV.js} +11 -11
- package/src/ui/dist/assets/{LabPlugin-eQpPPCEp.js → LabPlugin-Ciz1gDaX.js} +2 -2
- package/src/ui/dist/assets/{LatexPlugin-BwRfi89Z.js → LatexPlugin-BhmjNQRC.js} +37 -11
- package/src/ui/dist/assets/{MarkdownViewerPlugin-836PVQWV.js → MarkdownViewerPlugin-BzdVH9Bx.js} +4 -4
- package/src/ui/dist/assets/{MarketplacePlugin-C2y_556i.js → MarketplacePlugin-DmyHspXt.js} +3 -3
- package/src/ui/dist/assets/{NotebookEditor-DIX7Mlzu.js → NotebookEditor-BMXKrDRk.js} +1 -1
- package/src/ui/dist/assets/{NotebookEditor-BRzJbGsn.js → NotebookEditor-BTVYRGkm.js} +11 -11
- package/src/ui/dist/assets/{PdfLoader-DzRaTAlq.js → PdfLoader-CvcjJHXv.js} +1 -1
- package/src/ui/dist/assets/{PdfMarkdownPlugin-DZUfIUnp.js → PdfMarkdownPlugin-DW2ej8Vk.js} +2 -2
- package/src/ui/dist/assets/{PdfViewerPlugin-BwtICzue.js → PdfViewerPlugin-CmlDxbhU.js} +10 -10
- package/src/ui/dist/assets/{SearchPlugin-DHeIAMsx.js → SearchPlugin-DAjQZPSv.js} +1 -1
- package/src/ui/dist/assets/{TextViewerPlugin-C3tCmFox.js → TextViewerPlugin-C-nVAZb_.js} +5 -5
- package/src/ui/dist/assets/{VNCViewer-CQsKVm3t.js → VNCViewer-D7-dIYon.js} +10 -10
- package/src/ui/dist/assets/{bot-BEA2vWuK.js → bot-C_G4WtNI.js} +1 -1
- package/src/ui/dist/assets/{code-XfbSR8K2.js → code-Cd7WfiWq.js} +1 -1
- package/src/ui/dist/assets/{file-content-BjxNaIfy.js → file-content-B57zsL9y.js} +1 -1
- package/src/ui/dist/assets/{file-diff-panel-D_lLVQk0.js → file-diff-panel-DVoheLFq.js} +1 -1
- package/src/ui/dist/assets/{file-socket-D9x_5vlY.js → file-socket-B5kXFxZP.js} +1 -1
- package/src/ui/dist/assets/{image-BhWT33W1.js → image-LLOjkMHF.js} +1 -1
- package/src/ui/dist/assets/{index-Dqj-Mjb4.css → index-BQG-1s2o.css} +40 -2
- package/src/ui/dist/assets/{index--c4iXtuy.js → index-C3r2iGrp.js} +12 -12
- package/src/ui/dist/assets/{index-DZTZ8mWP.js → index-CLQauncb.js} +911 -120
- package/src/ui/dist/assets/{index-PJbSbPTy.js → index-Dxa2eYMY.js} +1 -1
- package/src/ui/dist/assets/{index-BDxipwrC.js → index-hOUOWbW2.js} +2 -2
- package/src/ui/dist/assets/{monaco-K8izTGgo.js → monaco-BGGAEii3.js} +1 -1
- package/src/ui/dist/assets/{pdf-effect-queue-DfBors6y.js → pdf-effect-queue-DlEr1_y5.js} +1 -1
- package/src/ui/dist/assets/{popover-yFK1J4fL.js → popover-CWJbJuYY.js} +1 -1
- package/src/ui/dist/assets/{project-sync-PENr2zcz.js → project-sync-CRJiucYO.js} +18 -4
- package/src/ui/dist/assets/{select-CAbJDfYv.js → select-CoHB7pvH.js} +2 -2
- package/src/ui/dist/assets/{sigma-DEuYJqTl.js → sigma-D5aJWR8J.js} +1 -1
- package/src/ui/dist/assets/{square-check-big-omoSUmcd.js → square-check-big-DUK_mnkS.js} +1 -1
- package/src/ui/dist/assets/{trash--F119N47.js → trash-ChU3SEE3.js} +1 -1
- package/src/ui/dist/assets/{useCliAccess-D31UR23I.js → useCliAccess-BrJBV3tY.js} +1 -1
- package/src/ui/dist/assets/{useFileDiffOverlay-BH6KcMzq.js → useFileDiffOverlay-C2OQaVWc.js} +1 -1
- package/src/ui/dist/assets/{wrap-text-CZ613PM5.js → wrap-text-C7Qqh-om.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-BgDLAv3z.js → zoom-out-rtX0FKya.js} +1 -1
- package/src/ui/dist/index.html +2 -2
|
@@ -257,7 +257,7 @@ class PromptBuilder:
|
|
|
257
257
|
f"- bound_external_connector_count: {surface_context['bound_external_connector_count']}",
|
|
258
258
|
"- surface_rule: treat web, TUI, and connector threads as one continuous quest, but adapt the amount of detail to the active surface.",
|
|
259
259
|
"- surface_reply_rule: use artifact.interact(...) for durable user-visible continuity; do not dump raw internal tool chatter into connector replies.",
|
|
260
|
-
"- connector_contract_rule:
|
|
260
|
+
"- connector_contract_rule: choose the active connector surface from the latest inbound external user turn when one exists; otherwise fall back to the bound external connector; keep purely local web/TUI turns on the local surface even if the quest is externally bound.",
|
|
261
261
|
]
|
|
262
262
|
|
|
263
263
|
if connector == "qq":
|
|
@@ -316,12 +316,23 @@ class PromptBuilder:
|
|
|
316
316
|
if str(parsed.get("connector") or "").strip().lower() == "local":
|
|
317
317
|
continue
|
|
318
318
|
bound_external.append(parsed)
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
319
|
+
latest_connector = str((latest_user_parsed or {}).get("connector") or "").strip().lower()
|
|
320
|
+
if latest_connector and latest_connector != "local":
|
|
321
|
+
active = latest_user_parsed
|
|
322
|
+
origin = "latest_user_source"
|
|
323
|
+
elif latest_user is not None:
|
|
324
|
+
return {
|
|
325
|
+
"latest_user_source": latest_user_source,
|
|
326
|
+
"active_surface": "local",
|
|
327
|
+
"active_connector": "local",
|
|
328
|
+
"active_chat_type": "local",
|
|
329
|
+
"active_chat_id": "default",
|
|
330
|
+
"active_connector_origin": "latest_user_source_local",
|
|
331
|
+
"bound_external_connector_count": len(bound_external),
|
|
332
|
+
}
|
|
333
|
+
else:
|
|
334
|
+
active = bound_external[0] if bound_external else None
|
|
335
|
+
origin = "bound_external_binding" if active is not None else "none"
|
|
325
336
|
if active is None:
|
|
326
337
|
return {
|
|
327
338
|
"latest_user_source": latest_user_source,
|
|
@@ -687,18 +698,51 @@ class PromptBuilder:
|
|
|
687
698
|
startup_contract = snapshot.get("startup_contract")
|
|
688
699
|
if isinstance(startup_contract, dict):
|
|
689
700
|
value = str(startup_contract.get("custom_profile") or "").strip().lower()
|
|
690
|
-
if value in {"continue_existing_state", "revision_rebuttal", "freeform"}:
|
|
701
|
+
if value in {"continue_existing_state", "review_audit", "revision_rebuttal", "freeform"}:
|
|
691
702
|
return value
|
|
692
703
|
return "freeform"
|
|
693
704
|
|
|
705
|
+
@staticmethod
|
|
706
|
+
def _baseline_execution_policy(snapshot: dict) -> str:
|
|
707
|
+
startup_contract = snapshot.get("startup_contract")
|
|
708
|
+
if isinstance(startup_contract, dict):
|
|
709
|
+
value = str(startup_contract.get("baseline_execution_policy") or "").strip().lower()
|
|
710
|
+
if value in {"auto", "must_reproduce_or_verify", "reuse_existing_only", "skip_unless_blocking"}:
|
|
711
|
+
return value
|
|
712
|
+
return "auto"
|
|
713
|
+
|
|
714
|
+
@staticmethod
|
|
715
|
+
def _review_followup_policy(snapshot: dict) -> str:
|
|
716
|
+
startup_contract = snapshot.get("startup_contract")
|
|
717
|
+
if isinstance(startup_contract, dict):
|
|
718
|
+
value = str(startup_contract.get("review_followup_policy") or "").strip().lower()
|
|
719
|
+
if value in {"audit_only", "auto_execute_followups", "user_gated_followups"}:
|
|
720
|
+
return value
|
|
721
|
+
return "audit_only"
|
|
722
|
+
|
|
723
|
+
@staticmethod
|
|
724
|
+
def _manuscript_edit_mode(snapshot: dict) -> str:
|
|
725
|
+
startup_contract = snapshot.get("startup_contract")
|
|
726
|
+
if isinstance(startup_contract, dict):
|
|
727
|
+
value = str(startup_contract.get("manuscript_edit_mode") or "").strip().lower()
|
|
728
|
+
if value in {"none", "copy_ready_text", "latex_required"}:
|
|
729
|
+
return value
|
|
730
|
+
return "none"
|
|
731
|
+
|
|
694
732
|
def _research_delivery_policy_block(self, snapshot: dict) -> str:
|
|
695
733
|
need_research_paper = self._need_research_paper(snapshot)
|
|
696
734
|
launch_mode = self._launch_mode(snapshot)
|
|
697
735
|
custom_profile = self._custom_profile(snapshot)
|
|
736
|
+
baseline_execution_policy = self._baseline_execution_policy(snapshot)
|
|
737
|
+
review_followup_policy = self._review_followup_policy(snapshot)
|
|
738
|
+
manuscript_edit_mode = self._manuscript_edit_mode(snapshot)
|
|
698
739
|
lines = [
|
|
699
740
|
f"- need_research_paper: {need_research_paper}",
|
|
700
741
|
f"- launch_mode: {launch_mode}",
|
|
701
742
|
f"- custom_profile: {custom_profile if launch_mode == 'custom' else 'n/a'}",
|
|
743
|
+
f"- review_followup_policy: {review_followup_policy if custom_profile == 'review_audit' else 'n/a'}",
|
|
744
|
+
f"- baseline_execution_policy: {baseline_execution_policy if launch_mode == 'custom' else 'n/a'}",
|
|
745
|
+
f"- manuscript_edit_mode: {manuscript_edit_mode if custom_profile in {'review_audit', 'revision_rebuttal'} else 'n/a'}",
|
|
702
746
|
f"- delivery_mode: {'paper_required' if need_research_paper else 'algorithm_first'}",
|
|
703
747
|
"- idea_stage_rule: every accepted idea submission should normally create a new branch/worktree and a new user-visible research node.",
|
|
704
748
|
"- idea_draft_rule: before `artifact.submit_idea(...)`, first finish a concise durable Markdown draft for the chosen route; keep `idea.md` compact and `draft.md` richer.",
|
|
@@ -713,7 +757,7 @@ class PromptBuilder:
|
|
|
713
757
|
lines.extend(
|
|
714
758
|
[
|
|
715
759
|
"- custom_launch_rule: do not force the canonical full-research path when the custom startup contract is narrower.",
|
|
716
|
-
"- custom_context_rule: treat `entry_state_summary`, `review_summary`, and `custom_brief` as active runtime context rather than decorative metadata.",
|
|
760
|
+
"- custom_context_rule: treat `entry_state_summary`, `review_summary`, `review_materials`, and `custom_brief` as active runtime context rather than decorative metadata.",
|
|
717
761
|
]
|
|
718
762
|
)
|
|
719
763
|
if custom_profile == "continue_existing_state":
|
|
@@ -723,6 +767,31 @@ class PromptBuilder:
|
|
|
723
767
|
"- reuse_first_rule: trust-rank and reconcile existing assets before deciding to rerun anything costly.",
|
|
724
768
|
]
|
|
725
769
|
)
|
|
770
|
+
elif custom_profile == "review_audit":
|
|
771
|
+
lines.extend(
|
|
772
|
+
[
|
|
773
|
+
"- review_entry_rule: treat the current draft/paper state as the active contract; open `review` before more writing or finalization.",
|
|
774
|
+
"- review_routing_rule: if that audit finds real evidence gaps, route to `analysis-campaign`, `baseline`, `scout`, or `write` instead of polishing blindly.",
|
|
775
|
+
]
|
|
776
|
+
)
|
|
777
|
+
if review_followup_policy == "auto_execute_followups":
|
|
778
|
+
lines.extend(
|
|
779
|
+
[
|
|
780
|
+
"- review_followup_rule: after the audit artifacts are durable, continue automatically into the required experiments, manuscript deltas, and review-closure work instead of stopping at the audit report.",
|
|
781
|
+
]
|
|
782
|
+
)
|
|
783
|
+
elif review_followup_policy == "user_gated_followups":
|
|
784
|
+
lines.extend(
|
|
785
|
+
[
|
|
786
|
+
"- review_followup_rule: after the audit artifacts are durable, package the next expensive follow-up step into one structured decision instead of continuing silently.",
|
|
787
|
+
]
|
|
788
|
+
)
|
|
789
|
+
else:
|
|
790
|
+
lines.extend(
|
|
791
|
+
[
|
|
792
|
+
"- review_followup_rule: stop after the durable audit artifacts and route recommendation unless the user later asks for execution follow-up.",
|
|
793
|
+
]
|
|
794
|
+
)
|
|
726
795
|
elif custom_profile == "revision_rebuttal":
|
|
727
796
|
lines.extend(
|
|
728
797
|
[
|
|
@@ -736,6 +805,36 @@ class PromptBuilder:
|
|
|
736
805
|
"- freeform_entry_rule: prefer the custom brief over the default stage order and open only the skills actually needed.",
|
|
737
806
|
]
|
|
738
807
|
)
|
|
808
|
+
if baseline_execution_policy == "must_reproduce_or_verify":
|
|
809
|
+
lines.extend(
|
|
810
|
+
[
|
|
811
|
+
"- baseline_execution_rule: before reviewer-linked follow-up work, explicitly verify or recover the rebuttal-critical baseline/comparator instead of assuming the stored evidence is still trustworthy.",
|
|
812
|
+
]
|
|
813
|
+
)
|
|
814
|
+
elif baseline_execution_policy == "reuse_existing_only":
|
|
815
|
+
lines.extend(
|
|
816
|
+
[
|
|
817
|
+
"- baseline_execution_rule: prefer the existing trusted baseline/results and do not rerun them unless you find concrete inconsistency, corruption, or missing-evidence problems.",
|
|
818
|
+
]
|
|
819
|
+
)
|
|
820
|
+
elif baseline_execution_policy == "skip_unless_blocking":
|
|
821
|
+
lines.extend(
|
|
822
|
+
[
|
|
823
|
+
"- baseline_execution_rule: do not spend time on baseline reruns by default; only open `baseline` if a named review/rebuttal issue truly depends on a missing comparator or unusable prior evidence.",
|
|
824
|
+
]
|
|
825
|
+
)
|
|
826
|
+
if manuscript_edit_mode == "latex_required":
|
|
827
|
+
lines.extend(
|
|
828
|
+
[
|
|
829
|
+
"- manuscript_edit_rule: when manuscript revision is needed, treat the provided LaTeX tree or `paper/latex/` as the authoritative writing surface; if LaTeX source is unavailable, produce LaTeX-ready replacement text and make that blocker explicit instead of pretending the manuscript was edited.",
|
|
830
|
+
]
|
|
831
|
+
)
|
|
832
|
+
elif manuscript_edit_mode == "copy_ready_text":
|
|
833
|
+
lines.extend(
|
|
834
|
+
[
|
|
835
|
+
"- manuscript_edit_rule: when manuscript revision is needed, provide section-level copy-ready replacement text and explicit deltas even if no LaTeX source is available.",
|
|
836
|
+
]
|
|
837
|
+
)
|
|
739
838
|
if need_research_paper:
|
|
740
839
|
lines.extend(
|
|
741
840
|
[
|
|
@@ -783,7 +882,10 @@ class PromptBuilder:
|
|
|
783
882
|
"- interaction_protocol: first message may be plain conversation; after that, treat artifact.interact threads and mailbox polls as the main continuity spine across TUI, web, and connectors",
|
|
784
883
|
"- mailbox_protocol: artifact.interact(include_recent_inbound_messages=True) is the queued human-message mailbox; when it returns user text, treat that input as higher priority than background subtasks until it has been acknowledged",
|
|
785
884
|
"- acknowledgment_protocol: after artifact.interact returns any human message, immediately send one substantive artifact.interact(...) follow-up; if the active connector runtime already emitted a transport-level receipt acknowledgement, do not send a redundant receipt-only message; if answerable, answer directly, otherwise state the short plan, nearest checkpoint, and that the current background subtask is paused",
|
|
786
|
-
"- progress_protocol: emit artifact.interact(kind='progress', reply_mode='threaded', ...) at real human-meaningful checkpoints; if no natural checkpoint appears during active user-relevant work, prefer a concise keepalive once work has crossed roughly
|
|
885
|
+
"- progress_protocol: emit artifact.interact(kind='progress', reply_mode='threaded', ...) at real human-meaningful checkpoints; if no natural checkpoint appears during active user-relevant work, prefer a concise keepalive once work has crossed roughly 6 tool calls with a human-meaningful delta, and do not drift beyond roughly 12 tool calls or about 8 minutes without a user-visible update",
|
|
886
|
+
"- stage_kickoff_protocol: after entering any stage or companion skill, send one user-visible artifact.interact progress update within the first 3 tool calls of substantial work",
|
|
887
|
+
"- read_plan_keepalive_protocol: if work is still mostly reading, searching, comparison, or planning, do not wait too long for a 'big result'; send one concise user-visible checkpoint after about 5 consecutive tool calls if the user would otherwise see silence",
|
|
888
|
+
"- subtask_boundary_protocol: send a user-visible update whenever the active subtask changes materially, especially across intake -> audit, audit -> experiment planning, experiment planning -> run launch, run result -> drafting, or drafting -> review/rebuttal",
|
|
787
889
|
"- smoke_then_detach_protocol: for baseline reproduction, main experiments, and analysis experiments, first validate the command path with a bounded smoke test; once the smoke test passes, launch the real long run with bash_exec(mode='detach', ...) and usually leave timeout_seconds unset rather than guessing a fake deadline",
|
|
788
890
|
"- progress_first_monitoring_protocol: when supervising a long-running bash_exec session, judge health by forward progress rather than by whether the final artifact has already appeared within a short window",
|
|
789
891
|
"- delta_monitoring_protocol: compare deltas such as new sample counters, new task counters, new saved files, new last_output_seq values, or changed last_progress payloads; if any of these move forward, treat the run as alive and keep observing",
|
|
@@ -804,7 +906,7 @@ class PromptBuilder:
|
|
|
804
906
|
"- respect_protocol: write user-facing updates as natural, respectful, easy-to-follow chat; do not sound like a formal status report or internal tool log",
|
|
805
907
|
"- omission_protocol: for ordinary user-facing updates, omit file paths, artifact ids, branch/worktree ids, session ids, raw commands, raw logs, and internal tool names unless the user asked for them or needs them to act",
|
|
806
908
|
"- compaction_protocol: ordinary artifact.interact progress updates should usually fit in 2 to 4 short sentences and should not read like a monitoring transcript or execution diary",
|
|
807
|
-
"- tool_call_keepalive_protocol: for active multi-step work outside long detached experiment waits, prefer sending one concise artifact.interact progress update after roughly
|
|
909
|
+
"- tool_call_keepalive_protocol: for active multi-step work outside long detached experiment waits, prefer sending one concise artifact.interact progress update after roughly 6 tool calls when there is already a human-meaningful delta, and do not exceed roughly 12 tool calls or about 8 minutes without a user-visible checkpoint",
|
|
808
910
|
"- human_progress_shape_protocol: ordinary progress updates should usually make three things explicit in human language: the current task, the main difficulty or latest real progress, and the concrete next measure you will take",
|
|
809
911
|
"- milestone_graduation_protocol: keep ordinary subtask completions concise; upgrade to a richer milestone report only when a stage-significant deliverable or route-changing checkpoint becomes durably true",
|
|
810
912
|
"- eta_visibility_protocol: for baseline reproduction, main experiments, analysis experiments, and other important long-running phases, progress updates should also make the expected time to the next meaningful result, next milestone, or next user-visible update explicit; use roughly 10 to 30 minutes as the normal update window, and if the ETA is unreliable, say that and give a realistic next check-in window instead",
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import copy
|
|
4
|
+
from collections import deque
|
|
4
5
|
from contextlib import contextmanager
|
|
6
|
+
from datetime import UTC, datetime, timedelta
|
|
5
7
|
import hashlib
|
|
6
8
|
import subprocess
|
|
7
9
|
import json
|
|
@@ -23,7 +25,7 @@ from ..connector_runtime import conversation_identity_key, normalize_conversatio
|
|
|
23
25
|
from ..gitops import current_branch, export_git_graph, head_commit, init_repo
|
|
24
26
|
from ..home import repo_root
|
|
25
27
|
from ..registries import BaselineRegistry
|
|
26
|
-
from ..shared import append_jsonl, ensure_dir, generate_id, read_json, read_jsonl, read_text, read_yaml, resolve_within, run_command, sha256_text, slugify, utc_now, write_json, write_text, write_yaml
|
|
28
|
+
from ..shared import append_jsonl, ensure_dir, generate_id, iter_jsonl, read_json, read_jsonl, read_jsonl_tail, read_text, read_yaml, resolve_within, run_command, sha256_text, slugify, utc_now, write_json, write_text, write_yaml
|
|
27
29
|
from ..skills import SkillInstaller
|
|
28
30
|
from ..web_search import extract_web_search_payload
|
|
29
31
|
from .layout import (
|
|
@@ -42,6 +44,126 @@ _UNSET = object()
|
|
|
42
44
|
# Matches strings made up of 1 to 10 digit characters.
_NUMERIC_QUEST_ID_PATTERN = re.compile(r"^\d{1,10}$")
# Largest value that fits in 10 decimal digits.
_MAX_NUMERIC_QUEST_ID_VALUE = 9_999_999_999
# NOTE(review): presumably the zero-pad width applied to numeric quest ids — confirm against usage.
_NUMERIC_QUEST_ID_PAD_WIDTH = 3
# 24-hour window; the stale-runtime recovery summary refers to it as the
# "24-hour recovery window" for auto-resume eligibility.
_CRASH_AUTO_RESUME_WINDOW = timedelta(hours=24)
# JSONL files larger than this (4 MiB) bypass the in-memory JSONL cache and
# are read directly on every call.
_JSONL_CACHE_MAX_BYTES = 4 * 1024 * 1024
# NOTE(review): presumably the number of trailing Codex history records to load — usage not visible here.
_CODEX_HISTORY_TAIL_LIMIT = 400
# Read size (64 KiB) used by `_iter_jsonl_records_safely` when streaming a file.
_JSONL_STREAM_CHUNK_BYTES = 64 * 1024
# Default per-line size limit (8 MiB) for `_iter_jsonl_records_safely`; longer
# lines are replaced by a placeholder event instead of being parsed.
_EVENTS_OVERSIZED_LINE_BYTES = 8 * 1024 * 1024
# Number of leading bytes (4 KiB) `_iter_jsonl_records_safely` tops the retained
# prefix of an oversized line up to before scanning it for metadata.
_OVERSIZED_EVENT_PREFIX_BYTES = 4096
# Byte-level patterns used to pull a few fields out of an oversized line's
# prefix without JSON-decoding the whole line.
_EVENT_TYPE_BYTES_RE = re.compile(rb'"(?:type|event_type)"\s*:\s*"([^"]+)"')
_EVENT_TOOL_NAME_BYTES_RE = re.compile(rb'"tool_name"\s*:\s*"([^"]+)"')
_EVENT_RUN_ID_BYTES_RE = re.compile(rb'"run_id"\s*:\s*"([^"]+)"')
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _oversized_event_placeholder(*, prefix: bytes, line_bytes: int) -> dict[str, Any]:
    """Build a stand-in event for a JSONL line that was too large to parse.

    Args:
        prefix: Leading raw bytes of the oversized line. They are scanned with
            byte regexes for the event type, tool name and run id.
        line_bytes: Size of the omitted line in bytes; reported in the summary
            and in ``oversized_bytes``.

    Returns:
        A dict with ``type`` (falling back to ``"runner.tool_result"`` when no
        type is found in *prefix*), ``status="omitted"``, a human-readable
        ``summary``, ``oversized_event=True`` and ``oversized_bytes``.
        ``tool_name`` and ``run_id`` are included only when found in *prefix*.
    """

    def _extract(pattern: re.Pattern[bytes]) -> str | None:
        match = pattern.search(prefix)
        if match is None:
            return None
        # errors="ignore" drops undecodable bytes instead of raising, so no
        # exception handling is needed around the decode.
        return match.group(1).decode("utf-8", errors="ignore").strip() or None

    payload: dict[str, Any] = {
        "type": _extract(_EVENT_TYPE_BYTES_RE) or "runner.tool_result",
        "status": "omitted",
        "summary": f"Omitted oversized quest event payload ({line_bytes} bytes) while reading event history.",
        "oversized_event": True,
        "oversized_bytes": line_bytes,
    }
    tool_name = _extract(_EVENT_TOOL_NAME_BYTES_RE)
    if tool_name:
        payload["tool_name"] = tool_name
    run_id = _extract(_EVENT_RUN_ID_BYTES_RE)
    if run_id:
        payload["run_id"] = run_id
    return payload
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _decode_jsonl_record(raw: bytes) -> dict[str, Any] | None:
    """Parse one stripped JSONL line; return the dict, or None if it is not a JSON object."""
    if not raw:
        return None
    try:
        payload = json.loads(raw)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # json.loads on bytes decodes first, so invalid byte sequences raise
        # UnicodeDecodeError rather than JSONDecodeError; skip both kinds of
        # corrupt line instead of aborting the whole iteration.
        return None
    return payload if isinstance(payload, dict) else None


def _iter_jsonl_records_safely(
    path: Path,
    *,
    oversized_line_bytes: int = _EVENTS_OVERSIZED_LINE_BYTES,
):
    """Stream JSON-object records from a JSONL file in bounded chunks.

    The file is read in binary chunks of ``_JSONL_STREAM_CHUNK_BYTES`` and
    split on ``b"\\n"``. Each line is handled as follows:

    * blank lines, lines that fail to decode/parse, and JSON values that are
      not objects are skipped;
    * a line whose byte length (excluding the newline) exceeds
      *oversized_line_bytes* is not parsed; a placeholder dict from
      ``_oversized_event_placeholder`` is yielded in its place;
    * a final line without a trailing newline is handled the same way.

    Args:
        path: JSONL file to read. A missing file yields nothing.
        oversized_line_bytes: Per-line size limit in bytes.

    Yields:
        ``dict`` records in file order.
    """
    if not path.exists():
        return
    with path.open("rb") as handle:
        buffer = bytearray()  # bytes of the current (not yet oversized) line
        prefix = bytearray()  # retained head of the current oversized line
        current_bytes = 0  # length of the current line seen so far
        oversized = False  # True while skipping the rest of an oversized line
        while True:
            chunk = handle.read(_JSONL_STREAM_CHUNK_BYTES)
            if not chunk:
                break
            start = 0
            while start <= len(chunk):
                newline_index = chunk.find(b"\n", start)
                has_newline = newline_index >= 0
                segment = chunk[start:newline_index] if has_newline else chunk[start:]

                if oversized:
                    # Already past the limit: only count bytes until the line ends.
                    current_bytes += len(segment)
                    if has_newline:
                        yield _oversized_event_placeholder(prefix=bytes(prefix), line_bytes=current_bytes)
                        prefix = bytearray()
                        current_bytes = 0
                        oversized = False
                        start = newline_index + 1
                        continue
                    break

                next_bytes = current_bytes + len(segment)
                if next_bytes > oversized_line_bytes:
                    # The line just crossed the limit: keep its head for
                    # metadata extraction and stop buffering the body.
                    # NOTE(review): `buffer` can already be much larger than
                    # _OVERSIZED_EVENT_PREFIX_BYTES here, so the retained prefix
                    # is not capped at that size — confirm whether a cap was intended.
                    combined_prefix = bytes(buffer)
                    remaining = max(0, _OVERSIZED_EVENT_PREFIX_BYTES - len(combined_prefix))
                    if remaining:
                        combined_prefix += segment[:remaining]
                    prefix = bytearray(combined_prefix)
                    buffer.clear()
                    current_bytes = next_bytes
                    oversized = True
                    if has_newline:
                        yield _oversized_event_placeholder(prefix=bytes(prefix), line_bytes=current_bytes)
                        prefix = bytearray()
                        current_bytes = 0
                        oversized = False
                        start = newline_index + 1
                        continue
                    break

                buffer.extend(segment)
                current_bytes = next_bytes
                if has_newline:
                    raw = bytes(buffer).strip()
                    buffer.clear()
                    current_bytes = 0
                    record = _decode_jsonl_record(raw)
                    if record is not None:
                        yield record
                    start = newline_index + 1
                    continue
                break

        # Flush whatever remains of a final line that had no trailing newline.
        if oversized:
            yield _oversized_event_placeholder(prefix=bytes(prefix), line_bytes=current_bytes)
        elif buffer:
            record = _decode_jsonl_record(bytes(buffer).strip())
            if record is not None:
                yield record
|
|
45
167
|
|
|
46
168
|
|
|
47
169
|
class QuestService:
|
|
@@ -808,21 +930,15 @@ class QuestService:
|
|
|
808
930
|
getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000)),
|
|
809
931
|
stat.st_size,
|
|
810
932
|
)
|
|
933
|
+
if stat.st_size > _JSONL_CACHE_MAX_BYTES:
|
|
934
|
+
with self._jsonl_cache_lock:
|
|
935
|
+
self._jsonl_cache.pop(cache_key, None)
|
|
936
|
+
return read_jsonl(path)
|
|
811
937
|
with self._jsonl_cache_lock:
|
|
812
938
|
cached = self._jsonl_cache.get(cache_key)
|
|
813
939
|
if cached and cached.get("state") == state:
|
|
814
940
|
return cached.get("records") or []
|
|
815
|
-
items
|
|
816
|
-
for line in path.read_text(encoding="utf-8").splitlines():
|
|
817
|
-
line = line.strip()
|
|
818
|
-
if not line:
|
|
819
|
-
continue
|
|
820
|
-
try:
|
|
821
|
-
payload = json.loads(line)
|
|
822
|
-
except json.JSONDecodeError:
|
|
823
|
-
continue
|
|
824
|
-
if isinstance(payload, dict):
|
|
825
|
-
items.append(payload)
|
|
941
|
+
items = read_jsonl(path)
|
|
826
942
|
with self._jsonl_cache_lock:
|
|
827
943
|
self._jsonl_cache[cache_key] = {
|
|
828
944
|
"state": state,
|
|
@@ -830,6 +946,57 @@ class QuestService:
|
|
|
830
946
|
}
|
|
831
947
|
return items
|
|
832
948
|
|
|
949
|
+
@staticmethod
def _read_jsonl_cursor_slice(
    path: Path,
    *,
    after: int = 0,
    before: int | None = None,
    limit: int = 200,
    tail: bool = False,
) -> tuple[list[tuple[int, dict[str, Any]]], int, bool]:
    """Return a window of ``(cursor, record)`` pairs from a JSONL file.

    Records are numbered from 1 in the order produced by
    ``_iter_jsonl_records_safely``. The window mode is chosen in this order:

    * ``before`` given: the last *limit* records with cursor ``< before``.
    * ``tail`` true: the last *limit* records of the file.
    * otherwise: the first *limit* records with cursor ``> after``.

    Args:
        path: JSONL file to read.
        after: Cursor after which forward reading starts.
        before: Exclusive upper cursor bound for backward paging.
        limit: Maximum number of records to return; values ``<= 0`` return no
            records and only count the file.
        tail: Return the end of the file instead of reading forward.

    Returns:
        ``(items, total, has_more)``. ``total`` is the number of records
        counted during the scan; in ``before`` mode the scan stops at the
        cutoff, so it is not the size of the whole file. ``has_more`` reports
        whether records exist outside the window in the paging direction.
    """
    normalized_limit = max(int(limit or 0), 0)
    if not path.exists():
        return [], 0, False
    if normalized_limit <= 0:
        total = sum(1 for _ in _iter_jsonl_records_safely(path))
        return [], total, False

    if before is not None:
        # Normalize once and compare against the normalized cursor, so a
        # numeric string such as "5" behaves like 5 instead of raising
        # TypeError part-way through the scan.
        stop_cursor = max(int(before) - 1, 0)
        window: deque[tuple[int, dict[str, Any]]] = deque(maxlen=normalized_limit)
        total = 0
        for payload in _iter_jsonl_records_safely(path):
            total += 1
            if total > stop_cursor:
                break
            window.append((total, payload))
        has_more = bool(window and window[0][0] > 1)
        return list(window), total, has_more

    if tail:
        window = deque(maxlen=normalized_limit)
        total = 0
        for payload in _iter_jsonl_records_safely(path):
            total += 1
            window.append((total, payload))
        has_more = total > len(window)
        return list(window), total, has_more

    collected: list[tuple[int, dict[str, Any]]] = []
    total = 0
    saw_more = False
    normalized_after = max(int(after or 0), 0)
    for payload in _iter_jsonl_records_safely(path):
        total += 1
        if total <= normalized_after:
            continue
        if len(collected) < normalized_limit:
            collected.append((total, payload))
            continue
        # Keep iterating after the window is full so `total` covers the file.
        saw_more = True
    return collected, total, saw_more
|
|
999
|
+
|
|
833
1000
|
@staticmethod
|
|
834
1001
|
def _path_state(path: Path) -> tuple[int, int, int] | None:
|
|
835
1002
|
if not path.exists():
|
|
@@ -1592,6 +1759,12 @@ class QuestService:
|
|
|
1592
1759
|
if not active_run_id and status != "running":
|
|
1593
1760
|
continue
|
|
1594
1761
|
previous_status = status or "running"
|
|
1762
|
+
last_transition_at = self._runtime_recovery_timestamp(runtime_state, quest_data)
|
|
1763
|
+
recoverable = self._runtime_recovery_eligible(
|
|
1764
|
+
previous_status=previous_status,
|
|
1765
|
+
active_run_id=active_run_id or None,
|
|
1766
|
+
last_transition_at=last_transition_at,
|
|
1767
|
+
)
|
|
1595
1768
|
self.update_runtime_state(
|
|
1596
1769
|
quest_root=quest_root,
|
|
1597
1770
|
status="stopped",
|
|
@@ -1602,6 +1775,8 @@ class QuestService:
|
|
|
1602
1775
|
f"Recovered quest from stale runtime state; previous status `{previous_status}`"
|
|
1603
1776
|
+ (f", abandoned run `{active_run_id}`." if active_run_id else ".")
|
|
1604
1777
|
)
|
|
1778
|
+
if recoverable:
|
|
1779
|
+
summary = f"{summary} Auto-resume is eligible within the 24-hour recovery window."
|
|
1605
1780
|
append_jsonl(
|
|
1606
1781
|
quest_root / ".ds" / "events.jsonl",
|
|
1607
1782
|
{
|
|
@@ -1610,6 +1785,8 @@ class QuestService:
|
|
|
1610
1785
|
"quest_id": quest_root.name,
|
|
1611
1786
|
"previous_status": previous_status,
|
|
1612
1787
|
"abandoned_run_id": active_run_id or None,
|
|
1788
|
+
"last_transition_at": last_transition_at,
|
|
1789
|
+
"recoverable": recoverable,
|
|
1613
1790
|
"status": "stopped",
|
|
1614
1791
|
"summary": summary,
|
|
1615
1792
|
"created_at": utc_now(),
|
|
@@ -1620,11 +1797,53 @@ class QuestService:
|
|
|
1620
1797
|
"quest_id": quest_root.name,
|
|
1621
1798
|
"previous_status": previous_status,
|
|
1622
1799
|
"abandoned_run_id": active_run_id or None,
|
|
1800
|
+
"last_transition_at": last_transition_at,
|
|
1801
|
+
"recoverable": recoverable,
|
|
1623
1802
|
"status": "stopped",
|
|
1624
1803
|
}
|
|
1625
1804
|
)
|
|
1626
1805
|
return reconciled
|
|
1627
1806
|
|
|
1807
|
+
@staticmethod
|
|
1808
|
+
def _parse_runtime_timestamp(value: Any) -> datetime | None:
|
|
1809
|
+
normalized = str(value or "").strip()
|
|
1810
|
+
if not normalized:
|
|
1811
|
+
return None
|
|
1812
|
+
candidate = normalized.replace("Z", "+00:00")
|
|
1813
|
+
try:
|
|
1814
|
+
parsed = datetime.fromisoformat(candidate)
|
|
1815
|
+
except ValueError:
|
|
1816
|
+
return None
|
|
1817
|
+
if parsed.tzinfo is None:
|
|
1818
|
+
parsed = parsed.replace(tzinfo=UTC)
|
|
1819
|
+
return parsed.astimezone(UTC)
|
|
1820
|
+
|
|
1821
|
+
def _runtime_recovery_timestamp(self, runtime_state: dict[str, Any], quest_data: dict[str, Any]) -> str | None:
|
|
1822
|
+
for candidate in (
|
|
1823
|
+
runtime_state.get("last_transition_at"),
|
|
1824
|
+
quest_data.get("updated_at"),
|
|
1825
|
+
quest_data.get("created_at"),
|
|
1826
|
+
):
|
|
1827
|
+
parsed = self._parse_runtime_timestamp(candidate)
|
|
1828
|
+
if parsed is None:
|
|
1829
|
+
continue
|
|
1830
|
+
return parsed.isoformat()
|
|
1831
|
+
return None
|
|
1832
|
+
|
|
1833
|
+
def _runtime_recovery_eligible(
|
|
1834
|
+
self,
|
|
1835
|
+
*,
|
|
1836
|
+
previous_status: str,
|
|
1837
|
+
active_run_id: str | None,
|
|
1838
|
+
last_transition_at: str | None,
|
|
1839
|
+
) -> bool:
|
|
1840
|
+
if previous_status != "running" and not str(active_run_id or "").strip():
|
|
1841
|
+
return False
|
|
1842
|
+
parsed = self._parse_runtime_timestamp(last_transition_at)
|
|
1843
|
+
if parsed is None:
|
|
1844
|
+
return False
|
|
1845
|
+
return datetime.now(UTC) - parsed <= _CRASH_AUTO_RESUME_WINDOW
|
|
1846
|
+
|
|
1628
1847
|
def history(self, quest_id: str, limit: int = 100) -> list[dict]:
    """Return the most recent entries of a quest's main conversation log.

    Args:
        quest_id: Identifier of the quest whose history is read.
        limit: Maximum number of trailing records to return. Zero or a
            negative value yields an empty list.

    Returns:
        Up to ``limit`` records from the end of
        ``.ds/conversations/main.jsonl``, oldest first.
    """
    records = self._read_cached_jsonl(self._quest_root(quest_id) / ".ds" / "conversations" / "main.jsonl")
    # A bare ``records[-limit:]`` returns everything for limit == 0 and drops
    # the oldest entries for negative limits, so guard non-positive limits.
    if limit <= 0:
        return []
    return records[-limit:]
|
|
1630
1849
|
|
|
@@ -1730,40 +1949,37 @@ class QuestService:
|
|
|
1730
1949
|
limit: int = 200,
|
|
1731
1950
|
tail: bool = False,
|
|
1732
1951
|
) -> dict:
|
|
1733
|
-
|
|
1952
|
+
event_path = self._quest_root(quest_id) / ".ds" / "events.jsonl"
|
|
1734
1953
|
normalized_limit = max(limit, 0)
|
|
1735
1954
|
direction = "after"
|
|
1736
1955
|
if before is not None:
|
|
1737
1956
|
direction = "before"
|
|
1738
|
-
end = max(int(before) - 1, 0)
|
|
1739
|
-
start = max(end - normalized_limit, 0)
|
|
1740
|
-
sliced = records[start:end]
|
|
1741
1957
|
elif tail and normalized_limit > 0:
|
|
1742
1958
|
direction = "tail"
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1959
|
+
sliced_records, total_records, has_more = self._read_jsonl_cursor_slice(
|
|
1960
|
+
event_path,
|
|
1961
|
+
after=after,
|
|
1962
|
+
before=before,
|
|
1963
|
+
limit=normalized_limit,
|
|
1964
|
+
tail=tail,
|
|
1965
|
+
)
|
|
1748
1966
|
enriched = []
|
|
1749
|
-
for
|
|
1967
|
+
for cursor, item in sliced_records:
|
|
1750
1968
|
enriched.append(
|
|
1751
1969
|
{
|
|
1752
|
-
"cursor":
|
|
1753
|
-
"event_id": item.get("event_id") or f"evt-{quest_id}-{
|
|
1970
|
+
"cursor": cursor,
|
|
1971
|
+
"event_id": item.get("event_id") or f"evt-{quest_id}-{cursor}",
|
|
1754
1972
|
**item,
|
|
1755
1973
|
}
|
|
1756
1974
|
)
|
|
1757
1975
|
if before is not None:
|
|
1758
|
-
next_cursor =
|
|
1976
|
+
next_cursor = enriched[-1]["cursor"] if enriched else max(min(int(before or 0) - 1, total_records), 0)
|
|
1977
|
+
elif tail:
|
|
1978
|
+
next_cursor = total_records
|
|
1759
1979
|
else:
|
|
1760
|
-
next_cursor =
|
|
1980
|
+
next_cursor = enriched[-1]["cursor"] if enriched else max(int(after or 0), 0)
|
|
1761
1981
|
oldest_cursor = enriched[0]["cursor"] if enriched else None
|
|
1762
1982
|
newest_cursor = enriched[-1]["cursor"] if enriched else None
|
|
1763
|
-
if before is not None:
|
|
1764
|
-
has_more = start > 0
|
|
1765
|
-
else:
|
|
1766
|
-
has_more = start > 0 if tail else next_cursor < len(records)
|
|
1767
1983
|
return {
|
|
1768
1984
|
"quest_id": quest_id,
|
|
1769
1985
|
"cursor": next_cursor,
|
|
@@ -3705,7 +3921,7 @@ def _parse_codex_history(history_root: Path, *, quest_id: str, run_id: str, skil
|
|
|
3705
3921
|
entries: list[dict] = []
|
|
3706
3922
|
known_tool_names: dict[str, str] = {}
|
|
3707
3923
|
|
|
3708
|
-
for raw in
|
|
3924
|
+
for raw in read_jsonl_tail(history_path, _CODEX_HISTORY_TAIL_LIMIT):
|
|
3709
3925
|
timestamp = raw.get("timestamp")
|
|
3710
3926
|
event = raw.get("event")
|
|
3711
3927
|
if not isinstance(event, dict):
|