@researai/deepscientist 1.5.11 → 1.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/README.md +8 -8
  2. package/bin/ds.js +358 -61
  3. package/docs/en/00_QUICK_START.md +35 -3
  4. package/docs/en/01_SETTINGS_REFERENCE.md +11 -0
  5. package/docs/en/02_START_RESEARCH_GUIDE.md +68 -4
  6. package/docs/en/09_DOCTOR.md +28 -3
  7. package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +21 -2
  8. package/docs/en/15_CODEX_PROVIDER_SETUP.md +284 -0
  9. package/docs/en/README.md +4 -0
  10. package/docs/zh/00_QUICK_START.md +34 -2
  11. package/docs/zh/01_SETTINGS_REFERENCE.md +11 -0
  12. package/docs/zh/02_START_RESEARCH_GUIDE.md +69 -3
  13. package/docs/zh/09_DOCTOR.md +28 -1
  14. package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +21 -2
  15. package/docs/zh/15_CODEX_PROVIDER_SETUP.md +285 -0
  16. package/docs/zh/README.md +4 -1
  17. package/package.json +1 -1
  18. package/pyproject.toml +1 -1
  19. package/src/deepscientist/__init__.py +1 -1
  20. package/src/deepscientist/bash_exec/monitor.py +7 -5
  21. package/src/deepscientist/bash_exec/service.py +84 -21
  22. package/src/deepscientist/channels/local.py +3 -3
  23. package/src/deepscientist/channels/qq.py +7 -7
  24. package/src/deepscientist/channels/relay.py +7 -7
  25. package/src/deepscientist/channels/weixin_ilink.py +90 -19
  26. package/src/deepscientist/config/models.py +1 -0
  27. package/src/deepscientist/config/service.py +121 -20
  28. package/src/deepscientist/daemon/app.py +314 -6
  29. package/src/deepscientist/doctor.py +1 -5
  30. package/src/deepscientist/mcp/server.py +124 -3
  31. package/src/deepscientist/prompts/builder.py +113 -11
  32. package/src/deepscientist/quest/service.py +247 -31
  33. package/src/deepscientist/runners/codex.py +121 -22
  34. package/src/deepscientist/runners/runtime_overrides.py +6 -0
  35. package/src/deepscientist/shared.py +33 -14
  36. package/src/prompts/connectors/qq.md +2 -1
  37. package/src/prompts/connectors/weixin.md +2 -1
  38. package/src/prompts/contracts/shared_interaction.md +4 -1
  39. package/src/prompts/system.md +59 -9
  40. package/src/skills/analysis-campaign/SKILL.md +46 -6
  41. package/src/skills/analysis-campaign/references/campaign-plan-template.md +21 -8
  42. package/src/skills/baseline/SKILL.md +1 -1
  43. package/src/skills/decision/SKILL.md +1 -1
  44. package/src/skills/experiment/SKILL.md +1 -1
  45. package/src/skills/finalize/SKILL.md +1 -1
  46. package/src/skills/idea/SKILL.md +1 -1
  47. package/src/skills/intake-audit/SKILL.md +1 -1
  48. package/src/skills/rebuttal/SKILL.md +74 -1
  49. package/src/skills/rebuttal/references/response-letter-template.md +55 -11
  50. package/src/skills/review/SKILL.md +118 -1
  51. package/src/skills/review/references/experiment-todo-template.md +23 -0
  52. package/src/skills/review/references/review-report-template.md +16 -0
  53. package/src/skills/review/references/revision-log-template.md +4 -0
  54. package/src/skills/scout/SKILL.md +1 -1
  55. package/src/skills/write/SKILL.md +168 -7
  56. package/src/skills/write/references/paper-experiment-matrix-template.md +131 -0
  57. package/src/tui/package.json +1 -1
  58. package/src/ui/dist/assets/{AiManusChatView-D0mTXG4-.js → AiManusChatView-CnJcXynW.js} +12 -12
  59. package/src/ui/dist/assets/{AnalysisPlugin-Db0cTXxm.js → AnalysisPlugin-DeyzPEhV.js} +1 -1
  60. package/src/ui/dist/assets/{CliPlugin-DrV8je02.js → CliPlugin-CB1YODQn.js} +9 -9
  61. package/src/ui/dist/assets/{CodeEditorPlugin-QXMSCH71.js → CodeEditorPlugin-B-xicq1e.js} +8 -8
  62. package/src/ui/dist/assets/{CodeViewerPlugin-7hhtWj_E.js → CodeViewerPlugin-DT54ysXa.js} +5 -5
  63. package/src/ui/dist/assets/{DocViewerPlugin-BWMSnRJe.js → DocViewerPlugin-DQtKT-VD.js} +3 -3
  64. package/src/ui/dist/assets/{GitDiffViewerPlugin-7J9h9Vy_.js → GitDiffViewerPlugin-hqHbCfnv.js} +20 -20
  65. package/src/ui/dist/assets/{ImageViewerPlugin-CHJl_0lr.js → ImageViewerPlugin-OcVo33jV.js} +5 -5
  66. package/src/ui/dist/assets/{LabCopilotPanel-1qSow1es.js → LabCopilotPanel-DdGwhEUV.js} +11 -11
  67. package/src/ui/dist/assets/{LabPlugin-eQpPPCEp.js → LabPlugin-Ciz1gDaX.js} +2 -2
  68. package/src/ui/dist/assets/{LatexPlugin-BwRfi89Z.js → LatexPlugin-BhmjNQRC.js} +37 -11
  69. package/src/ui/dist/assets/{MarkdownViewerPlugin-836PVQWV.js → MarkdownViewerPlugin-BzdVH9Bx.js} +4 -4
  70. package/src/ui/dist/assets/{MarketplacePlugin-C2y_556i.js → MarketplacePlugin-DmyHspXt.js} +3 -3
  71. package/src/ui/dist/assets/{NotebookEditor-DIX7Mlzu.js → NotebookEditor-BMXKrDRk.js} +1 -1
  72. package/src/ui/dist/assets/{NotebookEditor-BRzJbGsn.js → NotebookEditor-BTVYRGkm.js} +11 -11
  73. package/src/ui/dist/assets/{PdfLoader-DzRaTAlq.js → PdfLoader-CvcjJHXv.js} +1 -1
  74. package/src/ui/dist/assets/{PdfMarkdownPlugin-DZUfIUnp.js → PdfMarkdownPlugin-DW2ej8Vk.js} +2 -2
  75. package/src/ui/dist/assets/{PdfViewerPlugin-BwtICzue.js → PdfViewerPlugin-CmlDxbhU.js} +10 -10
  76. package/src/ui/dist/assets/{SearchPlugin-DHeIAMsx.js → SearchPlugin-DAjQZPSv.js} +1 -1
  77. package/src/ui/dist/assets/{TextViewerPlugin-C3tCmFox.js → TextViewerPlugin-C-nVAZb_.js} +5 -5
  78. package/src/ui/dist/assets/{VNCViewer-CQsKVm3t.js → VNCViewer-D7-dIYon.js} +10 -10
  79. package/src/ui/dist/assets/{bot-BEA2vWuK.js → bot-C_G4WtNI.js} +1 -1
  80. package/src/ui/dist/assets/{code-XfbSR8K2.js → code-Cd7WfiWq.js} +1 -1
  81. package/src/ui/dist/assets/{file-content-BjxNaIfy.js → file-content-B57zsL9y.js} +1 -1
  82. package/src/ui/dist/assets/{file-diff-panel-D_lLVQk0.js → file-diff-panel-DVoheLFq.js} +1 -1
  83. package/src/ui/dist/assets/{file-socket-D9x_5vlY.js → file-socket-B5kXFxZP.js} +1 -1
  84. package/src/ui/dist/assets/{image-BhWT33W1.js → image-LLOjkMHF.js} +1 -1
  85. package/src/ui/dist/assets/{index-Dqj-Mjb4.css → index-BQG-1s2o.css} +40 -2
  86. package/src/ui/dist/assets/{index--c4iXtuy.js → index-C3r2iGrp.js} +12 -12
  87. package/src/ui/dist/assets/{index-DZTZ8mWP.js → index-CLQauncb.js} +911 -120
  88. package/src/ui/dist/assets/{index-PJbSbPTy.js → index-Dxa2eYMY.js} +1 -1
  89. package/src/ui/dist/assets/{index-BDxipwrC.js → index-hOUOWbW2.js} +2 -2
  90. package/src/ui/dist/assets/{monaco-K8izTGgo.js → monaco-BGGAEii3.js} +1 -1
  91. package/src/ui/dist/assets/{pdf-effect-queue-DfBors6y.js → pdf-effect-queue-DlEr1_y5.js} +1 -1
  92. package/src/ui/dist/assets/{popover-yFK1J4fL.js → popover-CWJbJuYY.js} +1 -1
  93. package/src/ui/dist/assets/{project-sync-PENr2zcz.js → project-sync-CRJiucYO.js} +18 -4
  94. package/src/ui/dist/assets/{select-CAbJDfYv.js → select-CoHB7pvH.js} +2 -2
  95. package/src/ui/dist/assets/{sigma-DEuYJqTl.js → sigma-D5aJWR8J.js} +1 -1
  96. package/src/ui/dist/assets/{square-check-big-omoSUmcd.js → square-check-big-DUK_mnkS.js} +1 -1
  97. package/src/ui/dist/assets/{trash--F119N47.js → trash-ChU3SEE3.js} +1 -1
  98. package/src/ui/dist/assets/{useCliAccess-D31UR23I.js → useCliAccess-BrJBV3tY.js} +1 -1
  99. package/src/ui/dist/assets/{useFileDiffOverlay-BH6KcMzq.js → useFileDiffOverlay-C2OQaVWc.js} +1 -1
  100. package/src/ui/dist/assets/{wrap-text-CZ613PM5.js → wrap-text-C7Qqh-om.js} +1 -1
  101. package/src/ui/dist/assets/{zoom-out-BgDLAv3z.js → zoom-out-rtX0FKya.js} +1 -1
  102. package/src/ui/dist/index.html +2 -2
@@ -257,7 +257,7 @@ class PromptBuilder:
257
257
  f"- bound_external_connector_count: {surface_context['bound_external_connector_count']}",
258
258
  "- surface_rule: treat web, TUI, and connector threads as one continuous quest, but adapt the amount of detail to the active surface.",
259
259
  "- surface_reply_rule: use artifact.interact(...) for durable user-visible continuity; do not dump raw internal tool chatter into connector replies.",
260
- "- connector_contract_rule: load connector-specific prompt fragments only for the active or bound external connector; do not load unused connector contracts.",
260
+ "- connector_contract_rule: choose the active connector surface from the latest inbound external user turn when one exists; otherwise fall back to the bound external connector; keep purely local web/TUI turns on the local surface even if the quest is externally bound.",
261
261
  ]
262
262
 
263
263
  if connector == "qq":
@@ -316,12 +316,23 @@ class PromptBuilder:
316
316
  if str(parsed.get("connector") or "").strip().lower() == "local":
317
317
  continue
318
318
  bound_external.append(parsed)
319
- active = bound_external[0] if bound_external else None
320
- origin = "bound_external_binding" if active is not None else "latest_user_source"
321
- if active is None and latest_user_parsed is not None:
322
- latest_connector = str(latest_user_parsed.get("connector") or "").strip().lower()
323
- if latest_connector and latest_connector != "local":
324
- active = latest_user_parsed
319
+ latest_connector = str((latest_user_parsed or {}).get("connector") or "").strip().lower()
320
+ if latest_connector and latest_connector != "local":
321
+ active = latest_user_parsed
322
+ origin = "latest_user_source"
323
+ elif latest_user is not None:
324
+ return {
325
+ "latest_user_source": latest_user_source,
326
+ "active_surface": "local",
327
+ "active_connector": "local",
328
+ "active_chat_type": "local",
329
+ "active_chat_id": "default",
330
+ "active_connector_origin": "latest_user_source_local",
331
+ "bound_external_connector_count": len(bound_external),
332
+ }
333
+ else:
334
+ active = bound_external[0] if bound_external else None
335
+ origin = "bound_external_binding" if active is not None else "none"
325
336
  if active is None:
326
337
  return {
327
338
  "latest_user_source": latest_user_source,
@@ -687,18 +698,51 @@ class PromptBuilder:
687
698
  startup_contract = snapshot.get("startup_contract")
688
699
  if isinstance(startup_contract, dict):
689
700
  value = str(startup_contract.get("custom_profile") or "").strip().lower()
690
- if value in {"continue_existing_state", "revision_rebuttal", "freeform"}:
701
+ if value in {"continue_existing_state", "review_audit", "revision_rebuttal", "freeform"}:
691
702
  return value
692
703
  return "freeform"
693
704
 
705
+ @staticmethod
706
+ def _baseline_execution_policy(snapshot: dict) -> str:
707
+ startup_contract = snapshot.get("startup_contract")
708
+ if isinstance(startup_contract, dict):
709
+ value = str(startup_contract.get("baseline_execution_policy") or "").strip().lower()
710
+ if value in {"auto", "must_reproduce_or_verify", "reuse_existing_only", "skip_unless_blocking"}:
711
+ return value
712
+ return "auto"
713
+
714
+ @staticmethod
715
+ def _review_followup_policy(snapshot: dict) -> str:
716
+ startup_contract = snapshot.get("startup_contract")
717
+ if isinstance(startup_contract, dict):
718
+ value = str(startup_contract.get("review_followup_policy") or "").strip().lower()
719
+ if value in {"audit_only", "auto_execute_followups", "user_gated_followups"}:
720
+ return value
721
+ return "audit_only"
722
+
723
+ @staticmethod
724
+ def _manuscript_edit_mode(snapshot: dict) -> str:
725
+ startup_contract = snapshot.get("startup_contract")
726
+ if isinstance(startup_contract, dict):
727
+ value = str(startup_contract.get("manuscript_edit_mode") or "").strip().lower()
728
+ if value in {"none", "copy_ready_text", "latex_required"}:
729
+ return value
730
+ return "none"
731
+
694
732
  def _research_delivery_policy_block(self, snapshot: dict) -> str:
695
733
  need_research_paper = self._need_research_paper(snapshot)
696
734
  launch_mode = self._launch_mode(snapshot)
697
735
  custom_profile = self._custom_profile(snapshot)
736
+ baseline_execution_policy = self._baseline_execution_policy(snapshot)
737
+ review_followup_policy = self._review_followup_policy(snapshot)
738
+ manuscript_edit_mode = self._manuscript_edit_mode(snapshot)
698
739
  lines = [
699
740
  f"- need_research_paper: {need_research_paper}",
700
741
  f"- launch_mode: {launch_mode}",
701
742
  f"- custom_profile: {custom_profile if launch_mode == 'custom' else 'n/a'}",
743
+ f"- review_followup_policy: {review_followup_policy if custom_profile == 'review_audit' else 'n/a'}",
744
+ f"- baseline_execution_policy: {baseline_execution_policy if launch_mode == 'custom' else 'n/a'}",
745
+ f"- manuscript_edit_mode: {manuscript_edit_mode if custom_profile in {'review_audit', 'revision_rebuttal'} else 'n/a'}",
702
746
  f"- delivery_mode: {'paper_required' if need_research_paper else 'algorithm_first'}",
703
747
  "- idea_stage_rule: every accepted idea submission should normally create a new branch/worktree and a new user-visible research node.",
704
748
  "- idea_draft_rule: before `artifact.submit_idea(...)`, first finish a concise durable Markdown draft for the chosen route; keep `idea.md` compact and `draft.md` richer.",
@@ -713,7 +757,7 @@ class PromptBuilder:
713
757
  lines.extend(
714
758
  [
715
759
  "- custom_launch_rule: do not force the canonical full-research path when the custom startup contract is narrower.",
716
- "- custom_context_rule: treat `entry_state_summary`, `review_summary`, and `custom_brief` as active runtime context rather than decorative metadata.",
760
+ "- custom_context_rule: treat `entry_state_summary`, `review_summary`, `review_materials`, and `custom_brief` as active runtime context rather than decorative metadata.",
717
761
  ]
718
762
  )
719
763
  if custom_profile == "continue_existing_state":
@@ -723,6 +767,31 @@ class PromptBuilder:
723
767
  "- reuse_first_rule: trust-rank and reconcile existing assets before deciding to rerun anything costly.",
724
768
  ]
725
769
  )
770
+ elif custom_profile == "review_audit":
771
+ lines.extend(
772
+ [
773
+ "- review_entry_rule: treat the current draft/paper state as the active contract; open `review` before more writing or finalization.",
774
+ "- review_routing_rule: if that audit finds real evidence gaps, route to `analysis-campaign`, `baseline`, `scout`, or `write` instead of polishing blindly.",
775
+ ]
776
+ )
777
+ if review_followup_policy == "auto_execute_followups":
778
+ lines.extend(
779
+ [
780
+ "- review_followup_rule: after the audit artifacts are durable, continue automatically into the required experiments, manuscript deltas, and review-closure work instead of stopping at the audit report.",
781
+ ]
782
+ )
783
+ elif review_followup_policy == "user_gated_followups":
784
+ lines.extend(
785
+ [
786
+ "- review_followup_rule: after the audit artifacts are durable, package the next expensive follow-up step into one structured decision instead of continuing silently.",
787
+ ]
788
+ )
789
+ else:
790
+ lines.extend(
791
+ [
792
+ "- review_followup_rule: stop after the durable audit artifacts and route recommendation unless the user later asks for execution follow-up.",
793
+ ]
794
+ )
726
795
  elif custom_profile == "revision_rebuttal":
727
796
  lines.extend(
728
797
  [
@@ -736,6 +805,36 @@ class PromptBuilder:
736
805
  "- freeform_entry_rule: prefer the custom brief over the default stage order and open only the skills actually needed.",
737
806
  ]
738
807
  )
808
+ if baseline_execution_policy == "must_reproduce_or_verify":
809
+ lines.extend(
810
+ [
811
+ "- baseline_execution_rule: before reviewer-linked follow-up work, explicitly verify or recover the rebuttal-critical baseline/comparator instead of assuming the stored evidence is still trustworthy.",
812
+ ]
813
+ )
814
+ elif baseline_execution_policy == "reuse_existing_only":
815
+ lines.extend(
816
+ [
817
+ "- baseline_execution_rule: prefer the existing trusted baseline/results and do not rerun them unless you find concrete inconsistency, corruption, or missing-evidence problems.",
818
+ ]
819
+ )
820
+ elif baseline_execution_policy == "skip_unless_blocking":
821
+ lines.extend(
822
+ [
823
+ "- baseline_execution_rule: do not spend time on baseline reruns by default; only open `baseline` if a named review/rebuttal issue truly depends on a missing comparator or unusable prior evidence.",
824
+ ]
825
+ )
826
+ if manuscript_edit_mode == "latex_required":
827
+ lines.extend(
828
+ [
829
+ "- manuscript_edit_rule: when manuscript revision is needed, treat the provided LaTeX tree or `paper/latex/` as the authoritative writing surface; if LaTeX source is unavailable, produce LaTeX-ready replacement text and make that blocker explicit instead of pretending the manuscript was edited.",
830
+ ]
831
+ )
832
+ elif manuscript_edit_mode == "copy_ready_text":
833
+ lines.extend(
834
+ [
835
+ "- manuscript_edit_rule: when manuscript revision is needed, provide section-level copy-ready replacement text and explicit deltas even if no LaTeX source is available.",
836
+ ]
837
+ )
739
838
  if need_research_paper:
740
839
  lines.extend(
741
840
  [
@@ -783,7 +882,10 @@ class PromptBuilder:
783
882
  "- interaction_protocol: first message may be plain conversation; after that, treat artifact.interact threads and mailbox polls as the main continuity spine across TUI, web, and connectors",
784
883
  "- mailbox_protocol: artifact.interact(include_recent_inbound_messages=True) is the queued human-message mailbox; when it returns user text, treat that input as higher priority than background subtasks until it has been acknowledged",
785
884
  "- acknowledgment_protocol: after artifact.interact returns any human message, immediately send one substantive artifact.interact(...) follow-up; if the active connector runtime already emitted a transport-level receipt acknowledgement, do not send a redundant receipt-only message; if answerable, answer directly, otherwise state the short plan, nearest checkpoint, and that the current background subtask is paused",
786
- "- progress_protocol: emit artifact.interact(kind='progress', reply_mode='threaded', ...) at real human-meaningful checkpoints; if no natural checkpoint appears during active user-relevant work, prefer a concise keepalive once work has crossed roughly 10 tool calls with a human-meaningful delta, and do not drift beyond roughly 20 tool calls or about 15 minutes without a user-visible update",
885
+ "- progress_protocol: emit artifact.interact(kind='progress', reply_mode='threaded', ...) at real human-meaningful checkpoints; if no natural checkpoint appears during active user-relevant work, prefer a concise keepalive once work has crossed roughly 6 tool calls with a human-meaningful delta, and do not drift beyond roughly 12 tool calls or about 8 minutes without a user-visible update",
886
+ "- stage_kickoff_protocol: after entering any stage or companion skill, send one user-visible artifact.interact progress update within the first 3 tool calls of substantial work",
887
+ "- read_plan_keepalive_protocol: if work is still mostly reading, searching, comparison, or planning, do not wait too long for a 'big result'; send one concise user-visible checkpoint after about 5 consecutive tool calls if the user would otherwise see silence",
888
+ "- subtask_boundary_protocol: send a user-visible update whenever the active subtask changes materially, especially across intake -> audit, audit -> experiment planning, experiment planning -> run launch, run result -> drafting, or drafting -> review/rebuttal",
787
889
  "- smoke_then_detach_protocol: for baseline reproduction, main experiments, and analysis experiments, first validate the command path with a bounded smoke test; once the smoke test passes, launch the real long run with bash_exec(mode='detach', ...) and usually leave timeout_seconds unset rather than guessing a fake deadline",
788
890
  "- progress_first_monitoring_protocol: when supervising a long-running bash_exec session, judge health by forward progress rather than by whether the final artifact has already appeared within a short window",
789
891
  "- delta_monitoring_protocol: compare deltas such as new sample counters, new task counters, new saved files, new last_output_seq values, or changed last_progress payloads; if any of these move forward, treat the run as alive and keep observing",
@@ -804,7 +906,7 @@ class PromptBuilder:
804
906
  "- respect_protocol: write user-facing updates as natural, respectful, easy-to-follow chat; do not sound like a formal status report or internal tool log",
805
907
  "- omission_protocol: for ordinary user-facing updates, omit file paths, artifact ids, branch/worktree ids, session ids, raw commands, raw logs, and internal tool names unless the user asked for them or needs them to act",
806
908
  "- compaction_protocol: ordinary artifact.interact progress updates should usually fit in 2 to 4 short sentences and should not read like a monitoring transcript or execution diary",
807
- "- tool_call_keepalive_protocol: for active multi-step work outside long detached experiment waits, prefer sending one concise artifact.interact progress update after roughly 10 tool calls when there is already a human-meaningful delta, and do not exceed roughly 20 tool calls or about 15 minutes without a user-visible checkpoint",
909
+ "- tool_call_keepalive_protocol: for active multi-step work outside long detached experiment waits, prefer sending one concise artifact.interact progress update after roughly 6 tool calls when there is already a human-meaningful delta, and do not exceed roughly 12 tool calls or about 8 minutes without a user-visible checkpoint",
808
910
  "- human_progress_shape_protocol: ordinary progress updates should usually make three things explicit in human language: the current task, the main difficulty or latest real progress, and the concrete next measure you will take",
809
911
  "- milestone_graduation_protocol: keep ordinary subtask completions concise; upgrade to a richer milestone report only when a stage-significant deliverable or route-changing checkpoint becomes durably true",
810
912
  "- eta_visibility_protocol: for baseline reproduction, main experiments, analysis experiments, and other important long-running phases, progress updates should also make the expected time to the next meaningful result, next milestone, or next user-visible update explicit; use roughly 10 to 30 minutes as the normal update window, and if the ETA is unreliable, say that and give a realistic next check-in window instead",
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import copy
4
+ from collections import deque
4
5
  from contextlib import contextmanager
6
+ from datetime import UTC, datetime, timedelta
5
7
  import hashlib
6
8
  import subprocess
7
9
  import json
@@ -23,7 +25,7 @@ from ..connector_runtime import conversation_identity_key, normalize_conversatio
23
25
  from ..gitops import current_branch, export_git_graph, head_commit, init_repo
24
26
  from ..home import repo_root
25
27
  from ..registries import BaselineRegistry
26
- from ..shared import append_jsonl, ensure_dir, generate_id, read_json, read_jsonl, read_text, read_yaml, resolve_within, run_command, sha256_text, slugify, utc_now, write_json, write_text, write_yaml
28
+ from ..shared import append_jsonl, ensure_dir, generate_id, iter_jsonl, read_json, read_jsonl, read_jsonl_tail, read_text, read_yaml, resolve_within, run_command, sha256_text, slugify, utc_now, write_json, write_text, write_yaml
27
29
  from ..skills import SkillInstaller
28
30
  from ..web_search import extract_web_search_payload
29
31
  from .layout import (
@@ -42,6 +44,126 @@ _UNSET = object()
42
44
  _NUMERIC_QUEST_ID_PATTERN = re.compile(r"^\d{1,10}$")
43
45
  _MAX_NUMERIC_QUEST_ID_VALUE = 9_999_999_999
44
46
  _NUMERIC_QUEST_ID_PAD_WIDTH = 3
47
+ _CRASH_AUTO_RESUME_WINDOW = timedelta(hours=24)
48
+ _JSONL_CACHE_MAX_BYTES = 4 * 1024 * 1024
49
+ _CODEX_HISTORY_TAIL_LIMIT = 400
50
+ _JSONL_STREAM_CHUNK_BYTES = 64 * 1024
51
+ _EVENTS_OVERSIZED_LINE_BYTES = 8 * 1024 * 1024
52
+ _OVERSIZED_EVENT_PREFIX_BYTES = 4096
53
+ _EVENT_TYPE_BYTES_RE = re.compile(rb'"(?:type|event_type)"\s*:\s*"([^"]+)"')
54
+ _EVENT_TOOL_NAME_BYTES_RE = re.compile(rb'"tool_name"\s*:\s*"([^"]+)"')
55
+ _EVENT_RUN_ID_BYTES_RE = re.compile(rb'"run_id"\s*:\s*"([^"]+)"')
56
+
57
+
58
+ def _oversized_event_placeholder(*, prefix: bytes, line_bytes: int) -> dict[str, Any]:
59
+ def _extract(pattern: re.Pattern[bytes]) -> str | None:
60
+ match = pattern.search(prefix)
61
+ if match is None:
62
+ return None
63
+ try:
64
+ return match.group(1).decode("utf-8", errors="ignore").strip() or None
65
+ except Exception:
66
+ return None
67
+
68
+ event_type = _extract(_EVENT_TYPE_BYTES_RE) or "runner.tool_result"
69
+ tool_name = _extract(_EVENT_TOOL_NAME_BYTES_RE)
70
+ run_id = _extract(_EVENT_RUN_ID_BYTES_RE)
71
+ summary = f"Omitted oversized quest event payload ({line_bytes} bytes) while reading event history."
72
+ payload: dict[str, Any] = {
73
+ "type": event_type,
74
+ "status": "omitted",
75
+ "summary": summary,
76
+ "oversized_event": True,
77
+ "oversized_bytes": line_bytes,
78
+ }
79
+ if tool_name:
80
+ payload["tool_name"] = tool_name
81
+ if run_id:
82
+ payload["run_id"] = run_id
83
+ return payload
84
+
85
+
86
+ def _iter_jsonl_records_safely(
87
+ path: Path,
88
+ *,
89
+ oversized_line_bytes: int = _EVENTS_OVERSIZED_LINE_BYTES,
90
+ ):
91
+ if not path.exists():
92
+ return
93
+ with path.open("rb") as handle:
94
+ buffer = bytearray()
95
+ prefix = bytearray()
96
+ current_bytes = 0
97
+ oversized = False
98
+ while True:
99
+ chunk = handle.read(_JSONL_STREAM_CHUNK_BYTES)
100
+ if not chunk:
101
+ break
102
+ start = 0
103
+ while start <= len(chunk):
104
+ newline_index = chunk.find(b"\n", start)
105
+ has_newline = newline_index >= 0
106
+ segment = chunk[start:newline_index] if has_newline else chunk[start:]
107
+
108
+ if oversized:
109
+ current_bytes += len(segment)
110
+ if has_newline:
111
+ yield _oversized_event_placeholder(prefix=bytes(prefix), line_bytes=current_bytes)
112
+ prefix = bytearray()
113
+ current_bytes = 0
114
+ oversized = False
115
+ start = newline_index + 1
116
+ continue
117
+ break
118
+
119
+ next_bytes = current_bytes + len(segment)
120
+ if next_bytes > oversized_line_bytes:
121
+ combined_prefix = bytes(buffer)
122
+ remaining = max(0, _OVERSIZED_EVENT_PREFIX_BYTES - len(combined_prefix))
123
+ if remaining:
124
+ combined_prefix += segment[:remaining]
125
+ prefix = bytearray(combined_prefix)
126
+ buffer.clear()
127
+ current_bytes = next_bytes
128
+ oversized = True
129
+ if has_newline:
130
+ yield _oversized_event_placeholder(prefix=bytes(prefix), line_bytes=current_bytes)
131
+ prefix = bytearray()
132
+ current_bytes = 0
133
+ oversized = False
134
+ start = newline_index + 1
135
+ continue
136
+ break
137
+
138
+ buffer.extend(segment)
139
+ current_bytes = next_bytes
140
+ if has_newline:
141
+ raw = bytes(buffer).strip()
142
+ buffer.clear()
143
+ line_bytes = current_bytes
144
+ current_bytes = 0
145
+ if raw:
146
+ try:
147
+ payload = json.loads(raw)
148
+ except json.JSONDecodeError:
149
+ payload = None
150
+ if isinstance(payload, dict):
151
+ yield payload
152
+ start = newline_index + 1
153
+ continue
154
+ break
155
+
156
+ if oversized:
157
+ yield _oversized_event_placeholder(prefix=bytes(prefix), line_bytes=current_bytes)
158
+ elif buffer:
159
+ raw = bytes(buffer).strip()
160
+ if raw:
161
+ try:
162
+ payload = json.loads(raw)
163
+ except json.JSONDecodeError:
164
+ payload = None
165
+ if isinstance(payload, dict):
166
+ yield payload
45
167
 
46
168
 
47
169
  class QuestService:
@@ -808,21 +930,15 @@ class QuestService:
808
930
  getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000)),
809
931
  stat.st_size,
810
932
  )
933
+ if stat.st_size > _JSONL_CACHE_MAX_BYTES:
934
+ with self._jsonl_cache_lock:
935
+ self._jsonl_cache.pop(cache_key, None)
936
+ return read_jsonl(path)
811
937
  with self._jsonl_cache_lock:
812
938
  cached = self._jsonl_cache.get(cache_key)
813
939
  if cached and cached.get("state") == state:
814
940
  return cached.get("records") or []
815
- items: list[dict[str, Any]] = []
816
- for line in path.read_text(encoding="utf-8").splitlines():
817
- line = line.strip()
818
- if not line:
819
- continue
820
- try:
821
- payload = json.loads(line)
822
- except json.JSONDecodeError:
823
- continue
824
- if isinstance(payload, dict):
825
- items.append(payload)
941
+ items = read_jsonl(path)
826
942
  with self._jsonl_cache_lock:
827
943
  self._jsonl_cache[cache_key] = {
828
944
  "state": state,
@@ -830,6 +946,57 @@ class QuestService:
830
946
  }
831
947
  return items
832
948
 
949
+ @staticmethod
950
+ def _read_jsonl_cursor_slice(
951
+ path: Path,
952
+ *,
953
+ after: int = 0,
954
+ before: int | None = None,
955
+ limit: int = 200,
956
+ tail: bool = False,
957
+ ) -> tuple[list[tuple[int, dict[str, Any]]], int, bool]:
958
+ normalized_limit = max(int(limit or 0), 0)
959
+ if not path.exists():
960
+ return [], 0, False
961
+ if normalized_limit <= 0:
962
+ total = sum(1 for _ in _iter_jsonl_records_safely(path))
963
+ return [], total, False
964
+
965
+ if before is not None:
966
+ stop_cursor = max(int(before) - 1, 0)
967
+ window: deque[tuple[int, dict[str, Any]]] = deque(maxlen=normalized_limit)
968
+ total = 0
969
+ for payload in _iter_jsonl_records_safely(path):
970
+ total += 1
971
+ if total >= before:
972
+ break
973
+ window.append((total, payload))
974
+ has_more = bool(window and window[0][0] > 1)
975
+ return list(window), total, has_more
976
+
977
+ if tail:
978
+ window = deque(maxlen=normalized_limit)
979
+ total = 0
980
+ for payload in _iter_jsonl_records_safely(path):
981
+ total += 1
982
+ window.append((total, payload))
983
+ has_more = total > len(window)
984
+ return list(window), total, has_more
985
+
986
+ collected: list[tuple[int, dict[str, Any]]] = []
987
+ total = 0
988
+ saw_more = False
989
+ normalized_after = max(int(after or 0), 0)
990
+ for payload in _iter_jsonl_records_safely(path):
991
+ total += 1
992
+ if total <= normalized_after:
993
+ continue
994
+ if len(collected) < normalized_limit:
995
+ collected.append((total, payload))
996
+ continue
997
+ saw_more = True
998
+ return collected, total, saw_more
999
+
833
1000
  @staticmethod
834
1001
  def _path_state(path: Path) -> tuple[int, int, int] | None:
835
1002
  if not path.exists():
@@ -1592,6 +1759,12 @@ class QuestService:
1592
1759
  if not active_run_id and status != "running":
1593
1760
  continue
1594
1761
  previous_status = status or "running"
1762
+ last_transition_at = self._runtime_recovery_timestamp(runtime_state, quest_data)
1763
+ recoverable = self._runtime_recovery_eligible(
1764
+ previous_status=previous_status,
1765
+ active_run_id=active_run_id or None,
1766
+ last_transition_at=last_transition_at,
1767
+ )
1595
1768
  self.update_runtime_state(
1596
1769
  quest_root=quest_root,
1597
1770
  status="stopped",
@@ -1602,6 +1775,8 @@ class QuestService:
1602
1775
  f"Recovered quest from stale runtime state; previous status `{previous_status}`"
1603
1776
  + (f", abandoned run `{active_run_id}`." if active_run_id else ".")
1604
1777
  )
1778
+ if recoverable:
1779
+ summary = f"{summary} Auto-resume is eligible within the 24-hour recovery window."
1605
1780
  append_jsonl(
1606
1781
  quest_root / ".ds" / "events.jsonl",
1607
1782
  {
@@ -1610,6 +1785,8 @@ class QuestService:
1610
1785
  "quest_id": quest_root.name,
1611
1786
  "previous_status": previous_status,
1612
1787
  "abandoned_run_id": active_run_id or None,
1788
+ "last_transition_at": last_transition_at,
1789
+ "recoverable": recoverable,
1613
1790
  "status": "stopped",
1614
1791
  "summary": summary,
1615
1792
  "created_at": utc_now(),
@@ -1620,11 +1797,53 @@ class QuestService:
1620
1797
  "quest_id": quest_root.name,
1621
1798
  "previous_status": previous_status,
1622
1799
  "abandoned_run_id": active_run_id or None,
1800
+ "last_transition_at": last_transition_at,
1801
+ "recoverable": recoverable,
1623
1802
  "status": "stopped",
1624
1803
  }
1625
1804
  )
1626
1805
  return reconciled
1627
1806
 
1807
+ @staticmethod
1808
+ def _parse_runtime_timestamp(value: Any) -> datetime | None:
1809
+ normalized = str(value or "").strip()
1810
+ if not normalized:
1811
+ return None
1812
+ candidate = normalized.replace("Z", "+00:00")
1813
+ try:
1814
+ parsed = datetime.fromisoformat(candidate)
1815
+ except ValueError:
1816
+ return None
1817
+ if parsed.tzinfo is None:
1818
+ parsed = parsed.replace(tzinfo=UTC)
1819
+ return parsed.astimezone(UTC)
1820
+
1821
+ def _runtime_recovery_timestamp(self, runtime_state: dict[str, Any], quest_data: dict[str, Any]) -> str | None:
1822
+ for candidate in (
1823
+ runtime_state.get("last_transition_at"),
1824
+ quest_data.get("updated_at"),
1825
+ quest_data.get("created_at"),
1826
+ ):
1827
+ parsed = self._parse_runtime_timestamp(candidate)
1828
+ if parsed is None:
1829
+ continue
1830
+ return parsed.isoformat()
1831
+ return None
1832
+
1833
+ def _runtime_recovery_eligible(
1834
+ self,
1835
+ *,
1836
+ previous_status: str,
1837
+ active_run_id: str | None,
1838
+ last_transition_at: str | None,
1839
+ ) -> bool:
1840
+ if previous_status != "running" and not str(active_run_id or "").strip():
1841
+ return False
1842
+ parsed = self._parse_runtime_timestamp(last_transition_at)
1843
+ if parsed is None:
1844
+ return False
1845
+ return datetime.now(UTC) - parsed <= _CRASH_AUTO_RESUME_WINDOW
1846
+
1628
1847
def history(self, quest_id: str, limit: int = 100) -> list[dict]:
    """Return up to *limit* most recent main-conversation records for a quest.

    Fix: the original returned ``records[-limit:]`` unconditionally, so
    ``limit=0`` produced the ENTIRE history (``[-0:]`` is a full slice)
    and a negative limit silently dropped leading records. A non-positive
    limit now yields an empty list, matching the "at most limit" contract.
    """
    if limit <= 0:
        return []
    records = self._read_cached_jsonl(
        self._quest_root(quest_id) / ".ds" / "conversations" / "main.jsonl"
    )
    return records[-limit:]
1630
1849
 
@@ -1730,40 +1949,37 @@ class QuestService:
1730
1949
  limit: int = 200,
1731
1950
  tail: bool = False,
1732
1951
  ) -> dict:
1733
- records = self._read_cached_jsonl(self._quest_root(quest_id) / ".ds" / "events.jsonl")
1952
+ event_path = self._quest_root(quest_id) / ".ds" / "events.jsonl"
1734
1953
  normalized_limit = max(limit, 0)
1735
1954
  direction = "after"
1736
1955
  if before is not None:
1737
1956
  direction = "before"
1738
- end = max(int(before) - 1, 0)
1739
- start = max(end - normalized_limit, 0)
1740
- sliced = records[start:end]
1741
1957
  elif tail and normalized_limit > 0:
1742
1958
  direction = "tail"
1743
- start = max(len(records) - normalized_limit, 0)
1744
- sliced = records[start : start + normalized_limit]
1745
- else:
1746
- start = max(after, 0)
1747
- sliced = records[start : start + normalized_limit]
1959
+ sliced_records, total_records, has_more = self._read_jsonl_cursor_slice(
1960
+ event_path,
1961
+ after=after,
1962
+ before=before,
1963
+ limit=normalized_limit,
1964
+ tail=tail,
1965
+ )
1748
1966
  enriched = []
1749
- for index, item in enumerate(sliced, start=start + 1):
1967
+ for cursor, item in sliced_records:
1750
1968
  enriched.append(
1751
1969
  {
1752
- "cursor": index,
1753
- "event_id": item.get("event_id") or f"evt-{quest_id}-{index}",
1970
+ "cursor": cursor,
1971
+ "event_id": item.get("event_id") or f"evt-{quest_id}-{cursor}",
1754
1972
  **item,
1755
1973
  }
1756
1974
  )
1757
1975
  if before is not None:
1758
- next_cursor = start + len(sliced)
1976
+ next_cursor = enriched[-1]["cursor"] if enriched else max(min(int(before or 0) - 1, total_records), 0)
1977
+ elif tail:
1978
+ next_cursor = total_records
1759
1979
  else:
1760
- next_cursor = len(records) if tail else start + len(sliced)
1980
+ next_cursor = enriched[-1]["cursor"] if enriched else max(int(after or 0), 0)
1761
1981
  oldest_cursor = enriched[0]["cursor"] if enriched else None
1762
1982
  newest_cursor = enriched[-1]["cursor"] if enriched else None
1763
- if before is not None:
1764
- has_more = start > 0
1765
- else:
1766
- has_more = start > 0 if tail else next_cursor < len(records)
1767
1983
  return {
1768
1984
  "quest_id": quest_id,
1769
1985
  "cursor": next_cursor,
@@ -3705,7 +3921,7 @@ def _parse_codex_history(history_root: Path, *, quest_id: str, run_id: str, skil
3705
3921
  entries: list[dict] = []
3706
3922
  known_tool_names: dict[str, str] = {}
3707
3923
 
3708
- for raw in read_jsonl(history_path):
3924
+ for raw in read_jsonl_tail(history_path, _CODEX_HISTORY_TAIL_LIMIT):
3709
3925
  timestamp = raw.get("timestamp")
3710
3926
  event = raw.get("event")
3711
3927
  if not isinstance(event, dict):