@researai/deepscientist 1.5.9 → 1.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. package/README.md +112 -99
  2. package/assets/branding/connector-qq.png +0 -0
  3. package/assets/branding/connector-rokid.png +0 -0
  4. package/assets/branding/connector-weixin.png +0 -0
  5. package/assets/branding/projects.png +0 -0
  6. package/bin/ds.js +519 -63
  7. package/docs/assets/branding/projects.png +0 -0
  8. package/docs/en/00_QUICK_START.md +338 -68
  9. package/docs/en/01_SETTINGS_REFERENCE.md +14 -0
  10. package/docs/en/02_START_RESEARCH_GUIDE.md +180 -4
  11. package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +62 -179
  12. package/docs/en/09_DOCTOR.md +66 -5
  13. package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +137 -0
  14. package/docs/en/11_LICENSE_AND_RISK.md +256 -0
  15. package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +446 -0
  16. package/docs/en/13_CORE_ARCHITECTURE_GUIDE.md +297 -0
  17. package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
  18. package/docs/en/15_CODEX_PROVIDER_SETUP.md +284 -0
  19. package/docs/en/99_ACKNOWLEDGEMENTS.md +4 -1
  20. package/docs/en/README.md +83 -0
  21. package/docs/images/lingzhu/rokid-agent-platform-create.png +0 -0
  22. package/docs/images/weixin/weixin-plugin-entry.png +0 -0
  23. package/docs/images/weixin/weixin-plugin-entry.svg +33 -0
  24. package/docs/images/weixin/weixin-qr-confirm.svg +30 -0
  25. package/docs/images/weixin/weixin-quest-media-flow.svg +44 -0
  26. package/docs/images/weixin/weixin-settings-bind.svg +57 -0
  27. package/docs/zh/00_QUICK_START.md +345 -72
  28. package/docs/zh/01_SETTINGS_REFERENCE.md +14 -0
  29. package/docs/zh/02_START_RESEARCH_GUIDE.md +181 -3
  30. package/docs/zh/04_LINGZHU_CONNECTOR_GUIDE.md +62 -193
  31. package/docs/zh/09_DOCTOR.md +68 -5
  32. package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +144 -0
  33. package/docs/zh/11_LICENSE_AND_RISK.md +256 -0
  34. package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +442 -0
  35. package/docs/zh/13_CORE_ARCHITECTURE_GUIDE.md +296 -0
  36. package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
  37. package/docs/zh/15_CODEX_PROVIDER_SETUP.md +285 -0
  38. package/docs/zh/99_ACKNOWLEDGEMENTS.md +4 -1
  39. package/docs/zh/README.md +129 -0
  40. package/install.sh +0 -34
  41. package/package.json +2 -2
  42. package/pyproject.toml +1 -1
  43. package/src/deepscientist/__init__.py +1 -1
  44. package/src/deepscientist/annotations.py +343 -0
  45. package/src/deepscientist/artifact/arxiv.py +484 -37
  46. package/src/deepscientist/artifact/service.py +574 -108
  47. package/src/deepscientist/arxiv_library.py +275 -0
  48. package/src/deepscientist/bash_exec/monitor.py +7 -5
  49. package/src/deepscientist/bash_exec/service.py +93 -21
  50. package/src/deepscientist/bridges/builtins.py +2 -0
  51. package/src/deepscientist/bridges/connectors.py +447 -0
  52. package/src/deepscientist/channels/__init__.py +2 -0
  53. package/src/deepscientist/channels/builtins.py +3 -1
  54. package/src/deepscientist/channels/local.py +3 -3
  55. package/src/deepscientist/channels/qq.py +8 -8
  56. package/src/deepscientist/channels/qq_gateway.py +1 -1
  57. package/src/deepscientist/channels/relay.py +14 -8
  58. package/src/deepscientist/channels/weixin.py +59 -0
  59. package/src/deepscientist/channels/weixin_ilink.py +388 -0
  60. package/src/deepscientist/config/models.py +23 -2
  61. package/src/deepscientist/config/service.py +539 -67
  62. package/src/deepscientist/connector/__init__.py +4 -0
  63. package/src/deepscientist/connector/connector_profiles.py +481 -0
  64. package/src/deepscientist/connector/lingzhu_support.py +668 -0
  65. package/src/deepscientist/connector/qq_profiles.py +206 -0
  66. package/src/deepscientist/connector/weixin_support.py +663 -0
  67. package/src/deepscientist/connector_profiles.py +1 -374
  68. package/src/deepscientist/connector_runtime.py +2 -0
  69. package/src/deepscientist/daemon/api/handlers.py +165 -5
  70. package/src/deepscientist/daemon/api/router.py +13 -1
  71. package/src/deepscientist/daemon/app.py +1444 -67
  72. package/src/deepscientist/doctor.py +4 -5
  73. package/src/deepscientist/gitops/diff.py +120 -29
  74. package/src/deepscientist/lingzhu_support.py +1 -182
  75. package/src/deepscientist/mcp/server.py +135 -7
  76. package/src/deepscientist/prompts/builder.py +128 -11
  77. package/src/deepscientist/qq_profiles.py +1 -196
  78. package/src/deepscientist/quest/node_traces.py +23 -0
  79. package/src/deepscientist/quest/service.py +359 -74
  80. package/src/deepscientist/quest/stage_views.py +71 -5
  81. package/src/deepscientist/runners/codex.py +170 -19
  82. package/src/deepscientist/runners/runtime_overrides.py +6 -0
  83. package/src/deepscientist/shared.py +33 -14
  84. package/src/deepscientist/weixin_support.py +1 -0
  85. package/src/prompts/connectors/lingzhu.md +3 -1
  86. package/src/prompts/connectors/qq.md +2 -1
  87. package/src/prompts/connectors/weixin.md +231 -0
  88. package/src/prompts/contracts/shared_interaction.md +4 -1
  89. package/src/prompts/system.md +61 -9
  90. package/src/skills/analysis-campaign/SKILL.md +46 -6
  91. package/src/skills/analysis-campaign/references/campaign-plan-template.md +21 -8
  92. package/src/skills/baseline/SKILL.md +1 -1
  93. package/src/skills/decision/SKILL.md +1 -1
  94. package/src/skills/experiment/SKILL.md +1 -1
  95. package/src/skills/finalize/SKILL.md +1 -1
  96. package/src/skills/idea/SKILL.md +1 -1
  97. package/src/skills/intake-audit/SKILL.md +1 -1
  98. package/src/skills/rebuttal/SKILL.md +74 -1
  99. package/src/skills/rebuttal/references/response-letter-template.md +55 -11
  100. package/src/skills/review/SKILL.md +118 -1
  101. package/src/skills/review/references/experiment-todo-template.md +23 -0
  102. package/src/skills/review/references/review-report-template.md +16 -0
  103. package/src/skills/review/references/revision-log-template.md +4 -0
  104. package/src/skills/scout/SKILL.md +1 -1
  105. package/src/skills/write/SKILL.md +168 -7
  106. package/src/skills/write/references/paper-experiment-matrix-template.md +131 -0
  107. package/src/tui/package.json +1 -1
  108. package/src/ui/dist/assets/{AiManusChatView-BKZ103sn.js → AiManusChatView-CnJcXynW.js} +156 -48
  109. package/src/ui/dist/assets/{AnalysisPlugin-mTTzGAlK.js → AnalysisPlugin-DeyzPEhV.js} +1 -1
  110. package/src/ui/dist/assets/{CliPlugin-BH58n3GY.js → CliPlugin-CB1YODQn.js} +164 -9
  111. package/src/ui/dist/assets/{CodeEditorPlugin-BKGRUH7e.js → CodeEditorPlugin-B-xicq1e.js} +8 -8
  112. package/src/ui/dist/assets/{CodeViewerPlugin-BMADwFWJ.js → CodeViewerPlugin-DT54ysXa.js} +5 -5
  113. package/src/ui/dist/assets/{DocViewerPlugin-ZOnTIHLN.js → DocViewerPlugin-DQtKT-VD.js} +3 -3
  114. package/src/ui/dist/assets/{GitDiffViewerPlugin-CQ7h1Djm.js → GitDiffViewerPlugin-hqHbCfnv.js} +20 -21
  115. package/src/ui/dist/assets/{ImageViewerPlugin-GVS5MsnC.js → ImageViewerPlugin-OcVo33jV.js} +5 -5
  116. package/src/ui/dist/assets/{LabCopilotPanel-BZNv1JML.js → LabCopilotPanel-DdGwhEUV.js} +11 -11
  117. package/src/ui/dist/assets/{LabPlugin-TWcJsdQA.js → LabPlugin-Ciz1gDaX.js} +2 -1
  118. package/src/ui/dist/assets/{LatexPlugin-DIjHiR2x.js → LatexPlugin-BhmjNQRC.js} +37 -11
  119. package/src/ui/dist/assets/{MarkdownViewerPlugin-D3ooGAH0.js → MarkdownViewerPlugin-BzdVH9Bx.js} +4 -4
  120. package/src/ui/dist/assets/{MarketplacePlugin-DfVfE9hN.js → MarketplacePlugin-DmyHspXt.js} +3 -3
  121. package/src/ui/dist/assets/{NotebookEditor-DDl0_Mc0.js → NotebookEditor-BMXKrDRk.js} +1 -1
  122. package/src/ui/dist/assets/{NotebookEditor-s8JhzuX1.js → NotebookEditor-BTVYRGkm.js} +12 -12
  123. package/src/ui/dist/assets/{PdfLoader-C2Sf6SJM.js → PdfLoader-CvcjJHXv.js} +14 -7
  124. package/src/ui/dist/assets/{PdfMarkdownPlugin-CXFLoIsa.js → PdfMarkdownPlugin-DW2ej8Vk.js} +73 -6
  125. package/src/ui/dist/assets/{PdfViewerPlugin-BYTmz2fK.js → PdfViewerPlugin-CmlDxbhU.js} +103 -34
  126. package/src/ui/dist/assets/PdfViewerPlugin-DQ11QcSf.css +3627 -0
  127. package/src/ui/dist/assets/{SearchPlugin-CjWBI1O9.js → SearchPlugin-DAjQZPSv.js} +1 -1
  128. package/src/ui/dist/assets/{TextViewerPlugin-DdOBU3-S.js → TextViewerPlugin-C-nVAZb_.js} +5 -4
  129. package/src/ui/dist/assets/{VNCViewer-B8HGgLwQ.js → VNCViewer-D7-dIYon.js} +10 -10
  130. package/src/ui/dist/assets/bot-C_G4WtNI.js +21 -0
  131. package/src/ui/dist/assets/branding/logo-rokid.png +0 -0
  132. package/src/ui/dist/assets/browser-BAcuE0Xj.js +2895 -0
  133. package/src/ui/dist/assets/{code-BWAY76JP.js → code-Cd7WfiWq.js} +1 -1
  134. package/src/ui/dist/assets/{file-content-C1NwU5oQ.js → file-content-B57zsL9y.js} +1 -1
  135. package/src/ui/dist/assets/{file-diff-panel-CywslwB9.js → file-diff-panel-DVoheLFq.js} +1 -1
  136. package/src/ui/dist/assets/{file-socket-B4kzuOBQ.js → file-socket-B5kXFxZP.js} +1 -1
  137. package/src/ui/dist/assets/{image-D-NZM-6P.js → image-LLOjkMHF.js} +1 -1
  138. package/src/ui/dist/assets/{index-DGIYDuTv.css → index-BQG-1s2o.css} +40 -13
  139. package/src/ui/dist/assets/{index-DHZJ_0TI.js → index-C3r2iGrp.js} +12 -12
  140. package/src/ui/dist/assets/{index-7Chr1g9c.js → index-CLQauncb.js} +15050 -9561
  141. package/src/ui/dist/assets/index-Dxa2eYMY.js +25 -0
  142. package/src/ui/dist/assets/{index-BdM1Gqfr.js → index-hOUOWbW2.js} +2 -2
  143. package/src/ui/dist/assets/{monaco-Cb2uKKe6.js → monaco-BGGAEii3.js} +1 -1
  144. package/src/ui/dist/assets/{pdf-effect-queue-DSw_D3RV.js → pdf-effect-queue-DlEr1_y5.js} +16 -1
  145. package/src/ui/dist/assets/pdf.worker.min-yatZIOMy.mjs +21 -0
  146. package/src/ui/dist/assets/{popover-Bg72DGgT.js → popover-CWJbJuYY.js} +1 -1
  147. package/src/ui/dist/assets/{project-sync-Ce_0BglY.js → project-sync-CRJiucYO.js} +18 -77
  148. package/src/ui/dist/assets/select-CoHB7pvH.js +1690 -0
  149. package/src/ui/dist/assets/{sigma-DPaACDrh.js → sigma-D5aJWR8J.js} +1 -1
  150. package/src/ui/dist/assets/{index-CDxNdQdz.js → square-check-big-DUK_mnkS.js} +2 -13
  151. package/src/ui/dist/assets/{trash-BvTgE5__.js → trash-ChU3SEE3.js} +1 -1
  152. package/src/ui/dist/assets/{useCliAccess-CgPeMOwP.js → useCliAccess-BrJBV3tY.js} +1 -1
  153. package/src/ui/dist/assets/{useFileDiffOverlay-xPhz7P5B.js → useFileDiffOverlay-C2OQaVWc.js} +1 -1
  154. package/src/ui/dist/assets/{wrap-text-C3Un3YQr.js → wrap-text-C7Qqh-om.js} +1 -1
  155. package/src/ui/dist/assets/{zoom-out-BgxLa0Ri.js → zoom-out-rtX0FKya.js} +1 -1
  156. package/src/ui/dist/index.html +2 -2
  157. package/src/ui/dist/assets/AutoFigurePlugin-BGxN8Umr.css +0 -3056
  158. package/src/ui/dist/assets/AutoFigurePlugin-C_wWw4AP.js +0 -8149
  159. package/src/ui/dist/assets/PdfViewerPlugin-BJXtIwj_.css +0 -260
  160. package/src/ui/dist/assets/Stepper-B0Dd8CxK.js +0 -158
  161. package/src/ui/dist/assets/bibtex-CKaefIN2.js +0 -189
  162. package/src/ui/dist/assets/file-utils-H2fjA46S.js +0 -109
  163. package/src/ui/dist/assets/message-square-BzjLiXir.js +0 -16
  164. package/src/ui/dist/assets/pdfjs-DU1YE8WO.js +0 -3
  165. package/src/ui/dist/assets/tooltip-C_mA6R0w.js +0 -108
@@ -0,0 +1,275 @@
1
+ from __future__ import annotations
2
+
3
+ import threading
4
+ from pathlib import Path
5
+ from typing import Any
6
+ from urllib.request import Request, urlopen
7
+
8
+ from .artifact.arxiv import USER_AGENT, normalize_arxiv_id
9
+ from .shared import ensure_dir, read_json, utc_now, write_json
10
+
11
+
12
class ArxivLibraryService:
    """Track arXiv papers for a quest in a JSON manifest and fetch their PDFs.

    Everything lives under ``<quest_root>/literature/arxiv``: ``index.json`` is
    the manifest and ``pdfs/`` holds the downloaded files. Manifest rewrites in
    ``upsert_item`` are serialized by ``_manifest_lock``; a set of
    ``(resolved quest root, arxiv id)`` keys guarded by ``_download_lock`` keeps
    one service instance from starting two downloads of the same paper.
    """

    _SCHEMA_VERSION = 2

    def __init__(self) -> None:
        self._manifest_lock = threading.Lock()
        self._download_lock = threading.Lock()
        self._inflight_downloads: set[tuple[str, str]] = set()

    @staticmethod
    def _root(quest_root: Path) -> Path:
        """Return the library directory inside ``quest_root``."""
        return quest_root / "literature" / "arxiv"

    @classmethod
    def _index_path(cls, quest_root: Path) -> Path:
        """Return the path of the manifest file."""
        return cls._root(quest_root) / "index.json"

    @classmethod
    def _pdf_dir(cls, quest_root: Path) -> Path:
        """Return the directory that stores downloaded PDFs."""
        return cls._root(quest_root) / "pdfs"

    @staticmethod
    def _pdf_file_name(arxiv_id: str) -> str:
        """Return the PDF file name used for ``arxiv_id``."""
        return f"{arxiv_id}.pdf"

    @classmethod
    def pdf_relative_path(cls, arxiv_id: str) -> str:
        """Return the quest-relative POSIX path of the PDF for ``arxiv_id``."""
        return f"literature/arxiv/pdfs/{cls._pdf_file_name(arxiv_id)}"

    @classmethod
    def pdf_path(cls, quest_root: Path, arxiv_id: str) -> Path:
        """Return the filesystem path of the PDF for ``arxiv_id``."""
        return cls._pdf_dir(quest_root) / cls._pdf_file_name(arxiv_id)

    @classmethod
    def _empty_payload(cls) -> dict[str, Any]:
        """Return a fresh manifest with no items."""
        return {
            "schema_version": cls._SCHEMA_VERSION,
            "updated_at": utc_now(),
            "items": [],
        }

    def load_manifest(self, quest_root: Path) -> dict[str, Any]:
        """Read the manifest, substituting safe defaults for malformed content.

        A payload that is not a dict is replaced by an empty manifest, and an
        ``items`` value that is not a list is replaced by ``[]``. The schema
        version is always reported as the current one.
        """
        payload = read_json(self._index_path(quest_root), default=None)
        if not isinstance(payload, dict):
            payload = self._empty_payload()
        if not isinstance(payload.get("items"), list):
            payload["items"] = []
        payload["schema_version"] = self._SCHEMA_VERSION
        payload["updated_at"] = str(payload.get("updated_at") or utc_now())
        return payload

    def save_manifest(self, quest_root: Path, payload: dict[str, Any]) -> dict[str, Any]:
        """Write ``payload`` as the manifest and return the stored copy.

        The input dict is not mutated; the copy gets the current schema version
        and a fresh ``updated_at``.
        """
        normalized = dict(payload or {})
        normalized["schema_version"] = self._SCHEMA_VERSION
        normalized["updated_at"] = utc_now()
        if not isinstance(normalized.get("items"), list):
            normalized["items"] = []
        ensure_dir(self._root(quest_root))
        write_json(self._index_path(quest_root), normalized)
        return normalized

    @staticmethod
    def _normalize_item(item: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of ``item`` with every known field coerced to its stored shape.

        Text fields are stripped strings; optional fields become ``None`` when
        empty; ``authors``/``categories``/``tags`` become lists of non-empty
        stripped strings; ``version`` becomes an ``int`` or ``None``. Keys this
        method does not know about are carried over unchanged.
        """
        normalized = dict(item or {})

        def text(key: str) -> str:
            return str(normalized.get(key) or "").strip()

        def text_or_none(key: str) -> str | None:
            return text(key) or None

        def text_list(key: str) -> list[str]:
            cleaned = (str(entry).strip() for entry in (normalized.get(key) or []))
            return [entry for entry in cleaned if entry]

        normalized["arxiv_id"] = text("arxiv_id")
        normalized["status"] = str(normalized.get("status") or "processing").strip() or "processing"

        # A missing metadata_status is inferred as "ready" when a metadata
        # source is recorded. This must read metadata_source before that field
        # is normalized below.
        metadata_status = text("metadata_status")
        if not metadata_status and text("metadata_source"):
            metadata_status = "ready"
        normalized["metadata_status"] = metadata_status or None

        # title and display_name fall back to each other, then to the id.
        # display_name deliberately sees the already-normalized title.
        normalized["title"] = str(
            normalized.get("title") or normalized.get("display_name") or normalized["arxiv_id"]
        ).strip()
        normalized["display_name"] = str(
            normalized.get("display_name") or normalized.get("title") or normalized["arxiv_id"]
        ).strip()

        for key in ("abstract", "overview", "overview_markdown"):
            normalized[key] = text(key)
        for key in ("summary_source", "overview_source", "metadata_source"):
            normalized[key] = text_or_none(key)
        for key in ("published_at", "primary_class"):
            normalized[key] = text(key)
        for key in ("bibtex", "abs_url", "pdf_url"):
            normalized[key] = text_or_none(key)

        normalized["created_at"] = str(normalized.get("created_at") or utc_now()).strip()
        normalized["updated_at"] = str(normalized.get("updated_at") or utc_now()).strip()

        for key in ("authors", "categories", "tags"):
            normalized[key] = text_list(key)

        version = normalized.get("version")
        if isinstance(version, int):
            parsed_version: int | None = int(version)
        else:
            version_text = str(version)
            try:
                # str.isdigit() also accepts characters such as "²" that int()
                # rejects, so the conversion itself has to be guarded.
                parsed_version = int(version_text) if version_text.isdigit() else None
            except ValueError:
                parsed_version = None
        normalized["version"] = parsed_version

        normalized["pdf_rel_path"] = text_or_none("pdf_rel_path")
        normalized["error"] = text_or_none("error")
        return normalized

    @staticmethod
    def _require_arxiv_id(arxiv_id: str) -> str:
        """Return the normalized id, raising ``ValueError`` when it is rejected."""
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            raise ValueError("Invalid arXiv id.")
        return normalized_id

    def get_item(self, quest_root: Path, arxiv_id: str) -> dict[str, Any] | None:
        """Return the materialized manifest entry for ``arxiv_id``, or ``None``.

        ``None`` is returned both for an id that ``normalize_arxiv_id`` rejects
        and for an id that is not in the manifest.
        """
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            return None
        for raw_item in self.load_manifest(quest_root).get("items") or []:
            # Skip entries that are not dicts instead of failing on `.get`,
            # matching the filter used by upsert_item.
            if not isinstance(raw_item, dict):
                continue
            if str(raw_item.get("arxiv_id") or "").strip() == normalized_id:
                return self._materialize_item(quest_root, raw_item)
        return None

    def list_items(self, quest_root: Path) -> list[dict[str, Any]]:
        """Return all materialized entries, most recently updated first."""
        items = [
            self._materialize_item(quest_root, raw_item)
            for raw_item in self.load_manifest(quest_root).get("items") or []
            if isinstance(raw_item, dict) and str(raw_item.get("arxiv_id") or "").strip()
        ]
        return sorted(items, key=lambda entry: str(entry.get("updated_at") or ""), reverse=True)

    def upsert_item(self, quest_root: Path, item: dict[str, Any]) -> dict[str, Any]:
        """Insert ``item`` or merge it over the entry with the same ``arxiv_id``.

        On update the stored ``created_at`` is kept when present and
        ``updated_at`` is refreshed; on insert both timestamps are set to now.

        Raises:
            ValueError: if the item has no ``arxiv_id``.
        """
        normalized = self._normalize_item(item)
        target_id = normalized["arxiv_id"]
        if not target_id:
            raise ValueError("`arxiv_id` is required.")
        with self._manifest_lock:
            payload = self.load_manifest(quest_root)
            items = [dict(existing) for existing in (payload.get("items") or []) if isinstance(existing, dict)]
            now = utc_now()
            for index, existing in enumerate(items):
                if str(existing.get("arxiv_id") or "").strip() != target_id:
                    continue
                merged = {**existing, **normalized, "updated_at": now}
                # _normalize_item always fills created_at (defaulting to "now"),
                # so the stored creation time must be restored explicitly or
                # every update without created_at would reset it.
                if existing.get("created_at"):
                    merged["created_at"] = existing["created_at"]
                items[index] = merged
                break
            else:
                items.append({**normalized, "created_at": now, "updated_at": now})
            payload["items"] = items
            self.save_manifest(quest_root, payload)
        return self.get_item(quest_root, target_id) or normalized

    def mark_processing(self, quest_root: Path, arxiv_id: str, *, display_name: str | None = None) -> dict[str, Any]:
        """Record ``arxiv_id`` as ``processing`` and clear any previous error.

        Raises:
            ValueError: if the id is rejected by ``normalize_arxiv_id``.
        """
        normalized_id = self._require_arxiv_id(arxiv_id)
        current = self.get_item(quest_root, normalized_id) or {}
        return self.upsert_item(
            quest_root,
            {
                **current,
                "arxiv_id": normalized_id,
                "display_name": display_name or current.get("display_name") or normalized_id,
                "status": "processing",
                "pdf_rel_path": self.pdf_relative_path(normalized_id),
                "error": None,
            },
        )

    def mark_failed(self, quest_root: Path, arxiv_id: str, *, error: str) -> dict[str, Any]:
        """Record ``arxiv_id`` as ``failed`` with the given error text.

        Raises:
            ValueError: if the id is rejected by ``normalize_arxiv_id``.
        """
        normalized_id = self._require_arxiv_id(arxiv_id)
        current = self.get_item(quest_root, normalized_id) or {}
        return self.upsert_item(
            quest_root,
            {
                **current,
                "arxiv_id": normalized_id,
                "status": "failed",
                "error": error,
                "pdf_rel_path": current.get("pdf_rel_path") or self.pdf_relative_path(normalized_id),
            },
        )

    def mark_ready(self, quest_root: Path, arxiv_id: str) -> dict[str, Any]:
        """Record ``arxiv_id`` as ``ready`` and clear any previous error.

        Raises:
            ValueError: if the id is rejected by ``normalize_arxiv_id``.
        """
        normalized_id = self._require_arxiv_id(arxiv_id)
        current = self.get_item(quest_root, normalized_id) or {}
        return self.upsert_item(
            quest_root,
            {
                **current,
                "arxiv_id": normalized_id,
                "status": "ready",
                "error": None,
                "pdf_rel_path": current.get("pdf_rel_path") or self.pdf_relative_path(normalized_id),
            },
        )

    def _materialize_item(self, quest_root: Path, item: dict[str, Any]) -> dict[str, Any]:
        """Normalize ``item`` and attach ``path``/``document_id`` when its PDF exists.

        Both keys are ``None`` when the file is missing, or when the recorded
        ``pdf_rel_path`` is absolute or contains ``..`` and so would point
        outside ``quest_root``.
        """
        normalized = self._normalize_item(item)
        pdf_rel_path = normalized.get("pdf_rel_path") or self.pdf_relative_path(normalized["arxiv_id"])
        normalized["pdf_rel_path"] = pdf_rel_path

        candidate = Path(pdf_rel_path)
        # An absolute path would replace quest_root in the join below and a
        # ".." component would climb out of it; neither may be exposed as a
        # quest document.
        stays_inside_root = not candidate.is_absolute() and ".." not in candidate.parts
        pdf_path = quest_root / candidate
        if stays_inside_root and pdf_path.is_file():
            relative = pdf_path.relative_to(quest_root).as_posix()
            normalized["path"] = relative
            normalized["document_id"] = f"questpath::{relative}"
        else:
            normalized["path"] = None
            normalized["document_id"] = None
        return normalized

    def queue_pdf_download(self, quest_root: Path, arxiv_id: str, *, pdf_url: str | None = None) -> bool:
        """Start a background download of the PDF unless one is not needed.

        Returns ``True`` only when a new worker thread was started. ``False``
        means the id was rejected, the PDF is already on disk (the entry is
        then marked ready), or a download for the same quest and id is
        already in flight on this instance.
        """
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            return False
        if self.pdf_path(quest_root, normalized_id).is_file():
            self.mark_ready(quest_root, normalized_id)
            return False
        target_url = str(pdf_url or "").strip() or f"https://arxiv.org/pdf/{normalized_id}.pdf"
        inflight_key = (str(quest_root.resolve()), normalized_id)
        with self._download_lock:
            if inflight_key in self._inflight_downloads:
                return False
            self._inflight_downloads.add(inflight_key)

        worker = threading.Thread(
            target=self._download_pdf_worker,
            kwargs={
                "quest_root": quest_root,
                "arxiv_id": normalized_id,
                "pdf_url": target_url,
                "inflight_key": inflight_key,
            },
            daemon=True,
            name=f"deepscientist-arxiv-{normalized_id}",
        )
        try:
            worker.start()
        except Exception:
            # The worker's `finally` never runs if the thread cannot start, so
            # release the key here or this paper could never be queued again.
            with self._download_lock:
                self._inflight_downloads.discard(inflight_key)
            raise
        return True

    def _download_pdf_worker(
        self,
        *,
        quest_root: Path,
        arxiv_id: str,
        pdf_url: str,
        inflight_key: tuple[str, str],
    ) -> None:
        """Download ``pdf_url`` to the library and record the outcome.

        The payload is written to a ``.tmp`` sibling and then moved over the
        target so readers never see a partial PDF. Any failure is stored on the
        manifest entry via ``mark_failed``; the in-flight key is always released.
        """
        temp_path: Path | None = None
        try:
            target_path = self.pdf_path(quest_root, arxiv_id)
            # Same directory as _pdf_dir() for plain ids; using the parent also
            # covers ids containing a slash, should normalize_arxiv_id yield any.
            ensure_dir(target_path.parent)
            request = Request(
                pdf_url,
                headers={
                    "User-Agent": USER_AGENT,
                    "Accept": "application/pdf,*/*;q=0.8",
                },
            )
            with urlopen(request, timeout=20) as response:  # noqa: S310
                payload = response.read()
            if not payload.startswith(b"%PDF"):
                raise ValueError("Downloaded payload is not a PDF.")
            temp_path = target_path.with_suffix(f"{target_path.suffix}.tmp")
            temp_path.write_bytes(payload)
            temp_path.replace(target_path)
            self.mark_ready(quest_root, arxiv_id)
        except Exception as exc:  # noqa: BLE001
            if temp_path is not None:
                try:
                    # Do not leave a half-written temp file behind.
                    temp_path.unlink(missing_ok=True)
                except OSError:
                    pass
            self.mark_failed(quest_root, arxiv_id, error=str(exc).strip() or "download_failed")
        finally:
            with self._download_lock:
                self._inflight_downloads.discard(inflight_key)
@@ -22,7 +22,7 @@ from .service import (
22
22
  _coerce_session_status,
23
23
  _parse_progress_marker,
24
24
  )
25
- from ..shared import append_jsonl, ensure_dir, read_json, read_jsonl, utc_now
25
+ from ..shared import append_jsonl, ensure_dir, iter_jsonl, read_json, read_jsonl, utc_now
26
26
 
27
27
  DEFAULT_STOP_GRACE_SECONDS = 5
28
28
  TERMINAL_IO_POLL_SECONDS = 0.02
@@ -298,7 +298,7 @@ def run_monitor(session_dir: Path) -> int:
298
298
  log_path.touch(exist_ok=True)
299
299
  input_path.touch(exist_ok=True)
300
300
  if not input_cursor_path.exists():
301
- _atomic_write_json(input_cursor_path, {"offset": len(read_jsonl(input_path)), "updated_at": utc_now()})
301
+ _atomic_write_json(input_cursor_path, {"offset": sum(1 for _ in iter_jsonl(input_path)), "updated_at": utc_now()})
302
302
 
303
303
  tool_env = os.environ.pop("DS_BASH_EXEC_TOOL_ENV", "")
304
304
  env_payload = os.environ.copy()
@@ -451,9 +451,11 @@ def run_monitor(session_dir: Path) -> int:
451
451
  if output_fd is not None and process.poll() is None:
452
452
  cursor_payload = read_json(input_cursor_path, {}) or {}
453
453
  offset = int(cursor_payload.get("offset") or 0)
454
- input_entries = read_jsonl(input_path)
455
- if offset < len(input_entries):
456
- for entry in input_entries[offset:]:
454
+ total_input_entries = sum(1 for _ in iter_jsonl(input_path))
455
+ if offset < total_input_entries:
456
+ for index, entry in enumerate(iter_jsonl(input_path)):
457
+ if index < offset:
458
+ continue
457
459
  raw_data = str(entry.get("data") or "")
458
460
  if raw_data:
459
461
  try:
@@ -11,12 +11,13 @@ import sys
11
11
  import tempfile
12
12
  import threading
13
13
  import time
14
+ from collections import deque
14
15
  from datetime import UTC, datetime
15
16
  from pathlib import Path
16
17
  from typing import Any
17
18
 
18
19
  from ..mcp.context import McpContext
19
- from ..shared import append_jsonl, ensure_dir, generate_id, read_json, read_jsonl, utc_now
20
+ from ..shared import append_jsonl, ensure_dir, generate_id, iter_jsonl, read_json, read_jsonl, read_jsonl_tail, utc_now
20
21
  from .runtime import TerminalRuntimeManager
21
22
 
22
23
  BASH_STATUS_MARKER_PREFIX = "__DS_BASH_STATUS__"
@@ -24,6 +25,9 @@ BASH_CARRIAGE_RETURN_PREFIX = "__DS_BASH_CR__"
24
25
  BASH_PROGRESS_PREFIX = "__DS_PROGRESS__"
25
26
  BASH_TERMINAL_PROMPT_PREFIX = "__DS_TERMINAL_PROMPT__"
26
27
  DEFAULT_LOG_TAIL_LIMIT = 200
28
+ DEFAULT_INLINE_BASH_LOG_LINE_LIMIT = 2000
29
+ DEFAULT_INLINE_BASH_LOG_HEAD_LINES = 500
30
+ DEFAULT_INLINE_BASH_LOG_TAIL_LINES = 1500
27
31
  DEFAULT_POLL_INTERVAL_SECONDS = 0.35
28
32
  TERMINAL_STATUSES = {"completed", "failed", "terminated"}
29
33
  DEFAULT_TERMINAL_SESSION_ID = "terminal-main"
@@ -46,6 +50,52 @@ def _atomic_write_json(path: Path, payload: Any) -> None:
46
50
  temp_path.replace(path)
47
51
 
48
52
 
53
def _count_jsonl_records(path: Path) -> int:
    """Return how many records ``iter_jsonl`` yields for ``path``.

    The records are consumed one at a time and only counted, so nothing is
    accumulated in a list here.
    """
    record_count = 0
    for _record in iter_jsonl(path):
        record_count += 1
    return record_count
55
+
56
+
57
+ def _build_terminal_log_preview_payload(path: Path) -> dict[str, Any]:
58
+ if not path.exists():
59
+ return {
60
+ "log": "",
61
+ "log_line_count": 0,
62
+ "log_truncated": False,
63
+ }
64
+
65
+ head_lines: list[str] = []
66
+ tail_lines: deque[str] = deque(maxlen=DEFAULT_INLINE_BASH_LOG_TAIL_LINES)
67
+ total = 0
68
+ with path.open("r", encoding="utf-8", errors="replace") as handle:
69
+ for raw_line in handle:
70
+ line = raw_line.rstrip("\n")
71
+ total += 1
72
+ if total <= DEFAULT_INLINE_BASH_LOG_HEAD_LINES:
73
+ head_lines.append(line)
74
+ tail_lines.append(line)
75
+
76
+ if total <= DEFAULT_INLINE_BASH_LOG_LINE_LIMIT:
77
+ return {
78
+ "log": "\n".join(list(tail_lines)),
79
+ "log_line_count": total,
80
+ "log_truncated": False,
81
+ }
82
+
83
+ omitted = max(0, total - DEFAULT_INLINE_BASH_LOG_HEAD_LINES - DEFAULT_INLINE_BASH_LOG_TAIL_LINES)
84
+ marker = (
85
+ "[... omitted "
86
+ f"{omitted} lines from the middle of this log. "
87
+ "Use bash_exec(mode='read', id=..., start=..., tail=...) for a specific window.]"
88
+ )
89
+ return {
90
+ "log": "\n".join(head_lines + [marker] + list(tail_lines)),
91
+ "log_line_count": total,
92
+ "log_truncated": True,
93
+ "log_preview_head_lines": DEFAULT_INLINE_BASH_LOG_HEAD_LINES,
94
+ "log_preview_tail_lines": DEFAULT_INLINE_BASH_LOG_TAIL_LINES,
95
+ "log_preview_omitted_lines": omitted,
96
+ }
97
+
98
+
49
99
  def _normalize_string(value: object) -> str:
50
100
  return str(value or "").strip()
51
101
 
@@ -67,6 +117,14 @@ def _session_sort_key(session: dict[str, Any]) -> tuple[str, str]:
67
117
  def _is_process_alive(pid: object) -> bool:
68
118
  if not isinstance(pid, int) or pid <= 0:
69
119
  return False
120
+ proc_stat_path = Path("/proc") / str(pid) / "stat"
121
+ if proc_stat_path.exists():
122
+ try:
123
+ parts = proc_stat_path.read_text(encoding="utf-8").split()
124
+ except OSError:
125
+ parts = []
126
+ if len(parts) >= 3 and parts[2] == "Z":
127
+ return False
70
128
  try:
71
129
  os.kill(pid, 0)
72
130
  except ProcessLookupError:
@@ -560,7 +618,8 @@ class BashExecService:
560
618
  if not self.meta_path(quest_root, bash_id).exists():
561
619
  raise FileNotFoundError(f"Unknown bash session `{bash_id}`.")
562
620
  deadline = time.monotonic() + 0.6
563
- entries = read_jsonl(self.log_path(quest_root, bash_id))
621
+ path = self.log_path(quest_root, bash_id)
622
+ entries = read_jsonl_tail(path, max(1, limit))
564
623
  while time.monotonic() < deadline:
565
624
  if any(str(entry.get("stream") or "") not in {"system", "prompt"} for entry in entries):
566
625
  break
@@ -572,24 +631,33 @@ class BashExecService:
572
631
  time.sleep(0.05)
573
632
  else:
574
633
  time.sleep(0.03)
575
- entries = read_jsonl(self.log_path(quest_root, bash_id))
634
+ entries = read_jsonl_tail(path, max(1, limit))
576
635
  latest_seq = int(entries[-1].get("seq") or 0) if entries else 0
577
636
  normalized_before = before_seq if isinstance(before_seq, int) and before_seq > 0 else None
578
637
  normalized_after = after_seq if isinstance(after_seq, int) and after_seq >= 0 else None
579
- if normalized_after is not None:
580
- entries = [entry for entry in entries if int(entry.get("seq") or 0) > normalized_after]
581
- if normalized_before is not None:
582
- entries = [entry for entry in entries if int(entry.get("seq") or 0) < normalized_before]
583
- selection_pool = entries
584
- if prefer_visible:
585
- visible_entries = [
586
- entry for entry in entries if str(entry.get("stream") or "") not in {"system", "prompt"}
587
- ]
588
- if visible_entries:
589
- selection_pool = visible_entries
590
638
  normalized_limit = max(1, limit)
591
- truncated = len(selection_pool) > normalized_limit
592
- selected = selection_pool[-normalized_limit:]
639
+ selection_pool: deque[dict[str, Any]] = deque(maxlen=normalized_limit)
640
+ visible_pool: deque[dict[str, Any]] = deque(maxlen=normalized_limit)
641
+ total_filtered = 0
642
+ for entry in iter_jsonl(path):
643
+ seq = int(entry.get("seq") or 0)
644
+ latest_seq = max(latest_seq, seq)
645
+ if normalized_after is not None and seq <= normalized_after:
646
+ continue
647
+ if normalized_before is not None and seq >= normalized_before:
648
+ continue
649
+ total_filtered += 1
650
+ selection_pool.append(entry)
651
+ if str(entry.get("stream") or "") not in {"system", "prompt"}:
652
+ visible_pool.append(entry)
653
+ selected_source: list[dict[str, Any]]
654
+ if prefer_visible and visible_pool:
655
+ selected_source = list(visible_pool)
656
+ truncated = total_filtered > len(visible_pool)
657
+ else:
658
+ selected_source = list(selection_pool)
659
+ truncated = total_filtered > len(selection_pool)
660
+ selected = selected_source[-normalized_limit:]
593
661
  if order == "desc":
594
662
  selected = list(reversed(selected))
595
663
  tail_start_seq = int(selected[0].get("seq") or 0) if selected else None
@@ -860,7 +928,7 @@ class BashExecService:
860
928
  "last_input_at": None,
861
929
  "last_prompt_at": None,
862
930
  "last_command": None,
863
- "history_count": len(read_jsonl(self.history_path(quest_root, bash_id))),
931
+ "history_count": _count_jsonl_records(self.history_path(quest_root, bash_id)),
864
932
  }
865
933
 
866
934
  def ensure_terminal_session(
@@ -910,7 +978,7 @@ class BashExecService:
910
978
  self.prompt_events_path(resolved_quest_root, bash_id).touch()
911
979
  _atomic_write_json(
912
980
  self.input_cursor_path(resolved_quest_root, bash_id),
913
- {"offset": len(read_jsonl(self.input_path(resolved_quest_root, bash_id))), "updated_at": utc_now()},
981
+ {"offset": _count_jsonl_records(self.input_path(resolved_quest_root, bash_id)), "updated_at": utc_now()},
914
982
  )
915
983
  _atomic_write_json(
916
984
  self.line_buffer_path(resolved_quest_root, bash_id),
@@ -1064,7 +1132,7 @@ class BashExecService:
1064
1132
  append_jsonl(self.history_path(quest_root, bash_id), item)
1065
1133
  meta = read_json(self.meta_path(quest_root, bash_id), {})
1066
1134
  meta["last_command"] = completed[-1]["command"]
1067
- meta["history_count"] = len(read_jsonl(self.history_path(quest_root, bash_id)))
1135
+ meta["history_count"] = _count_jsonl_records(self.history_path(quest_root, bash_id))
1068
1136
  meta["updated_at"] = utc_now()
1069
1137
  meta["last_input_at"] = utc_now()
1070
1138
  self._write_meta(quest_root, bash_id, meta)
@@ -1130,7 +1198,7 @@ class BashExecService:
1130
1198
  before_seq=None,
1131
1199
  order="asc",
1132
1200
  )
1133
- history = read_jsonl(self.history_path(quest_root, bash_id))
1201
+ history = read_jsonl_tail(self.history_path(quest_root, bash_id), max(1, command_limit))
1134
1202
  latest_commands = [
1135
1203
  {
1136
1204
  "command_id": item.get("command_id"),
@@ -1181,6 +1249,7 @@ class BashExecService:
1181
1249
  "label": session.get("label"),
1182
1250
  "command": session.get("command"),
1183
1251
  "workdir": session.get("workdir"),
1252
+ "cwd": session.get("cwd"),
1184
1253
  "started_at": session.get("started_at"),
1185
1254
  "finished_at": session.get("finished_at"),
1186
1255
  "exit_code": session.get("exit_code"),
@@ -1199,7 +1268,7 @@ class BashExecService:
1199
1268
  "watchdog_overdue": session.get("watchdog_overdue"),
1200
1269
  }
1201
1270
  if include_log:
1202
- result["log"] = self.read_terminal_log(quest_root, str(session["bash_id"]))
1271
+ result.update(self._log_preview_payload(quest_root, str(session["bash_id"])))
1203
1272
  if export_log or _normalize_string(export_log_to):
1204
1273
  cwd, _ = self.resolve_workdir(context, str(session.get("workdir") or ""))
1205
1274
  result.update(
@@ -1212,3 +1281,6 @@ class BashExecService:
1212
1281
  )
1213
1282
  )
1214
1283
  return result
1284
+
1285
def _log_preview_payload(self, quest_root: Path, bash_id: str) -> dict[str, Any]:
    """Return the inline log preview fields for one bash session.

    Looks up the session's log file through ``self.terminal_log_path`` and
    hands it to ``_build_terminal_log_preview_payload``.
    """
    terminal_log = self.terminal_log_path(quest_root, bash_id)
    return _build_terminal_log_preview_payload(terminal_log)
@@ -6,6 +6,7 @@ from .connectors import (
6
6
  QQConnectorBridge,
7
7
  SlackConnectorBridge,
8
8
  TelegramConnectorBridge,
9
+ WeixinConnectorBridge,
9
10
  WhatsAppConnectorBridge,
10
11
  )
11
12
  from .registry import register_connector_bridge
@@ -13,6 +14,7 @@ from .registry import register_connector_bridge
13
14
 
14
15
  def register_builtin_connector_bridges() -> None:
15
16
  register_connector_bridge("qq", QQConnectorBridge)
17
+ register_connector_bridge("weixin", WeixinConnectorBridge)
16
18
  register_connector_bridge("telegram", TelegramConnectorBridge)
17
19
  register_connector_bridge("discord", DiscordConnectorBridge)
18
20
  register_connector_bridge("slack", SlackConnectorBridge)