@researai/deepscientist 1.5.9 → 1.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -99
- package/assets/branding/connector-qq.png +0 -0
- package/assets/branding/connector-rokid.png +0 -0
- package/assets/branding/connector-weixin.png +0 -0
- package/assets/branding/projects.png +0 -0
- package/bin/ds.js +519 -63
- package/docs/assets/branding/projects.png +0 -0
- package/docs/en/00_QUICK_START.md +338 -68
- package/docs/en/01_SETTINGS_REFERENCE.md +14 -0
- package/docs/en/02_START_RESEARCH_GUIDE.md +180 -4
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +62 -179
- package/docs/en/09_DOCTOR.md +66 -5
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +137 -0
- package/docs/en/11_LICENSE_AND_RISK.md +256 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +446 -0
- package/docs/en/13_CORE_ARCHITECTURE_GUIDE.md +297 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +284 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +4 -1
- package/docs/en/README.md +83 -0
- package/docs/images/lingzhu/rokid-agent-platform-create.png +0 -0
- package/docs/images/weixin/weixin-plugin-entry.png +0 -0
- package/docs/images/weixin/weixin-plugin-entry.svg +33 -0
- package/docs/images/weixin/weixin-qr-confirm.svg +30 -0
- package/docs/images/weixin/weixin-quest-media-flow.svg +44 -0
- package/docs/images/weixin/weixin-settings-bind.svg +57 -0
- package/docs/zh/00_QUICK_START.md +345 -72
- package/docs/zh/01_SETTINGS_REFERENCE.md +14 -0
- package/docs/zh/02_START_RESEARCH_GUIDE.md +181 -3
- package/docs/zh/04_LINGZHU_CONNECTOR_GUIDE.md +62 -193
- package/docs/zh/09_DOCTOR.md +68 -5
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +144 -0
- package/docs/zh/11_LICENSE_AND_RISK.md +256 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +442 -0
- package/docs/zh/13_CORE_ARCHITECTURE_GUIDE.md +296 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +285 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +4 -1
- package/docs/zh/README.md +129 -0
- package/install.sh +0 -34
- package/package.json +2 -2
- package/pyproject.toml +1 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/annotations.py +343 -0
- package/src/deepscientist/artifact/arxiv.py +484 -37
- package/src/deepscientist/artifact/service.py +574 -108
- package/src/deepscientist/arxiv_library.py +275 -0
- package/src/deepscientist/bash_exec/monitor.py +7 -5
- package/src/deepscientist/bash_exec/service.py +93 -21
- package/src/deepscientist/bridges/builtins.py +2 -0
- package/src/deepscientist/bridges/connectors.py +447 -0
- package/src/deepscientist/channels/__init__.py +2 -0
- package/src/deepscientist/channels/builtins.py +3 -1
- package/src/deepscientist/channels/local.py +3 -3
- package/src/deepscientist/channels/qq.py +8 -8
- package/src/deepscientist/channels/qq_gateway.py +1 -1
- package/src/deepscientist/channels/relay.py +14 -8
- package/src/deepscientist/channels/weixin.py +59 -0
- package/src/deepscientist/channels/weixin_ilink.py +388 -0
- package/src/deepscientist/config/models.py +23 -2
- package/src/deepscientist/config/service.py +539 -67
- package/src/deepscientist/connector/__init__.py +4 -0
- package/src/deepscientist/connector/connector_profiles.py +481 -0
- package/src/deepscientist/connector/lingzhu_support.py +668 -0
- package/src/deepscientist/connector/qq_profiles.py +206 -0
- package/src/deepscientist/connector/weixin_support.py +663 -0
- package/src/deepscientist/connector_profiles.py +1 -374
- package/src/deepscientist/connector_runtime.py +2 -0
- package/src/deepscientist/daemon/api/handlers.py +165 -5
- package/src/deepscientist/daemon/api/router.py +13 -1
- package/src/deepscientist/daemon/app.py +1444 -67
- package/src/deepscientist/doctor.py +4 -5
- package/src/deepscientist/gitops/diff.py +120 -29
- package/src/deepscientist/lingzhu_support.py +1 -182
- package/src/deepscientist/mcp/server.py +135 -7
- package/src/deepscientist/prompts/builder.py +128 -11
- package/src/deepscientist/qq_profiles.py +1 -196
- package/src/deepscientist/quest/node_traces.py +23 -0
- package/src/deepscientist/quest/service.py +359 -74
- package/src/deepscientist/quest/stage_views.py +71 -5
- package/src/deepscientist/runners/codex.py +170 -19
- package/src/deepscientist/runners/runtime_overrides.py +6 -0
- package/src/deepscientist/shared.py +33 -14
- package/src/deepscientist/weixin_support.py +1 -0
- package/src/prompts/connectors/lingzhu.md +3 -1
- package/src/prompts/connectors/qq.md +2 -1
- package/src/prompts/connectors/weixin.md +231 -0
- package/src/prompts/contracts/shared_interaction.md +4 -1
- package/src/prompts/system.md +61 -9
- package/src/skills/analysis-campaign/SKILL.md +46 -6
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +21 -8
- package/src/skills/baseline/SKILL.md +1 -1
- package/src/skills/decision/SKILL.md +1 -1
- package/src/skills/experiment/SKILL.md +1 -1
- package/src/skills/finalize/SKILL.md +1 -1
- package/src/skills/idea/SKILL.md +1 -1
- package/src/skills/intake-audit/SKILL.md +1 -1
- package/src/skills/rebuttal/SKILL.md +74 -1
- package/src/skills/rebuttal/references/response-letter-template.md +55 -11
- package/src/skills/review/SKILL.md +118 -1
- package/src/skills/review/references/experiment-todo-template.md +23 -0
- package/src/skills/review/references/review-report-template.md +16 -0
- package/src/skills/review/references/revision-log-template.md +4 -0
- package/src/skills/scout/SKILL.md +1 -1
- package/src/skills/write/SKILL.md +168 -7
- package/src/skills/write/references/paper-experiment-matrix-template.md +131 -0
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AiManusChatView-BKZ103sn.js → AiManusChatView-CnJcXynW.js} +156 -48
- package/src/ui/dist/assets/{AnalysisPlugin-mTTzGAlK.js → AnalysisPlugin-DeyzPEhV.js} +1 -1
- package/src/ui/dist/assets/{CliPlugin-BH58n3GY.js → CliPlugin-CB1YODQn.js} +164 -9
- package/src/ui/dist/assets/{CodeEditorPlugin-BKGRUH7e.js → CodeEditorPlugin-B-xicq1e.js} +8 -8
- package/src/ui/dist/assets/{CodeViewerPlugin-BMADwFWJ.js → CodeViewerPlugin-DT54ysXa.js} +5 -5
- package/src/ui/dist/assets/{DocViewerPlugin-ZOnTIHLN.js → DocViewerPlugin-DQtKT-VD.js} +3 -3
- package/src/ui/dist/assets/{GitDiffViewerPlugin-CQ7h1Djm.js → GitDiffViewerPlugin-hqHbCfnv.js} +20 -21
- package/src/ui/dist/assets/{ImageViewerPlugin-GVS5MsnC.js → ImageViewerPlugin-OcVo33jV.js} +5 -5
- package/src/ui/dist/assets/{LabCopilotPanel-BZNv1JML.js → LabCopilotPanel-DdGwhEUV.js} +11 -11
- package/src/ui/dist/assets/{LabPlugin-TWcJsdQA.js → LabPlugin-Ciz1gDaX.js} +2 -1
- package/src/ui/dist/assets/{LatexPlugin-DIjHiR2x.js → LatexPlugin-BhmjNQRC.js} +37 -11
- package/src/ui/dist/assets/{MarkdownViewerPlugin-D3ooGAH0.js → MarkdownViewerPlugin-BzdVH9Bx.js} +4 -4
- package/src/ui/dist/assets/{MarketplacePlugin-DfVfE9hN.js → MarketplacePlugin-DmyHspXt.js} +3 -3
- package/src/ui/dist/assets/{NotebookEditor-DDl0_Mc0.js → NotebookEditor-BMXKrDRk.js} +1 -1
- package/src/ui/dist/assets/{NotebookEditor-s8JhzuX1.js → NotebookEditor-BTVYRGkm.js} +12 -12
- package/src/ui/dist/assets/{PdfLoader-C2Sf6SJM.js → PdfLoader-CvcjJHXv.js} +14 -7
- package/src/ui/dist/assets/{PdfMarkdownPlugin-CXFLoIsa.js → PdfMarkdownPlugin-DW2ej8Vk.js} +73 -6
- package/src/ui/dist/assets/{PdfViewerPlugin-BYTmz2fK.js → PdfViewerPlugin-CmlDxbhU.js} +103 -34
- package/src/ui/dist/assets/PdfViewerPlugin-DQ11QcSf.css +3627 -0
- package/src/ui/dist/assets/{SearchPlugin-CjWBI1O9.js → SearchPlugin-DAjQZPSv.js} +1 -1
- package/src/ui/dist/assets/{TextViewerPlugin-DdOBU3-S.js → TextViewerPlugin-C-nVAZb_.js} +5 -4
- package/src/ui/dist/assets/{VNCViewer-B8HGgLwQ.js → VNCViewer-D7-dIYon.js} +10 -10
- package/src/ui/dist/assets/bot-C_G4WtNI.js +21 -0
- package/src/ui/dist/assets/branding/logo-rokid.png +0 -0
- package/src/ui/dist/assets/browser-BAcuE0Xj.js +2895 -0
- package/src/ui/dist/assets/{code-BWAY76JP.js → code-Cd7WfiWq.js} +1 -1
- package/src/ui/dist/assets/{file-content-C1NwU5oQ.js → file-content-B57zsL9y.js} +1 -1
- package/src/ui/dist/assets/{file-diff-panel-CywslwB9.js → file-diff-panel-DVoheLFq.js} +1 -1
- package/src/ui/dist/assets/{file-socket-B4kzuOBQ.js → file-socket-B5kXFxZP.js} +1 -1
- package/src/ui/dist/assets/{image-D-NZM-6P.js → image-LLOjkMHF.js} +1 -1
- package/src/ui/dist/assets/{index-DGIYDuTv.css → index-BQG-1s2o.css} +40 -13
- package/src/ui/dist/assets/{index-DHZJ_0TI.js → index-C3r2iGrp.js} +12 -12
- package/src/ui/dist/assets/{index-7Chr1g9c.js → index-CLQauncb.js} +15050 -9561
- package/src/ui/dist/assets/index-Dxa2eYMY.js +25 -0
- package/src/ui/dist/assets/{index-BdM1Gqfr.js → index-hOUOWbW2.js} +2 -2
- package/src/ui/dist/assets/{monaco-Cb2uKKe6.js → monaco-BGGAEii3.js} +1 -1
- package/src/ui/dist/assets/{pdf-effect-queue-DSw_D3RV.js → pdf-effect-queue-DlEr1_y5.js} +16 -1
- package/src/ui/dist/assets/pdf.worker.min-yatZIOMy.mjs +21 -0
- package/src/ui/dist/assets/{popover-Bg72DGgT.js → popover-CWJbJuYY.js} +1 -1
- package/src/ui/dist/assets/{project-sync-Ce_0BglY.js → project-sync-CRJiucYO.js} +18 -77
- package/src/ui/dist/assets/select-CoHB7pvH.js +1690 -0
- package/src/ui/dist/assets/{sigma-DPaACDrh.js → sigma-D5aJWR8J.js} +1 -1
- package/src/ui/dist/assets/{index-CDxNdQdz.js → square-check-big-DUK_mnkS.js} +2 -13
- package/src/ui/dist/assets/{trash-BvTgE5__.js → trash-ChU3SEE3.js} +1 -1
- package/src/ui/dist/assets/{useCliAccess-CgPeMOwP.js → useCliAccess-BrJBV3tY.js} +1 -1
- package/src/ui/dist/assets/{useFileDiffOverlay-xPhz7P5B.js → useFileDiffOverlay-C2OQaVWc.js} +1 -1
- package/src/ui/dist/assets/{wrap-text-C3Un3YQr.js → wrap-text-C7Qqh-om.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-BgxLa0Ri.js → zoom-out-rtX0FKya.js} +1 -1
- package/src/ui/dist/index.html +2 -2
- package/src/ui/dist/assets/AutoFigurePlugin-BGxN8Umr.css +0 -3056
- package/src/ui/dist/assets/AutoFigurePlugin-C_wWw4AP.js +0 -8149
- package/src/ui/dist/assets/PdfViewerPlugin-BJXtIwj_.css +0 -260
- package/src/ui/dist/assets/Stepper-B0Dd8CxK.js +0 -158
- package/src/ui/dist/assets/bibtex-CKaefIN2.js +0 -189
- package/src/ui/dist/assets/file-utils-H2fjA46S.js +0 -109
- package/src/ui/dist/assets/message-square-BzjLiXir.js +0 -16
- package/src/ui/dist/assets/pdfjs-DU1YE8WO.js +0 -3
- package/src/ui/dist/assets/tooltip-C_mA6R0w.js +0 -108
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
from urllib.request import Request, urlopen
|
|
7
|
+
|
|
8
|
+
from .artifact.arxiv import USER_AGENT, normalize_arxiv_id
|
|
9
|
+
from .shared import ensure_dir, read_json, utc_now, write_json
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ArxivLibraryService:
|
|
13
|
+
_SCHEMA_VERSION = 2
|
|
14
|
+
|
|
15
|
+
def __init__(self) -> None:
|
|
16
|
+
self._manifest_lock = threading.Lock()
|
|
17
|
+
self._download_lock = threading.Lock()
|
|
18
|
+
self._inflight_downloads: set[tuple[str, str]] = set()
|
|
19
|
+
|
|
20
|
+
@staticmethod
|
|
21
|
+
def _root(quest_root: Path) -> Path:
|
|
22
|
+
return quest_root / "literature" / "arxiv"
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
25
|
+
def _index_path(cls, quest_root: Path) -> Path:
|
|
26
|
+
return cls._root(quest_root) / "index.json"
|
|
27
|
+
|
|
28
|
+
@classmethod
|
|
29
|
+
def _pdf_dir(cls, quest_root: Path) -> Path:
|
|
30
|
+
return cls._root(quest_root) / "pdfs"
|
|
31
|
+
|
|
32
|
+
@staticmethod
|
|
33
|
+
def _pdf_file_name(arxiv_id: str) -> str:
|
|
34
|
+
return f"{arxiv_id}.pdf"
|
|
35
|
+
|
|
36
|
+
@classmethod
|
|
37
|
+
def pdf_relative_path(cls, arxiv_id: str) -> str:
|
|
38
|
+
return f"literature/arxiv/pdfs/{cls._pdf_file_name(arxiv_id)}"
|
|
39
|
+
|
|
40
|
+
@classmethod
|
|
41
|
+
def pdf_path(cls, quest_root: Path, arxiv_id: str) -> Path:
|
|
42
|
+
return cls._pdf_dir(quest_root) / cls._pdf_file_name(arxiv_id)
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def _empty_payload(cls) -> dict[str, Any]:
|
|
46
|
+
return {
|
|
47
|
+
"schema_version": cls._SCHEMA_VERSION,
|
|
48
|
+
"updated_at": utc_now(),
|
|
49
|
+
"items": [],
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
def load_manifest(self, quest_root: Path) -> dict[str, Any]:
|
|
53
|
+
path = self._index_path(quest_root)
|
|
54
|
+
payload = read_json(path, default=None)
|
|
55
|
+
if not isinstance(payload, dict):
|
|
56
|
+
payload = self._empty_payload()
|
|
57
|
+
items = payload.get("items")
|
|
58
|
+
if not isinstance(items, list):
|
|
59
|
+
payload["items"] = []
|
|
60
|
+
payload["schema_version"] = self._SCHEMA_VERSION
|
|
61
|
+
payload["updated_at"] = str(payload.get("updated_at") or utc_now())
|
|
62
|
+
return payload
|
|
63
|
+
|
|
64
|
+
def save_manifest(self, quest_root: Path, payload: dict[str, Any]) -> dict[str, Any]:
|
|
65
|
+
normalized = dict(payload or {})
|
|
66
|
+
normalized["schema_version"] = self._SCHEMA_VERSION
|
|
67
|
+
normalized["updated_at"] = utc_now()
|
|
68
|
+
if not isinstance(normalized.get("items"), list):
|
|
69
|
+
normalized["items"] = []
|
|
70
|
+
ensure_dir(self._root(quest_root))
|
|
71
|
+
write_json(self._index_path(quest_root), normalized)
|
|
72
|
+
return normalized
|
|
73
|
+
|
|
74
|
+
@staticmethod
|
|
75
|
+
def _normalize_item(item: dict[str, Any]) -> dict[str, Any]:
|
|
76
|
+
normalized = dict(item or {})
|
|
77
|
+
normalized["arxiv_id"] = str(normalized.get("arxiv_id") or "").strip()
|
|
78
|
+
normalized["status"] = str(normalized.get("status") or "processing").strip() or "processing"
|
|
79
|
+
metadata_status = str(normalized.get("metadata_status") or "").strip()
|
|
80
|
+
if not metadata_status:
|
|
81
|
+
metadata_status = "ready" if str(normalized.get("metadata_source") or "").strip() else ""
|
|
82
|
+
normalized["metadata_status"] = metadata_status or None
|
|
83
|
+
normalized["title"] = str(normalized.get("title") or normalized.get("display_name") or normalized["arxiv_id"]).strip()
|
|
84
|
+
normalized["display_name"] = str(
|
|
85
|
+
normalized.get("display_name") or normalized.get("title") or normalized["arxiv_id"]
|
|
86
|
+
).strip()
|
|
87
|
+
normalized["abstract"] = str(normalized.get("abstract") or "").strip()
|
|
88
|
+
normalized["overview"] = str(normalized.get("overview") or "").strip()
|
|
89
|
+
normalized["overview_markdown"] = str(normalized.get("overview_markdown") or "").strip()
|
|
90
|
+
normalized["summary_source"] = str(normalized.get("summary_source") or "").strip() or None
|
|
91
|
+
normalized["overview_source"] = str(normalized.get("overview_source") or "").strip() or None
|
|
92
|
+
normalized["metadata_source"] = str(normalized.get("metadata_source") or "").strip() or None
|
|
93
|
+
normalized["published_at"] = str(normalized.get("published_at") or "").strip()
|
|
94
|
+
normalized["primary_class"] = str(normalized.get("primary_class") or "").strip()
|
|
95
|
+
bibtex = str(normalized.get("bibtex") or "").strip()
|
|
96
|
+
normalized["bibtex"] = bibtex or None
|
|
97
|
+
normalized["abs_url"] = str(normalized.get("abs_url") or "").strip() or None
|
|
98
|
+
normalized["pdf_url"] = str(normalized.get("pdf_url") or "").strip() or None
|
|
99
|
+
normalized["created_at"] = str(normalized.get("created_at") or utc_now()).strip()
|
|
100
|
+
normalized["updated_at"] = str(normalized.get("updated_at") or utc_now()).strip()
|
|
101
|
+
normalized["authors"] = [str(item).strip() for item in (normalized.get("authors") or []) if str(item).strip()]
|
|
102
|
+
normalized["categories"] = [str(item).strip() for item in (normalized.get("categories") or []) if str(item).strip()]
|
|
103
|
+
normalized["tags"] = [str(item).strip() for item in (normalized.get("tags") or []) if str(item).strip()]
|
|
104
|
+
version = normalized.get("version")
|
|
105
|
+
normalized["version"] = int(version) if isinstance(version, int) or str(version).isdigit() else None
|
|
106
|
+
normalized["pdf_rel_path"] = str(normalized.get("pdf_rel_path") or "").strip() or None
|
|
107
|
+
normalized["error"] = str(normalized.get("error") or "").strip() or None
|
|
108
|
+
return normalized
|
|
109
|
+
|
|
110
|
+
def get_item(self, quest_root: Path, arxiv_id: str) -> dict[str, Any] | None:
|
|
111
|
+
normalized_id = normalize_arxiv_id(arxiv_id)
|
|
112
|
+
if not normalized_id:
|
|
113
|
+
return None
|
|
114
|
+
payload = self.load_manifest(quest_root)
|
|
115
|
+
for raw_item in payload.get("items") or []:
|
|
116
|
+
if str(raw_item.get("arxiv_id") or "").strip() == normalized_id:
|
|
117
|
+
return self._materialize_item(quest_root, self._normalize_item(dict(raw_item)))
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
def list_items(self, quest_root: Path) -> list[dict[str, Any]]:
|
|
121
|
+
payload = self.load_manifest(quest_root)
|
|
122
|
+
items = [
|
|
123
|
+
self._materialize_item(quest_root, self._normalize_item(dict(item)))
|
|
124
|
+
for item in payload.get("items") or []
|
|
125
|
+
if str(item.get("arxiv_id") or "").strip()
|
|
126
|
+
]
|
|
127
|
+
return sorted(items, key=lambda item: str(item.get("updated_at") or ""), reverse=True)
|
|
128
|
+
|
|
129
|
+
def upsert_item(self, quest_root: Path, item: dict[str, Any]) -> dict[str, Any]:
|
|
130
|
+
normalized = self._normalize_item(item)
|
|
131
|
+
if not normalized["arxiv_id"]:
|
|
132
|
+
raise ValueError("`arxiv_id` is required.")
|
|
133
|
+
with self._manifest_lock:
|
|
134
|
+
payload = self.load_manifest(quest_root)
|
|
135
|
+
items = [dict(existing) for existing in (payload.get("items") or []) if isinstance(existing, dict)]
|
|
136
|
+
updated = False
|
|
137
|
+
for index, existing in enumerate(items):
|
|
138
|
+
if str(existing.get("arxiv_id") or "").strip() != normalized["arxiv_id"]:
|
|
139
|
+
continue
|
|
140
|
+
merged = {**existing, **normalized, "updated_at": utc_now()}
|
|
141
|
+
if not existing.get("created_at"):
|
|
142
|
+
merged["created_at"] = normalized["created_at"]
|
|
143
|
+
items[index] = merged
|
|
144
|
+
updated = True
|
|
145
|
+
break
|
|
146
|
+
if not updated:
|
|
147
|
+
items.append({**normalized, "created_at": utc_now(), "updated_at": utc_now()})
|
|
148
|
+
payload["items"] = items
|
|
149
|
+
self.save_manifest(quest_root, payload)
|
|
150
|
+
return self.get_item(quest_root, normalized["arxiv_id"]) or normalized
|
|
151
|
+
|
|
152
|
+
def mark_processing(self, quest_root: Path, arxiv_id: str, *, display_name: str | None = None) -> dict[str, Any]:
|
|
153
|
+
normalized_id = normalize_arxiv_id(arxiv_id)
|
|
154
|
+
if not normalized_id:
|
|
155
|
+
raise ValueError("Invalid arXiv id.")
|
|
156
|
+
current = self.get_item(quest_root, normalized_id) or {}
|
|
157
|
+
return self.upsert_item(
|
|
158
|
+
quest_root,
|
|
159
|
+
{
|
|
160
|
+
**current,
|
|
161
|
+
"arxiv_id": normalized_id,
|
|
162
|
+
"display_name": display_name or current.get("display_name") or normalized_id,
|
|
163
|
+
"status": "processing",
|
|
164
|
+
"pdf_rel_path": self.pdf_relative_path(normalized_id),
|
|
165
|
+
"error": None,
|
|
166
|
+
},
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
def mark_failed(self, quest_root: Path, arxiv_id: str, *, error: str) -> dict[str, Any]:
|
|
170
|
+
normalized_id = normalize_arxiv_id(arxiv_id)
|
|
171
|
+
if not normalized_id:
|
|
172
|
+
raise ValueError("Invalid arXiv id.")
|
|
173
|
+
current = self.get_item(quest_root, normalized_id) or {}
|
|
174
|
+
return self.upsert_item(
|
|
175
|
+
quest_root,
|
|
176
|
+
{
|
|
177
|
+
**current,
|
|
178
|
+
"arxiv_id": normalized_id,
|
|
179
|
+
"status": "failed",
|
|
180
|
+
"error": error,
|
|
181
|
+
"pdf_rel_path": current.get("pdf_rel_path") or self.pdf_relative_path(normalized_id),
|
|
182
|
+
},
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
def mark_ready(self, quest_root: Path, arxiv_id: str) -> dict[str, Any]:
|
|
186
|
+
normalized_id = normalize_arxiv_id(arxiv_id)
|
|
187
|
+
if not normalized_id:
|
|
188
|
+
raise ValueError("Invalid arXiv id.")
|
|
189
|
+
current = self.get_item(quest_root, normalized_id) or {}
|
|
190
|
+
return self.upsert_item(
|
|
191
|
+
quest_root,
|
|
192
|
+
{
|
|
193
|
+
**current,
|
|
194
|
+
"arxiv_id": normalized_id,
|
|
195
|
+
"status": "ready",
|
|
196
|
+
"error": None,
|
|
197
|
+
"pdf_rel_path": current.get("pdf_rel_path") or self.pdf_relative_path(normalized_id),
|
|
198
|
+
},
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
def _materialize_item(self, quest_root: Path, item: dict[str, Any]) -> dict[str, Any]:
|
|
202
|
+
normalized = self._normalize_item(item)
|
|
203
|
+
arxiv_id = normalized["arxiv_id"]
|
|
204
|
+
pdf_rel_path = normalized.get("pdf_rel_path") or self.pdf_relative_path(arxiv_id)
|
|
205
|
+
normalized["pdf_rel_path"] = pdf_rel_path
|
|
206
|
+
pdf_path = quest_root / pdf_rel_path
|
|
207
|
+
if pdf_path.exists() and pdf_path.is_file():
|
|
208
|
+
relative = pdf_path.relative_to(quest_root).as_posix()
|
|
209
|
+
normalized["path"] = relative
|
|
210
|
+
normalized["document_id"] = f"questpath::{relative}"
|
|
211
|
+
else:
|
|
212
|
+
normalized["path"] = None
|
|
213
|
+
normalized["document_id"] = None
|
|
214
|
+
return normalized
|
|
215
|
+
|
|
216
|
+
def queue_pdf_download(self, quest_root: Path, arxiv_id: str, *, pdf_url: str | None = None) -> bool:
|
|
217
|
+
normalized_id = normalize_arxiv_id(arxiv_id)
|
|
218
|
+
if not normalized_id:
|
|
219
|
+
return False
|
|
220
|
+
target_path = self.pdf_path(quest_root, normalized_id)
|
|
221
|
+
if target_path.exists() and target_path.is_file():
|
|
222
|
+
self.mark_ready(quest_root, normalized_id)
|
|
223
|
+
return False
|
|
224
|
+
target_url = str(pdf_url or "").strip() or f"https://arxiv.org/pdf/{normalized_id}.pdf"
|
|
225
|
+
inflight_key = (str(quest_root.resolve()), normalized_id)
|
|
226
|
+
with self._download_lock:
|
|
227
|
+
if inflight_key in self._inflight_downloads:
|
|
228
|
+
return False
|
|
229
|
+
self._inflight_downloads.add(inflight_key)
|
|
230
|
+
|
|
231
|
+
thread = threading.Thread(
|
|
232
|
+
target=self._download_pdf_worker,
|
|
233
|
+
kwargs={
|
|
234
|
+
"quest_root": quest_root,
|
|
235
|
+
"arxiv_id": normalized_id,
|
|
236
|
+
"pdf_url": target_url,
|
|
237
|
+
"inflight_key": inflight_key,
|
|
238
|
+
},
|
|
239
|
+
daemon=True,
|
|
240
|
+
name=f"deepscientist-arxiv-{normalized_id}",
|
|
241
|
+
)
|
|
242
|
+
thread.start()
|
|
243
|
+
return True
|
|
244
|
+
|
|
245
|
+
def _download_pdf_worker(
|
|
246
|
+
self,
|
|
247
|
+
*,
|
|
248
|
+
quest_root: Path,
|
|
249
|
+
arxiv_id: str,
|
|
250
|
+
pdf_url: str,
|
|
251
|
+
inflight_key: tuple[str, str],
|
|
252
|
+
) -> None:
|
|
253
|
+
try:
|
|
254
|
+
ensure_dir(self._pdf_dir(quest_root))
|
|
255
|
+
target_path = self.pdf_path(quest_root, arxiv_id)
|
|
256
|
+
request = Request(
|
|
257
|
+
pdf_url,
|
|
258
|
+
headers={
|
|
259
|
+
"User-Agent": USER_AGENT,
|
|
260
|
+
"Accept": "application/pdf,*/*;q=0.8",
|
|
261
|
+
},
|
|
262
|
+
)
|
|
263
|
+
with urlopen(request, timeout=20) as response: # noqa: S310
|
|
264
|
+
payload = response.read()
|
|
265
|
+
if not payload.startswith(b"%PDF"):
|
|
266
|
+
raise ValueError("Downloaded payload is not a PDF.")
|
|
267
|
+
temp_path = target_path.with_suffix(f"{target_path.suffix}.tmp")
|
|
268
|
+
temp_path.write_bytes(payload)
|
|
269
|
+
temp_path.replace(target_path)
|
|
270
|
+
self.mark_ready(quest_root, arxiv_id)
|
|
271
|
+
except Exception as exc: # noqa: BLE001
|
|
272
|
+
self.mark_failed(quest_root, arxiv_id, error=str(exc).strip() or "download_failed")
|
|
273
|
+
finally:
|
|
274
|
+
with self._download_lock:
|
|
275
|
+
self._inflight_downloads.discard(inflight_key)
|
|
@@ -22,7 +22,7 @@ from .service import (
|
|
|
22
22
|
_coerce_session_status,
|
|
23
23
|
_parse_progress_marker,
|
|
24
24
|
)
|
|
25
|
-
from ..shared import append_jsonl, ensure_dir, read_json, read_jsonl, utc_now
|
|
25
|
+
from ..shared import append_jsonl, ensure_dir, iter_jsonl, read_json, read_jsonl, utc_now
|
|
26
26
|
|
|
27
27
|
DEFAULT_STOP_GRACE_SECONDS = 5
|
|
28
28
|
TERMINAL_IO_POLL_SECONDS = 0.02
|
|
@@ -298,7 +298,7 @@ def run_monitor(session_dir: Path) -> int:
|
|
|
298
298
|
log_path.touch(exist_ok=True)
|
|
299
299
|
input_path.touch(exist_ok=True)
|
|
300
300
|
if not input_cursor_path.exists():
|
|
301
|
-
_atomic_write_json(input_cursor_path, {"offset":
|
|
301
|
+
_atomic_write_json(input_cursor_path, {"offset": sum(1 for _ in iter_jsonl(input_path)), "updated_at": utc_now()})
|
|
302
302
|
|
|
303
303
|
tool_env = os.environ.pop("DS_BASH_EXEC_TOOL_ENV", "")
|
|
304
304
|
env_payload = os.environ.copy()
|
|
@@ -451,9 +451,11 @@ def run_monitor(session_dir: Path) -> int:
|
|
|
451
451
|
if output_fd is not None and process.poll() is None:
|
|
452
452
|
cursor_payload = read_json(input_cursor_path, {}) or {}
|
|
453
453
|
offset = int(cursor_payload.get("offset") or 0)
|
|
454
|
-
|
|
455
|
-
if offset <
|
|
456
|
-
for entry in
|
|
454
|
+
total_input_entries = sum(1 for _ in iter_jsonl(input_path))
|
|
455
|
+
if offset < total_input_entries:
|
|
456
|
+
for index, entry in enumerate(iter_jsonl(input_path)):
|
|
457
|
+
if index < offset:
|
|
458
|
+
continue
|
|
457
459
|
raw_data = str(entry.get("data") or "")
|
|
458
460
|
if raw_data:
|
|
459
461
|
try:
|
|
@@ -11,12 +11,13 @@ import sys
|
|
|
11
11
|
import tempfile
|
|
12
12
|
import threading
|
|
13
13
|
import time
|
|
14
|
+
from collections import deque
|
|
14
15
|
from datetime import UTC, datetime
|
|
15
16
|
from pathlib import Path
|
|
16
17
|
from typing import Any
|
|
17
18
|
|
|
18
19
|
from ..mcp.context import McpContext
|
|
19
|
-
from ..shared import append_jsonl, ensure_dir, generate_id, read_json, read_jsonl, utc_now
|
|
20
|
+
from ..shared import append_jsonl, ensure_dir, generate_id, iter_jsonl, read_json, read_jsonl, read_jsonl_tail, utc_now
|
|
20
21
|
from .runtime import TerminalRuntimeManager
|
|
21
22
|
|
|
22
23
|
BASH_STATUS_MARKER_PREFIX = "__DS_BASH_STATUS__"
|
|
@@ -24,6 +25,9 @@ BASH_CARRIAGE_RETURN_PREFIX = "__DS_BASH_CR__"
|
|
|
24
25
|
BASH_PROGRESS_PREFIX = "__DS_PROGRESS__"
|
|
25
26
|
BASH_TERMINAL_PROMPT_PREFIX = "__DS_TERMINAL_PROMPT__"
|
|
26
27
|
DEFAULT_LOG_TAIL_LIMIT = 200
|
|
28
|
+
DEFAULT_INLINE_BASH_LOG_LINE_LIMIT = 2000
|
|
29
|
+
DEFAULT_INLINE_BASH_LOG_HEAD_LINES = 500
|
|
30
|
+
DEFAULT_INLINE_BASH_LOG_TAIL_LINES = 1500
|
|
27
31
|
DEFAULT_POLL_INTERVAL_SECONDS = 0.35
|
|
28
32
|
TERMINAL_STATUSES = {"completed", "failed", "terminated"}
|
|
29
33
|
DEFAULT_TERMINAL_SESSION_ID = "terminal-main"
|
|
@@ -46,6 +50,52 @@ def _atomic_write_json(path: Path, payload: Any) -> None:
|
|
|
46
50
|
temp_path.replace(path)
|
|
47
51
|
|
|
48
52
|
|
|
53
|
+
def _count_jsonl_records(path: Path) -> int:
    """Return how many records ``iter_jsonl`` yields for ``path``."""
    record_count = 0
    for _record in iter_jsonl(path):
        record_count += 1
    return record_count
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _build_terminal_log_preview_payload(path: Path) -> dict[str, Any]:
|
|
58
|
+
if not path.exists():
|
|
59
|
+
return {
|
|
60
|
+
"log": "",
|
|
61
|
+
"log_line_count": 0,
|
|
62
|
+
"log_truncated": False,
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
head_lines: list[str] = []
|
|
66
|
+
tail_lines: deque[str] = deque(maxlen=DEFAULT_INLINE_BASH_LOG_TAIL_LINES)
|
|
67
|
+
total = 0
|
|
68
|
+
with path.open("r", encoding="utf-8", errors="replace") as handle:
|
|
69
|
+
for raw_line in handle:
|
|
70
|
+
line = raw_line.rstrip("\n")
|
|
71
|
+
total += 1
|
|
72
|
+
if total <= DEFAULT_INLINE_BASH_LOG_HEAD_LINES:
|
|
73
|
+
head_lines.append(line)
|
|
74
|
+
tail_lines.append(line)
|
|
75
|
+
|
|
76
|
+
if total <= DEFAULT_INLINE_BASH_LOG_LINE_LIMIT:
|
|
77
|
+
return {
|
|
78
|
+
"log": "\n".join(list(tail_lines)),
|
|
79
|
+
"log_line_count": total,
|
|
80
|
+
"log_truncated": False,
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
omitted = max(0, total - DEFAULT_INLINE_BASH_LOG_HEAD_LINES - DEFAULT_INLINE_BASH_LOG_TAIL_LINES)
|
|
84
|
+
marker = (
|
|
85
|
+
"[... omitted "
|
|
86
|
+
f"{omitted} lines from the middle of this log. "
|
|
87
|
+
"Use bash_exec(mode='read', id=..., start=..., tail=...) for a specific window.]"
|
|
88
|
+
)
|
|
89
|
+
return {
|
|
90
|
+
"log": "\n".join(head_lines + [marker] + list(tail_lines)),
|
|
91
|
+
"log_line_count": total,
|
|
92
|
+
"log_truncated": True,
|
|
93
|
+
"log_preview_head_lines": DEFAULT_INLINE_BASH_LOG_HEAD_LINES,
|
|
94
|
+
"log_preview_tail_lines": DEFAULT_INLINE_BASH_LOG_TAIL_LINES,
|
|
95
|
+
"log_preview_omitted_lines": omitted,
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
|
|
49
99
|
def _normalize_string(value: object) -> str:
|
|
50
100
|
return str(value or "").strip()
|
|
51
101
|
|
|
@@ -67,6 +117,14 @@ def _session_sort_key(session: dict[str, Any]) -> tuple[str, str]:
|
|
|
67
117
|
def _is_process_alive(pid: object) -> bool:
|
|
68
118
|
if not isinstance(pid, int) or pid <= 0:
|
|
69
119
|
return False
|
|
120
|
+
proc_stat_path = Path("/proc") / str(pid) / "stat"
|
|
121
|
+
if proc_stat_path.exists():
|
|
122
|
+
try:
|
|
123
|
+
parts = proc_stat_path.read_text(encoding="utf-8").split()
|
|
124
|
+
except OSError:
|
|
125
|
+
parts = []
|
|
126
|
+
if len(parts) >= 3 and parts[2] == "Z":
|
|
127
|
+
return False
|
|
70
128
|
try:
|
|
71
129
|
os.kill(pid, 0)
|
|
72
130
|
except ProcessLookupError:
|
|
@@ -560,7 +618,8 @@ class BashExecService:
|
|
|
560
618
|
if not self.meta_path(quest_root, bash_id).exists():
|
|
561
619
|
raise FileNotFoundError(f"Unknown bash session `{bash_id}`.")
|
|
562
620
|
deadline = time.monotonic() + 0.6
|
|
563
|
-
|
|
621
|
+
path = self.log_path(quest_root, bash_id)
|
|
622
|
+
entries = read_jsonl_tail(path, max(1, limit))
|
|
564
623
|
while time.monotonic() < deadline:
|
|
565
624
|
if any(str(entry.get("stream") or "") not in {"system", "prompt"} for entry in entries):
|
|
566
625
|
break
|
|
@@ -572,24 +631,33 @@ class BashExecService:
|
|
|
572
631
|
time.sleep(0.05)
|
|
573
632
|
else:
|
|
574
633
|
time.sleep(0.03)
|
|
575
|
-
entries =
|
|
634
|
+
entries = read_jsonl_tail(path, max(1, limit))
|
|
576
635
|
latest_seq = int(entries[-1].get("seq") or 0) if entries else 0
|
|
577
636
|
normalized_before = before_seq if isinstance(before_seq, int) and before_seq > 0 else None
|
|
578
637
|
normalized_after = after_seq if isinstance(after_seq, int) and after_seq >= 0 else None
|
|
579
|
-
if normalized_after is not None:
|
|
580
|
-
entries = [entry for entry in entries if int(entry.get("seq") or 0) > normalized_after]
|
|
581
|
-
if normalized_before is not None:
|
|
582
|
-
entries = [entry for entry in entries if int(entry.get("seq") or 0) < normalized_before]
|
|
583
|
-
selection_pool = entries
|
|
584
|
-
if prefer_visible:
|
|
585
|
-
visible_entries = [
|
|
586
|
-
entry for entry in entries if str(entry.get("stream") or "") not in {"system", "prompt"}
|
|
587
|
-
]
|
|
588
|
-
if visible_entries:
|
|
589
|
-
selection_pool = visible_entries
|
|
590
638
|
normalized_limit = max(1, limit)
|
|
591
|
-
|
|
592
|
-
|
|
639
|
+
selection_pool: deque[dict[str, Any]] = deque(maxlen=normalized_limit)
|
|
640
|
+
visible_pool: deque[dict[str, Any]] = deque(maxlen=normalized_limit)
|
|
641
|
+
total_filtered = 0
|
|
642
|
+
for entry in iter_jsonl(path):
|
|
643
|
+
seq = int(entry.get("seq") or 0)
|
|
644
|
+
latest_seq = max(latest_seq, seq)
|
|
645
|
+
if normalized_after is not None and seq <= normalized_after:
|
|
646
|
+
continue
|
|
647
|
+
if normalized_before is not None and seq >= normalized_before:
|
|
648
|
+
continue
|
|
649
|
+
total_filtered += 1
|
|
650
|
+
selection_pool.append(entry)
|
|
651
|
+
if str(entry.get("stream") or "") not in {"system", "prompt"}:
|
|
652
|
+
visible_pool.append(entry)
|
|
653
|
+
selected_source: list[dict[str, Any]]
|
|
654
|
+
if prefer_visible and visible_pool:
|
|
655
|
+
selected_source = list(visible_pool)
|
|
656
|
+
truncated = total_filtered > len(visible_pool)
|
|
657
|
+
else:
|
|
658
|
+
selected_source = list(selection_pool)
|
|
659
|
+
truncated = total_filtered > len(selection_pool)
|
|
660
|
+
selected = selected_source[-normalized_limit:]
|
|
593
661
|
if order == "desc":
|
|
594
662
|
selected = list(reversed(selected))
|
|
595
663
|
tail_start_seq = int(selected[0].get("seq") or 0) if selected else None
|
|
@@ -860,7 +928,7 @@ class BashExecService:
|
|
|
860
928
|
"last_input_at": None,
|
|
861
929
|
"last_prompt_at": None,
|
|
862
930
|
"last_command": None,
|
|
863
|
-
"history_count":
|
|
931
|
+
"history_count": _count_jsonl_records(self.history_path(quest_root, bash_id)),
|
|
864
932
|
}
|
|
865
933
|
|
|
866
934
|
def ensure_terminal_session(
|
|
@@ -910,7 +978,7 @@ class BashExecService:
|
|
|
910
978
|
self.prompt_events_path(resolved_quest_root, bash_id).touch()
|
|
911
979
|
_atomic_write_json(
|
|
912
980
|
self.input_cursor_path(resolved_quest_root, bash_id),
|
|
913
|
-
{"offset":
|
|
981
|
+
{"offset": _count_jsonl_records(self.input_path(resolved_quest_root, bash_id)), "updated_at": utc_now()},
|
|
914
982
|
)
|
|
915
983
|
_atomic_write_json(
|
|
916
984
|
self.line_buffer_path(resolved_quest_root, bash_id),
|
|
@@ -1064,7 +1132,7 @@ class BashExecService:
|
|
|
1064
1132
|
append_jsonl(self.history_path(quest_root, bash_id), item)
|
|
1065
1133
|
meta = read_json(self.meta_path(quest_root, bash_id), {})
|
|
1066
1134
|
meta["last_command"] = completed[-1]["command"]
|
|
1067
|
-
meta["history_count"] =
|
|
1135
|
+
meta["history_count"] = _count_jsonl_records(self.history_path(quest_root, bash_id))
|
|
1068
1136
|
meta["updated_at"] = utc_now()
|
|
1069
1137
|
meta["last_input_at"] = utc_now()
|
|
1070
1138
|
self._write_meta(quest_root, bash_id, meta)
|
|
@@ -1130,7 +1198,7 @@ class BashExecService:
|
|
|
1130
1198
|
before_seq=None,
|
|
1131
1199
|
order="asc",
|
|
1132
1200
|
)
|
|
1133
|
-
history =
|
|
1201
|
+
history = read_jsonl_tail(self.history_path(quest_root, bash_id), max(1, command_limit))
|
|
1134
1202
|
latest_commands = [
|
|
1135
1203
|
{
|
|
1136
1204
|
"command_id": item.get("command_id"),
|
|
@@ -1181,6 +1249,7 @@ class BashExecService:
|
|
|
1181
1249
|
"label": session.get("label"),
|
|
1182
1250
|
"command": session.get("command"),
|
|
1183
1251
|
"workdir": session.get("workdir"),
|
|
1252
|
+
"cwd": session.get("cwd"),
|
|
1184
1253
|
"started_at": session.get("started_at"),
|
|
1185
1254
|
"finished_at": session.get("finished_at"),
|
|
1186
1255
|
"exit_code": session.get("exit_code"),
|
|
@@ -1199,7 +1268,7 @@ class BashExecService:
|
|
|
1199
1268
|
"watchdog_overdue": session.get("watchdog_overdue"),
|
|
1200
1269
|
}
|
|
1201
1270
|
if include_log:
|
|
1202
|
-
result
|
|
1271
|
+
result.update(self._log_preview_payload(quest_root, str(session["bash_id"])))
|
|
1203
1272
|
if export_log or _normalize_string(export_log_to):
|
|
1204
1273
|
cwd, _ = self.resolve_workdir(context, str(session.get("workdir") or ""))
|
|
1205
1274
|
result.update(
|
|
@@ -1212,3 +1281,6 @@ class BashExecService:
|
|
|
1212
1281
|
)
|
|
1213
1282
|
)
|
|
1214
1283
|
return result
|
|
1284
|
+
|
|
1285
|
+
def _log_preview_payload(self, quest_root: Path, bash_id: str) -> dict[str, Any]:
|
|
1286
|
+
return _build_terminal_log_preview_payload(self.terminal_log_path(quest_root, bash_id))
|
|
@@ -6,6 +6,7 @@ from .connectors import (
|
|
|
6
6
|
QQConnectorBridge,
|
|
7
7
|
SlackConnectorBridge,
|
|
8
8
|
TelegramConnectorBridge,
|
|
9
|
+
WeixinConnectorBridge,
|
|
9
10
|
WhatsAppConnectorBridge,
|
|
10
11
|
)
|
|
11
12
|
from .registry import register_connector_bridge
|
|
@@ -13,6 +14,7 @@ from .registry import register_connector_bridge
|
|
|
13
14
|
|
|
14
15
|
def register_builtin_connector_bridges() -> None:
|
|
15
16
|
register_connector_bridge("qq", QQConnectorBridge)
|
|
17
|
+
register_connector_bridge("weixin", WeixinConnectorBridge)
|
|
16
18
|
register_connector_bridge("telegram", TelegramConnectorBridge)
|
|
17
19
|
register_connector_bridge("discord", DiscordConnectorBridge)
|
|
18
20
|
register_connector_bridge("slack", SlackConnectorBridge)
|