@researai/deepscientist 1.5.8 → 1.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +186 -21
- package/README.md +108 -95
- package/assets/branding/connector-qq.png +0 -0
- package/assets/branding/connector-rokid.png +0 -0
- package/assets/branding/connector-weixin.png +0 -0
- package/assets/branding/projects.png +0 -0
- package/bin/ds.js +172 -13
- package/docs/assets/branding/projects.png +0 -0
- package/docs/en/00_QUICK_START.md +308 -70
- package/docs/en/01_SETTINGS_REFERENCE.md +3 -0
- package/docs/en/02_START_RESEARCH_GUIDE.md +112 -0
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +62 -179
- package/docs/en/09_DOCTOR.md +41 -5
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +137 -0
- package/docs/en/11_LICENSE_AND_RISK.md +256 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +427 -0
- package/docs/en/13_CORE_ARCHITECTURE_GUIDE.md +297 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +4 -1
- package/docs/en/README.md +79 -0
- package/docs/images/lingzhu/rokid-agent-platform-create.png +0 -0
- package/docs/images/weixin/weixin-plugin-entry.png +0 -0
- package/docs/images/weixin/weixin-plugin-entry.svg +33 -0
- package/docs/images/weixin/weixin-qr-confirm.svg +30 -0
- package/docs/images/weixin/weixin-quest-media-flow.svg +44 -0
- package/docs/images/weixin/weixin-settings-bind.svg +57 -0
- package/docs/zh/00_QUICK_START.md +315 -74
- package/docs/zh/01_SETTINGS_REFERENCE.md +3 -0
- package/docs/zh/02_START_RESEARCH_GUIDE.md +112 -0
- package/docs/zh/04_LINGZHU_CONNECTOR_GUIDE.md +62 -193
- package/docs/zh/09_DOCTOR.md +41 -5
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +144 -0
- package/docs/zh/11_LICENSE_AND_RISK.md +256 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +423 -0
- package/docs/zh/13_CORE_ARCHITECTURE_GUIDE.md +296 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +4 -1
- package/docs/zh/README.md +126 -0
- package/install.sh +0 -34
- package/package.json +3 -3
- package/pyproject.toml +2 -2
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/annotations.py +343 -0
- package/src/deepscientist/artifact/arxiv.py +484 -37
- package/src/deepscientist/artifact/metrics.py +1 -3
- package/src/deepscientist/artifact/service.py +1347 -111
- package/src/deepscientist/arxiv_library.py +275 -0
- package/src/deepscientist/bash_exec/service.py +9 -0
- package/src/deepscientist/bridges/builtins.py +2 -0
- package/src/deepscientist/bridges/connectors.py +447 -0
- package/src/deepscientist/channels/__init__.py +2 -0
- package/src/deepscientist/channels/builtins.py +3 -1
- package/src/deepscientist/channels/qq.py +1 -1
- package/src/deepscientist/channels/qq_gateway.py +1 -1
- package/src/deepscientist/channels/relay.py +7 -1
- package/src/deepscientist/channels/weixin.py +59 -0
- package/src/deepscientist/channels/weixin_ilink.py +317 -0
- package/src/deepscientist/config/models.py +22 -2
- package/src/deepscientist/config/service.py +431 -60
- package/src/deepscientist/connector/__init__.py +4 -0
- package/src/deepscientist/connector/connector_profiles.py +481 -0
- package/src/deepscientist/connector/lingzhu_support.py +668 -0
- package/src/deepscientist/connector/qq_profiles.py +206 -0
- package/src/deepscientist/connector/weixin_support.py +663 -0
- package/src/deepscientist/connector_profiles.py +1 -374
- package/src/deepscientist/connector_runtime.py +2 -0
- package/src/deepscientist/daemon/api/handlers.py +295 -5
- package/src/deepscientist/daemon/api/router.py +16 -1
- package/src/deepscientist/daemon/app.py +1130 -61
- package/src/deepscientist/doctor.py +5 -2
- package/src/deepscientist/gitops/diff.py +120 -29
- package/src/deepscientist/lingzhu_support.py +1 -182
- package/src/deepscientist/mcp/server.py +14 -5
- package/src/deepscientist/prompts/builder.py +29 -1
- package/src/deepscientist/qq_profiles.py +1 -196
- package/src/deepscientist/quest/node_traces.py +152 -2
- package/src/deepscientist/quest/service.py +169 -43
- package/src/deepscientist/quest/stage_views.py +172 -9
- package/src/deepscientist/registries/baseline.py +56 -4
- package/src/deepscientist/runners/codex.py +55 -3
- package/src/deepscientist/weixin_support.py +1 -0
- package/src/prompts/connectors/lingzhu.md +3 -1
- package/src/prompts/connectors/weixin.md +230 -0
- package/src/prompts/system.md +9 -0
- package/src/skills/idea/SKILL.md +16 -0
- package/src/skills/idea/references/literature-survey-template.md +24 -0
- package/src/skills/idea/references/related-work-playbook.md +4 -0
- package/src/skills/idea/references/selection-gate.md +9 -0
- package/src/skills/write/SKILL.md +1 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AiManusChatView-m2FNtwbn.js → AiManusChatView-D0mTXG4-.js} +156 -48
- package/src/ui/dist/assets/{AnalysisPlugin-BMTF8EGL.js → AnalysisPlugin-Db0cTXxm.js} +1 -1
- package/src/ui/dist/assets/{CliPlugin-BEOWgxCI.js → CliPlugin-DrV8je02.js} +164 -9
- package/src/ui/dist/assets/{CodeEditorPlugin-BCXvjqmb.js → CodeEditorPlugin-QXMSCH71.js} +8 -8
- package/src/ui/dist/assets/{CodeViewerPlugin-DaJcy3nD.js → CodeViewerPlugin-7hhtWj_E.js} +5 -5
- package/src/ui/dist/assets/{DocViewerPlugin-ByfeIq4K.js → DocViewerPlugin-BWMSnRJe.js} +3 -3
- package/src/ui/dist/assets/{GitDiffViewerPlugin-Cksf3VZ-.js → GitDiffViewerPlugin-7J9h9Vy_.js} +20 -21
- package/src/ui/dist/assets/{ImageViewerPlugin-CFz-OsTS.js → ImageViewerPlugin-CHJl_0lr.js} +5 -5
- package/src/ui/dist/assets/{LabCopilotPanel-CJ1cJzoX.js → LabCopilotPanel-1qSow1es.js} +11 -11
- package/src/ui/dist/assets/{LabPlugin-BF3dVJwa.js → LabPlugin-eQpPPCEp.js} +2 -1
- package/src/ui/dist/assets/{LatexPlugin-DDkwZ6Sj.js → LatexPlugin-BwRfi89Z.js} +7 -7
- package/src/ui/dist/assets/{MarkdownViewerPlugin-HAuvurcT.js → MarkdownViewerPlugin-836PVQWV.js} +4 -4
- package/src/ui/dist/assets/{MarketplacePlugin-BtoTYy2C.js → MarketplacePlugin-C2y_556i.js} +3 -3
- package/src/ui/dist/assets/{NotebookEditor-CSJYx7b-.js → NotebookEditor-BRzJbGsn.js} +12 -12
- package/src/ui/dist/assets/{NotebookEditor-DQgRezm_.js → NotebookEditor-DIX7Mlzu.js} +1 -1
- package/src/ui/dist/assets/{PdfLoader-DPa_-fv6.js → PdfLoader-DzRaTAlq.js} +14 -7
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BZpXOEjm.js → PdfMarkdownPlugin-DZUfIUnp.js} +73 -6
- package/src/ui/dist/assets/{PdfViewerPlugin-BT8a6wGR.js → PdfViewerPlugin-BwtICzue.js} +103 -34
- package/src/ui/dist/assets/PdfViewerPlugin-DQ11QcSf.css +3627 -0
- package/src/ui/dist/assets/{SearchPlugin-D_blveZi.js → SearchPlugin-DHeIAMsx.js} +1 -1
- package/src/ui/dist/assets/{TextViewerPlugin-Btx0M3hX.js → TextViewerPlugin-C3tCmFox.js} +5 -4
- package/src/ui/dist/assets/{VNCViewer-DImJO4rO.js → VNCViewer-CQsKVm3t.js} +10 -10
- package/src/ui/dist/assets/bot-BEA2vWuK.js +21 -0
- package/src/ui/dist/assets/branding/logo-rokid.png +0 -0
- package/src/ui/dist/assets/browser-BAcuE0Xj.js +2895 -0
- package/src/ui/dist/assets/{code-BUfXGJSl.js → code-XfbSR8K2.js} +1 -1
- package/src/ui/dist/assets/{file-content-VqamwI3X.js → file-content-BjxNaIfy.js} +1 -1
- package/src/ui/dist/assets/{file-diff-panel-C_wOoS7a.js → file-diff-panel-D_lLVQk0.js} +1 -1
- package/src/ui/dist/assets/{file-socket-D2bTuMVP.js → file-socket-D9x_5vlY.js} +1 -1
- package/src/ui/dist/assets/{image-BZkGJ4mM.js → image-BhWT33W1.js} +1 -1
- package/src/ui/dist/assets/{index-DdRW6RMJ.js → index--c4iXtuy.js} +12 -12
- package/src/ui/dist/assets/{index-CxkvSeKw.js → index-BDxipwrC.js} +2 -2
- package/src/ui/dist/assets/{index-DjggJovS.js → index-DZTZ8mWP.js} +14934 -9613
- package/src/ui/dist/assets/{index-DXZ1daiJ.css → index-Dqj-Mjb4.css} +2 -13
- package/src/ui/dist/assets/index-PJbSbPTy.js +25 -0
- package/src/ui/dist/assets/{monaco-DHMc7kKM.js → monaco-K8izTGgo.js} +1 -1
- package/src/ui/dist/assets/{pdf-effect-queue-DSw_D3RV.js → pdf-effect-queue-DfBors6y.js} +16 -1
- package/src/ui/dist/assets/pdf.worker.min-yatZIOMy.mjs +21 -0
- package/src/ui/dist/assets/{popover-B85oCgCS.js → popover-yFK1J4fL.js} +1 -1
- package/src/ui/dist/assets/{project-sync-DOMCcPac.js → project-sync-PENr2zcz.js} +1 -74
- package/src/ui/dist/assets/select-CAbJDfYv.js +1690 -0
- package/src/ui/dist/assets/{sigma-BO2rQrl3.js → sigma-DEuYJqTl.js} +1 -1
- package/src/ui/dist/assets/{index-D9QIGcmc.js → square-check-big-omoSUmcd.js} +2 -13
- package/src/ui/dist/assets/{trash-BsVEH_dV.js → trash--F119N47.js} +1 -1
- package/src/ui/dist/assets/{useCliAccess-b8L6JuZm.js → useCliAccess-D31UR23I.js} +1 -1
- package/src/ui/dist/assets/{useFileDiffOverlay-BY7uA9hV.js → useFileDiffOverlay-BH6KcMzq.js} +1 -1
- package/src/ui/dist/assets/{wrap-text-BwyVuUIK.js → wrap-text-CZ613PM5.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-RDpLugQP.js → zoom-out-BgDLAv3z.js} +1 -1
- package/src/ui/dist/index.html +2 -2
- package/src/ui/dist/assets/AutoFigurePlugin-BGxN8Umr.css +0 -3056
- package/src/ui/dist/assets/AutoFigurePlugin-DxPdMUNb.js +0 -8149
- package/src/ui/dist/assets/PdfViewerPlugin-BJXtIwj_.css +0 -260
- package/src/ui/dist/assets/Stepper-DH2k75Vo.js +0 -158
- package/src/ui/dist/assets/bibtex-B-Hqu0Sg.js +0 -189
- package/src/ui/dist/assets/file-utils--zJCPN1i.js +0 -109
- package/src/ui/dist/assets/message-square-FUIPIhU2.js +0 -16
- package/src/ui/dist/assets/pdfjs-DU1YE8WO.js +0 -3
- package/src/ui/dist/assets/tooltip-B1OspAkx.js +0 -108
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
from urllib.request import Request, urlopen
|
|
7
|
+
|
|
8
|
+
from .artifact.arxiv import USER_AGENT, normalize_arxiv_id
|
|
9
|
+
from .shared import ensure_dir, read_json, utc_now, write_json
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ArxivLibraryService:
    """Per-quest arXiv library backed by a JSON manifest and cached PDF files.

    Everything lives under ``<quest_root>/literature/arxiv``: the manifest is
    ``index.json`` and downloaded papers are stored as ``pdfs/<arxiv_id>.pdf``.
    Manifest writes are serialized with an instance-level lock; PDF downloads
    run on daemon threads and are de-duplicated per ``(quest_root, arxiv_id)``.
    """

    _SCHEMA_VERSION = 2

    def __init__(self) -> None:
        # Re-entrant on purpose: the mark_* helpers hold the lock across their
        # read-modify-write and upsert_item() acquires it again underneath.
        self._manifest_lock = threading.RLock()
        self._download_lock = threading.Lock()
        # Keys are (resolved quest root, arXiv id) for downloads in progress.
        self._inflight_downloads: set[tuple[str, str]] = set()

    @staticmethod
    def _root(quest_root: Path) -> Path:
        """Return the library directory for a quest."""
        return quest_root / "literature" / "arxiv"

    @classmethod
    def _index_path(cls, quest_root: Path) -> Path:
        """Return the path of the manifest file."""
        return cls._root(quest_root) / "index.json"

    @classmethod
    def _pdf_dir(cls, quest_root: Path) -> Path:
        """Return the directory that holds downloaded PDFs."""
        return cls._root(quest_root) / "pdfs"

    @staticmethod
    def _pdf_file_name(arxiv_id: str) -> str:
        """Return the PDF file name used for ``arxiv_id``."""
        return f"{arxiv_id}.pdf"

    @classmethod
    def pdf_relative_path(cls, arxiv_id: str) -> str:
        """Return the quest-relative POSIX path of the PDF for ``arxiv_id``."""
        return f"literature/arxiv/pdfs/{cls._pdf_file_name(arxiv_id)}"

    @classmethod
    def pdf_path(cls, quest_root: Path, arxiv_id: str) -> Path:
        """Return the filesystem path of the PDF for ``arxiv_id``."""
        return cls._pdf_dir(quest_root) / cls._pdf_file_name(arxiv_id)

    @classmethod
    def _empty_payload(cls) -> dict[str, Any]:
        """Return a fresh manifest with no items."""
        return {
            "schema_version": cls._SCHEMA_VERSION,
            "updated_at": utc_now(),
            "items": [],
        }

    def load_manifest(self, quest_root: Path) -> dict[str, Any]:
        """Read the manifest, repairing its top-level shape.

        Anything other than a dict read from disk is replaced by an empty
        payload, and a non-list ``items`` value is replaced by ``[]``. The
        individual items are returned as stored (not normalized).
        """
        payload = read_json(self._index_path(quest_root), default=None)
        if not isinstance(payload, dict):
            payload = self._empty_payload()
        if not isinstance(payload.get("items"), list):
            payload["items"] = []
        payload["schema_version"] = self._SCHEMA_VERSION
        payload["updated_at"] = str(payload.get("updated_at") or utc_now())
        return payload

    def save_manifest(self, quest_root: Path, payload: dict[str, Any]) -> dict[str, Any]:
        """Write ``payload`` as the manifest and return the dict that was written.

        The schema version and ``updated_at`` are always overwritten; the
        caller's dict is copied (shallowly), not mutated.
        """
        normalized = dict(payload or {})
        normalized["schema_version"] = self._SCHEMA_VERSION
        normalized["updated_at"] = utc_now()
        if not isinstance(normalized.get("items"), list):
            normalized["items"] = []
        ensure_dir(self._root(quest_root))
        write_json(self._index_path(quest_root), normalized)
        return normalized

    @staticmethod
    def _normalize_item(item: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of ``item`` with every known field coerced to its stored form.

        Text fields become stripped strings, "optional" fields become ``None``
        when blank, list fields become lists of non-blank stripped strings and
        ``version`` becomes an ``int`` or ``None``. Unknown keys are kept.
        The operation is idempotent.
        """
        normalized = dict(item or {})

        def text(key: str) -> str:
            return str(normalized.get(key) or "").strip()

        def optional_text(key: str) -> str | None:
            return text(key) or None

        def string_list(key: str) -> list[str]:
            return [str(entry).strip() for entry in (normalized.get(key) or []) if str(entry).strip()]

        # Assignment order is kept stable because it determines key order for
        # fields that were absent from the input.
        normalized["arxiv_id"] = text("arxiv_id")
        normalized["status"] = text("status") or "processing"
        metadata_status = text("metadata_status")
        if not metadata_status and text("metadata_source"):
            # A record that names its metadata source is treated as resolved.
            metadata_status = "ready"
        normalized["metadata_status"] = metadata_status or None
        normalized["title"] = str(
            normalized.get("title") or normalized.get("display_name") or normalized["arxiv_id"]
        ).strip()
        normalized["display_name"] = str(
            normalized.get("display_name") or normalized.get("title") or normalized["arxiv_id"]
        ).strip()
        normalized["abstract"] = text("abstract")
        normalized["overview"] = text("overview")
        normalized["overview_markdown"] = text("overview_markdown")
        normalized["summary_source"] = optional_text("summary_source")
        normalized["overview_source"] = optional_text("overview_source")
        normalized["metadata_source"] = optional_text("metadata_source")
        normalized["published_at"] = text("published_at")
        normalized["primary_class"] = text("primary_class")
        normalized["bibtex"] = optional_text("bibtex")
        normalized["abs_url"] = optional_text("abs_url")
        normalized["pdf_url"] = optional_text("pdf_url")
        normalized["created_at"] = str(normalized.get("created_at") or utc_now()).strip()
        normalized["updated_at"] = str(normalized.get("updated_at") or utc_now()).strip()
        normalized["authors"] = string_list("authors")
        normalized["categories"] = string_list("categories")
        normalized["tags"] = string_list("tags")
        version = normalized.get("version")
        if isinstance(version, int):
            normalized["version"] = int(version)
        elif str(version).isdecimal():
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscripts that int() refuses to parse.
            normalized["version"] = int(str(version))
        else:
            normalized["version"] = None
        normalized["pdf_rel_path"] = optional_text("pdf_rel_path")
        normalized["error"] = optional_text("error")
        return normalized

    def get_item(self, quest_root: Path, arxiv_id: str) -> dict[str, Any] | None:
        """Return the materialized record for ``arxiv_id``, or ``None`` if absent or invalid."""
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            return None
        for raw_item in self.load_manifest(quest_root).get("items") or []:
            # Skip malformed entries, matching the filter in upsert_item().
            if not isinstance(raw_item, dict):
                continue
            if str(raw_item.get("arxiv_id") or "").strip() == normalized_id:
                return self._materialize_item(quest_root, raw_item)
        return None

    def list_items(self, quest_root: Path) -> list[dict[str, Any]]:
        """Return all materialized records that have an id, newest ``updated_at`` first."""
        records = [
            self._materialize_item(quest_root, raw_item)
            for raw_item in self.load_manifest(quest_root).get("items") or []
            if isinstance(raw_item, dict) and str(raw_item.get("arxiv_id") or "").strip()
        ]
        return sorted(records, key=lambda record: str(record.get("updated_at") or ""), reverse=True)

    def upsert_item(self, quest_root: Path, item: dict[str, Any]) -> dict[str, Any]:
        """Insert ``item`` or merge it over the stored record with the same id.

        ``item`` is normalized first, so every known field it omits is written
        with its blank default; callers that want to keep stored values must
        pass them along. The stored ``created_at`` is always preserved for an
        existing record and ``updated_at`` is always refreshed.

        Raises:
            ValueError: if ``item`` has no ``arxiv_id``.
        """
        normalized = self._normalize_item(item)
        arxiv_id = normalized["arxiv_id"]
        if not arxiv_id:
            raise ValueError("`arxiv_id` is required.")
        with self._manifest_lock:
            payload = self.load_manifest(quest_root)
            items = [dict(existing) for existing in (payload.get("items") or []) if isinstance(existing, dict)]
            now = utc_now()
            for index, existing in enumerate(items):
                if str(existing.get("arxiv_id") or "").strip() != arxiv_id:
                    continue
                merged = {**existing, **normalized, "updated_at": now}
                # normalized always carries a created_at (defaulting to "now"),
                # so the stored value has to be restored explicitly.
                if existing.get("created_at"):
                    merged["created_at"] = existing["created_at"]
                items[index] = merged
                break
            else:
                items.append({**normalized, "created_at": now, "updated_at": now})
            payload["items"] = items
            self.save_manifest(quest_root, payload)
            return self.get_item(quest_root, arxiv_id) or normalized

    def mark_processing(self, quest_root: Path, arxiv_id: str, *, display_name: str | None = None) -> dict[str, Any]:
        """Set the record's status to ``processing`` and clear its error.

        Creates the record if needed and resets ``pdf_rel_path`` to the
        canonical location.

        Raises:
            ValueError: if ``arxiv_id`` does not normalize to a valid id.
        """
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            raise ValueError("Invalid arXiv id.")
        # Hold the lock across read + write so a concurrent upsert (for example
        # from a download worker) cannot be overwritten by a stale snapshot.
        with self._manifest_lock:
            current = self.get_item(quest_root, normalized_id) or {}
            return self.upsert_item(
                quest_root,
                {
                    **current,
                    "arxiv_id": normalized_id,
                    "display_name": display_name or current.get("display_name") or normalized_id,
                    "status": "processing",
                    "pdf_rel_path": self.pdf_relative_path(normalized_id),
                    "error": None,
                },
            )

    def mark_failed(self, quest_root: Path, arxiv_id: str, *, error: str) -> dict[str, Any]:
        """Set the record's status to ``failed`` and store ``error``.

        Raises:
            ValueError: if ``arxiv_id`` does not normalize to a valid id.
        """
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            raise ValueError("Invalid arXiv id.")
        with self._manifest_lock:  # atomic read-modify-write, see mark_processing
            current = self.get_item(quest_root, normalized_id) or {}
            return self.upsert_item(
                quest_root,
                {
                    **current,
                    "arxiv_id": normalized_id,
                    "status": "failed",
                    "error": error,
                    "pdf_rel_path": current.get("pdf_rel_path") or self.pdf_relative_path(normalized_id),
                },
            )

    def mark_ready(self, quest_root: Path, arxiv_id: str) -> dict[str, Any]:
        """Set the record's status to ``ready`` and clear its error.

        Raises:
            ValueError: if ``arxiv_id`` does not normalize to a valid id.
        """
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            raise ValueError("Invalid arXiv id.")
        with self._manifest_lock:  # atomic read-modify-write, see mark_processing
            current = self.get_item(quest_root, normalized_id) or {}
            return self.upsert_item(
                quest_root,
                {
                    **current,
                    "arxiv_id": normalized_id,
                    "status": "ready",
                    "error": None,
                    "pdf_rel_path": current.get("pdf_rel_path") or self.pdf_relative_path(normalized_id),
                },
            )

    def _materialize_item(self, quest_root: Path, item: dict[str, Any]) -> dict[str, Any]:
        """Normalize ``item`` and attach ``path`` / ``document_id`` for its PDF.

        Both are ``None`` unless the PDF exists as a regular file inside
        ``quest_root``. A stored ``pdf_rel_path`` that is absolute or climbs
        out of the quest root with ``..`` is treated as missing rather than
        raising or exposing a path outside the quest.
        """
        normalized = self._normalize_item(item)
        pdf_rel_path = normalized.get("pdf_rel_path") or self.pdf_relative_path(normalized["arxiv_id"])
        normalized["pdf_rel_path"] = pdf_rel_path
        pdf_path = quest_root / pdf_rel_path
        relative: str | None = None
        if pdf_path.is_file():
            try:
                inside = pdf_path.relative_to(quest_root)
            except ValueError:
                # An absolute pdf_rel_path replaces quest_root in the join.
                inside = None
            if inside is not None and ".." not in inside.parts:
                relative = inside.as_posix()
        normalized["path"] = relative
        normalized["document_id"] = f"questpath::{relative}" if relative is not None else None
        return normalized

    def queue_pdf_download(self, quest_root: Path, arxiv_id: str, *, pdf_url: str | None = None) -> bool:
        """Start a background download of the PDF unless it is unnecessary.

        Returns ``True`` only when a new worker thread was started. Returns
        ``False`` for an invalid id, when the PDF already exists (the record is
        then marked ready), or when a download for the same quest and id is
        already in flight.
        """
        normalized_id = normalize_arxiv_id(arxiv_id)
        if not normalized_id:
            return False
        if self.pdf_path(quest_root, normalized_id).is_file():
            self.mark_ready(quest_root, normalized_id)
            return False
        target_url = str(pdf_url or "").strip() or f"https://arxiv.org/pdf/{normalized_id}.pdf"
        inflight_key = (str(quest_root.resolve()), normalized_id)
        with self._download_lock:
            if inflight_key in self._inflight_downloads:
                return False
            self._inflight_downloads.add(inflight_key)

        worker = threading.Thread(
            target=self._download_pdf_worker,
            kwargs={
                "quest_root": quest_root,
                "arxiv_id": normalized_id,
                "pdf_url": target_url,
                "inflight_key": inflight_key,
            },
            daemon=True,
            name=f"deepscientist-arxiv-{normalized_id}",
        )
        try:
            worker.start()
        except BaseException:
            # The worker's own cleanup never runs if the thread could not be
            # started, so release the key here or the id stays blocked forever.
            with self._download_lock:
                self._inflight_downloads.discard(inflight_key)
            raise
        return True

    def _download_pdf_worker(
        self,
        *,
        quest_root: Path,
        arxiv_id: str,
        pdf_url: str,
        inflight_key: tuple[str, str],
    ) -> None:
        """Thread body: fetch ``pdf_url``, store it atomically and update the record.

        Any failure is recorded on the item via ``mark_failed``; the in-flight
        key is always released.
        """
        temp_path: Path | None = None
        try:
            ensure_dir(self._pdf_dir(quest_root))
            target_path = self.pdf_path(quest_root, arxiv_id)
            # NOTE(review): pdf_url may be caller-supplied and urlopen accepts
            # non-HTTP schemes; confirm callers only pass trusted URLs.
            request = Request(
                pdf_url,
                headers={
                    "User-Agent": USER_AGENT,
                    "Accept": "application/pdf,*/*;q=0.8",
                },
            )
            with urlopen(request, timeout=20) as response:  # noqa: S310
                payload = response.read()
            # Error pages come back as HTML with status 200; check the magic bytes.
            if not payload.startswith(b"%PDF"):
                raise ValueError("Downloaded payload is not a PDF.")
            # Write next to the target and rename so readers never see a partial file.
            temp_path = target_path.with_suffix(f"{target_path.suffix}.tmp")
            temp_path.write_bytes(payload)
            temp_path.replace(target_path)
            temp_path = None  # renamed away; nothing left to clean up
            self.mark_ready(quest_root, arxiv_id)
        except Exception as exc:  # noqa: BLE001
            if temp_path is not None:
                try:
                    temp_path.unlink()
                except OSError:
                    pass  # best effort: the temp file may never have been created
            self.mark_failed(quest_root, arxiv_id, error=str(exc).strip() or "download_failed")
        finally:
            with self._download_lock:
                self._inflight_downloads.discard(inflight_key)
|
|
@@ -67,6 +67,14 @@ def _session_sort_key(session: dict[str, Any]) -> tuple[str, str]:
|
|
|
67
67
|
def _is_process_alive(pid: object) -> bool:
|
|
68
68
|
if not isinstance(pid, int) or pid <= 0:
|
|
69
69
|
return False
|
|
70
|
+
proc_stat_path = Path("/proc") / str(pid) / "stat"
|
|
71
|
+
if proc_stat_path.exists():
|
|
72
|
+
try:
|
|
73
|
+
parts = proc_stat_path.read_text(encoding="utf-8").split()
|
|
74
|
+
except OSError:
|
|
75
|
+
parts = []
|
|
76
|
+
if len(parts) >= 3 and parts[2] == "Z":
|
|
77
|
+
return False
|
|
70
78
|
try:
|
|
71
79
|
os.kill(pid, 0)
|
|
72
80
|
except ProcessLookupError:
|
|
@@ -1181,6 +1189,7 @@ class BashExecService:
|
|
|
1181
1189
|
"label": session.get("label"),
|
|
1182
1190
|
"command": session.get("command"),
|
|
1183
1191
|
"workdir": session.get("workdir"),
|
|
1192
|
+
"cwd": session.get("cwd"),
|
|
1184
1193
|
"started_at": session.get("started_at"),
|
|
1185
1194
|
"finished_at": session.get("finished_at"),
|
|
1186
1195
|
"exit_code": session.get("exit_code"),
|
|
@@ -6,6 +6,7 @@ from .connectors import (
|
|
|
6
6
|
QQConnectorBridge,
|
|
7
7
|
SlackConnectorBridge,
|
|
8
8
|
TelegramConnectorBridge,
|
|
9
|
+
WeixinConnectorBridge,
|
|
9
10
|
WhatsAppConnectorBridge,
|
|
10
11
|
)
|
|
11
12
|
from .registry import register_connector_bridge
|
|
@@ -13,6 +14,7 @@ from .registry import register_connector_bridge
|
|
|
13
14
|
|
|
14
15
|
def register_builtin_connector_bridges() -> None:
|
|
15
16
|
register_connector_bridge("qq", QQConnectorBridge)
|
|
17
|
+
register_connector_bridge("weixin", WeixinConnectorBridge)
|
|
16
18
|
register_connector_bridge("telegram", TelegramConnectorBridge)
|
|
17
19
|
register_connector_bridge("discord", DiscordConnectorBridge)
|
|
18
20
|
register_connector_bridge("slack", SlackConnectorBridge)
|