@researai/deepscientist 1.5.8 → 1.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +186 -21
- package/README.md +108 -95
- package/assets/branding/connector-qq.png +0 -0
- package/assets/branding/connector-rokid.png +0 -0
- package/assets/branding/connector-weixin.png +0 -0
- package/assets/branding/projects.png +0 -0
- package/bin/ds.js +172 -13
- package/docs/assets/branding/projects.png +0 -0
- package/docs/en/00_QUICK_START.md +308 -70
- package/docs/en/01_SETTINGS_REFERENCE.md +3 -0
- package/docs/en/02_START_RESEARCH_GUIDE.md +112 -0
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +62 -179
- package/docs/en/09_DOCTOR.md +41 -5
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +137 -0
- package/docs/en/11_LICENSE_AND_RISK.md +256 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +427 -0
- package/docs/en/13_CORE_ARCHITECTURE_GUIDE.md +297 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +4 -1
- package/docs/en/README.md +79 -0
- package/docs/images/lingzhu/rokid-agent-platform-create.png +0 -0
- package/docs/images/weixin/weixin-plugin-entry.png +0 -0
- package/docs/images/weixin/weixin-plugin-entry.svg +33 -0
- package/docs/images/weixin/weixin-qr-confirm.svg +30 -0
- package/docs/images/weixin/weixin-quest-media-flow.svg +44 -0
- package/docs/images/weixin/weixin-settings-bind.svg +57 -0
- package/docs/zh/00_QUICK_START.md +315 -74
- package/docs/zh/01_SETTINGS_REFERENCE.md +3 -0
- package/docs/zh/02_START_RESEARCH_GUIDE.md +112 -0
- package/docs/zh/04_LINGZHU_CONNECTOR_GUIDE.md +62 -193
- package/docs/zh/09_DOCTOR.md +41 -5
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +144 -0
- package/docs/zh/11_LICENSE_AND_RISK.md +256 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +423 -0
- package/docs/zh/13_CORE_ARCHITECTURE_GUIDE.md +296 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +506 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +4 -1
- package/docs/zh/README.md +126 -0
- package/install.sh +0 -34
- package/package.json +3 -3
- package/pyproject.toml +2 -2
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/annotations.py +343 -0
- package/src/deepscientist/artifact/arxiv.py +484 -37
- package/src/deepscientist/artifact/metrics.py +1 -3
- package/src/deepscientist/artifact/service.py +1347 -111
- package/src/deepscientist/arxiv_library.py +275 -0
- package/src/deepscientist/bash_exec/service.py +9 -0
- package/src/deepscientist/bridges/builtins.py +2 -0
- package/src/deepscientist/bridges/connectors.py +447 -0
- package/src/deepscientist/channels/__init__.py +2 -0
- package/src/deepscientist/channels/builtins.py +3 -1
- package/src/deepscientist/channels/qq.py +1 -1
- package/src/deepscientist/channels/qq_gateway.py +1 -1
- package/src/deepscientist/channels/relay.py +7 -1
- package/src/deepscientist/channels/weixin.py +59 -0
- package/src/deepscientist/channels/weixin_ilink.py +317 -0
- package/src/deepscientist/config/models.py +22 -2
- package/src/deepscientist/config/service.py +431 -60
- package/src/deepscientist/connector/__init__.py +4 -0
- package/src/deepscientist/connector/connector_profiles.py +481 -0
- package/src/deepscientist/connector/lingzhu_support.py +668 -0
- package/src/deepscientist/connector/qq_profiles.py +206 -0
- package/src/deepscientist/connector/weixin_support.py +663 -0
- package/src/deepscientist/connector_profiles.py +1 -374
- package/src/deepscientist/connector_runtime.py +2 -0
- package/src/deepscientist/daemon/api/handlers.py +295 -5
- package/src/deepscientist/daemon/api/router.py +16 -1
- package/src/deepscientist/daemon/app.py +1130 -61
- package/src/deepscientist/doctor.py +5 -2
- package/src/deepscientist/gitops/diff.py +120 -29
- package/src/deepscientist/lingzhu_support.py +1 -182
- package/src/deepscientist/mcp/server.py +14 -5
- package/src/deepscientist/prompts/builder.py +29 -1
- package/src/deepscientist/qq_profiles.py +1 -196
- package/src/deepscientist/quest/node_traces.py +152 -2
- package/src/deepscientist/quest/service.py +169 -43
- package/src/deepscientist/quest/stage_views.py +172 -9
- package/src/deepscientist/registries/baseline.py +56 -4
- package/src/deepscientist/runners/codex.py +55 -3
- package/src/deepscientist/weixin_support.py +1 -0
- package/src/prompts/connectors/lingzhu.md +3 -1
- package/src/prompts/connectors/weixin.md +230 -0
- package/src/prompts/system.md +9 -0
- package/src/skills/idea/SKILL.md +16 -0
- package/src/skills/idea/references/literature-survey-template.md +24 -0
- package/src/skills/idea/references/related-work-playbook.md +4 -0
- package/src/skills/idea/references/selection-gate.md +9 -0
- package/src/skills/write/SKILL.md +1 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AiManusChatView-m2FNtwbn.js → AiManusChatView-D0mTXG4-.js} +156 -48
- package/src/ui/dist/assets/{AnalysisPlugin-BMTF8EGL.js → AnalysisPlugin-Db0cTXxm.js} +1 -1
- package/src/ui/dist/assets/{CliPlugin-BEOWgxCI.js → CliPlugin-DrV8je02.js} +164 -9
- package/src/ui/dist/assets/{CodeEditorPlugin-BCXvjqmb.js → CodeEditorPlugin-QXMSCH71.js} +8 -8
- package/src/ui/dist/assets/{CodeViewerPlugin-DaJcy3nD.js → CodeViewerPlugin-7hhtWj_E.js} +5 -5
- package/src/ui/dist/assets/{DocViewerPlugin-ByfeIq4K.js → DocViewerPlugin-BWMSnRJe.js} +3 -3
- package/src/ui/dist/assets/{GitDiffViewerPlugin-Cksf3VZ-.js → GitDiffViewerPlugin-7J9h9Vy_.js} +20 -21
- package/src/ui/dist/assets/{ImageViewerPlugin-CFz-OsTS.js → ImageViewerPlugin-CHJl_0lr.js} +5 -5
- package/src/ui/dist/assets/{LabCopilotPanel-CJ1cJzoX.js → LabCopilotPanel-1qSow1es.js} +11 -11
- package/src/ui/dist/assets/{LabPlugin-BF3dVJwa.js → LabPlugin-eQpPPCEp.js} +2 -1
- package/src/ui/dist/assets/{LatexPlugin-DDkwZ6Sj.js → LatexPlugin-BwRfi89Z.js} +7 -7
- package/src/ui/dist/assets/{MarkdownViewerPlugin-HAuvurcT.js → MarkdownViewerPlugin-836PVQWV.js} +4 -4
- package/src/ui/dist/assets/{MarketplacePlugin-BtoTYy2C.js → MarketplacePlugin-C2y_556i.js} +3 -3
- package/src/ui/dist/assets/{NotebookEditor-CSJYx7b-.js → NotebookEditor-BRzJbGsn.js} +12 -12
- package/src/ui/dist/assets/{NotebookEditor-DQgRezm_.js → NotebookEditor-DIX7Mlzu.js} +1 -1
- package/src/ui/dist/assets/{PdfLoader-DPa_-fv6.js → PdfLoader-DzRaTAlq.js} +14 -7
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BZpXOEjm.js → PdfMarkdownPlugin-DZUfIUnp.js} +73 -6
- package/src/ui/dist/assets/{PdfViewerPlugin-BT8a6wGR.js → PdfViewerPlugin-BwtICzue.js} +103 -34
- package/src/ui/dist/assets/PdfViewerPlugin-DQ11QcSf.css +3627 -0
- package/src/ui/dist/assets/{SearchPlugin-D_blveZi.js → SearchPlugin-DHeIAMsx.js} +1 -1
- package/src/ui/dist/assets/{TextViewerPlugin-Btx0M3hX.js → TextViewerPlugin-C3tCmFox.js} +5 -4
- package/src/ui/dist/assets/{VNCViewer-DImJO4rO.js → VNCViewer-CQsKVm3t.js} +10 -10
- package/src/ui/dist/assets/bot-BEA2vWuK.js +21 -0
- package/src/ui/dist/assets/branding/logo-rokid.png +0 -0
- package/src/ui/dist/assets/browser-BAcuE0Xj.js +2895 -0
- package/src/ui/dist/assets/{code-BUfXGJSl.js → code-XfbSR8K2.js} +1 -1
- package/src/ui/dist/assets/{file-content-VqamwI3X.js → file-content-BjxNaIfy.js} +1 -1
- package/src/ui/dist/assets/{file-diff-panel-C_wOoS7a.js → file-diff-panel-D_lLVQk0.js} +1 -1
- package/src/ui/dist/assets/{file-socket-D2bTuMVP.js → file-socket-D9x_5vlY.js} +1 -1
- package/src/ui/dist/assets/{image-BZkGJ4mM.js → image-BhWT33W1.js} +1 -1
- package/src/ui/dist/assets/{index-DdRW6RMJ.js → index--c4iXtuy.js} +12 -12
- package/src/ui/dist/assets/{index-CxkvSeKw.js → index-BDxipwrC.js} +2 -2
- package/src/ui/dist/assets/{index-DjggJovS.js → index-DZTZ8mWP.js} +14934 -9613
- package/src/ui/dist/assets/{index-DXZ1daiJ.css → index-Dqj-Mjb4.css} +2 -13
- package/src/ui/dist/assets/index-PJbSbPTy.js +25 -0
- package/src/ui/dist/assets/{monaco-DHMc7kKM.js → monaco-K8izTGgo.js} +1 -1
- package/src/ui/dist/assets/{pdf-effect-queue-DSw_D3RV.js → pdf-effect-queue-DfBors6y.js} +16 -1
- package/src/ui/dist/assets/pdf.worker.min-yatZIOMy.mjs +21 -0
- package/src/ui/dist/assets/{popover-B85oCgCS.js → popover-yFK1J4fL.js} +1 -1
- package/src/ui/dist/assets/{project-sync-DOMCcPac.js → project-sync-PENr2zcz.js} +1 -74
- package/src/ui/dist/assets/select-CAbJDfYv.js +1690 -0
- package/src/ui/dist/assets/{sigma-BO2rQrl3.js → sigma-DEuYJqTl.js} +1 -1
- package/src/ui/dist/assets/{index-D9QIGcmc.js → square-check-big-omoSUmcd.js} +2 -13
- package/src/ui/dist/assets/{trash-BsVEH_dV.js → trash--F119N47.js} +1 -1
- package/src/ui/dist/assets/{useCliAccess-b8L6JuZm.js → useCliAccess-D31UR23I.js} +1 -1
- package/src/ui/dist/assets/{useFileDiffOverlay-BY7uA9hV.js → useFileDiffOverlay-BH6KcMzq.js} +1 -1
- package/src/ui/dist/assets/{wrap-text-BwyVuUIK.js → wrap-text-CZ613PM5.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-RDpLugQP.js → zoom-out-BgDLAv3z.js} +1 -1
- package/src/ui/dist/index.html +2 -2
- package/src/ui/dist/assets/AutoFigurePlugin-BGxN8Umr.css +0 -3056
- package/src/ui/dist/assets/AutoFigurePlugin-DxPdMUNb.js +0 -8149
- package/src/ui/dist/assets/PdfViewerPlugin-BJXtIwj_.css +0 -260
- package/src/ui/dist/assets/Stepper-DH2k75Vo.js +0 -158
- package/src/ui/dist/assets/bibtex-B-Hqu0Sg.js +0 -189
- package/src/ui/dist/assets/file-utils--zJCPN1i.js +0 -109
- package/src/ui/dist/assets/message-square-FUIPIhU2.js +0 -16
- package/src/ui/dist/assets/pdfjs-DU1YE8WO.js +0 -3
- package/src/ui/dist/assets/tooltip-B1OspAkx.js +0 -108
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
+
import xml.etree.ElementTree as ET
|
|
4
5
|
from collections.abc import Callable
|
|
5
6
|
from dataclasses import dataclass
|
|
6
7
|
from html import unescape
|
|
@@ -10,6 +11,11 @@ from urllib.request import Request, urlopen
|
|
|
10
11
|
|
|
11
12
|
DEFAULT_TIMEOUT_SECONDS = 6
|
|
12
13
|
USER_AGENT = "DeepScientist/0.1"
|
|
14
|
+
ARXIV_API_URL = "http://export.arxiv.org/api/query?id_list={paper_id}"
|
|
15
|
+
ARXIV_XML_NAMESPACES = {
|
|
16
|
+
"atom": "http://www.w3.org/2005/Atom",
|
|
17
|
+
"arxiv": "http://arxiv.org/schemas/atom",
|
|
18
|
+
}
|
|
13
19
|
|
|
14
20
|
|
|
15
21
|
@dataclass(frozen=True)
|
|
@@ -115,14 +121,171 @@ def read_arxiv_content(paper_id: str, *, full_text: bool = False) -> dict[str, A
|
|
|
115
121
|
"guidance": "Pass an arXiv id like `2010.11929` or `2401.12345v2`.",
|
|
116
122
|
}
|
|
117
123
|
|
|
124
|
+
metadata = fetch_arxiv_metadata(normalized_id)
|
|
125
|
+
attempts: list[dict[str, Any]] = list(metadata.get("attempts") or [])
|
|
126
|
+
if not metadata.get("ok"):
|
|
127
|
+
mode = "full text" if full_text else "overview"
|
|
128
|
+
return {
|
|
129
|
+
"ok": False,
|
|
130
|
+
"paper_id": normalized_id,
|
|
131
|
+
"requested_full_text": full_text,
|
|
132
|
+
"error": f"Unable to fetch arXiv {mode} content for `{normalized_id}`.",
|
|
133
|
+
"attempts": attempts,
|
|
134
|
+
"guidance": "Use web search to confirm the paper id or try again later.",
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
merged: dict[str, Any] = {
|
|
138
|
+
"ok": True,
|
|
139
|
+
"paper_id": metadata.get("paper_id") or normalized_id,
|
|
140
|
+
"requested_full_text": full_text,
|
|
141
|
+
"title": metadata.get("title"),
|
|
142
|
+
"authors": metadata.get("authors") or [],
|
|
143
|
+
"categories": metadata.get("categories") or [],
|
|
144
|
+
"abstract": metadata.get("abstract") or "",
|
|
145
|
+
"published_at": metadata.get("published_at") or "",
|
|
146
|
+
"version": metadata.get("version"),
|
|
147
|
+
"primary_class": metadata.get("primary_class") or "",
|
|
148
|
+
"bibtex": metadata.get("bibtex") or "",
|
|
149
|
+
"metadata_source": metadata.get("metadata_source") or metadata.get("source"),
|
|
150
|
+
"abs_url": metadata.get("abs_url") or f"https://arxiv.org/abs/{normalized_id}",
|
|
151
|
+
"pdf_url": metadata.get("pdf_url") or f"https://arxiv.org/pdf/{normalized_id}.pdf",
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
if full_text:
|
|
155
|
+
for plan in _full_text_plans(normalized_id):
|
|
156
|
+
try:
|
|
157
|
+
payload = _fetch_text(plan.url, timeout=plan.timeout)
|
|
158
|
+
parsed = plan.parser(normalized_id, payload, plan.url)
|
|
159
|
+
content = str(parsed.get("content") or "").strip()
|
|
160
|
+
if not content:
|
|
161
|
+
attempts.append(
|
|
162
|
+
{
|
|
163
|
+
"source": plan.name,
|
|
164
|
+
"url": plan.url,
|
|
165
|
+
"ok": False,
|
|
166
|
+
"error": "Empty response.",
|
|
167
|
+
}
|
|
168
|
+
)
|
|
169
|
+
continue
|
|
170
|
+
attempts.append(
|
|
171
|
+
{
|
|
172
|
+
"source": plan.name,
|
|
173
|
+
"url": plan.url,
|
|
174
|
+
"ok": True,
|
|
175
|
+
"content_mode": plan.content_mode,
|
|
176
|
+
}
|
|
177
|
+
)
|
|
178
|
+
return {
|
|
179
|
+
**merged,
|
|
180
|
+
"content_mode": plan.content_mode,
|
|
181
|
+
"source": plan.name,
|
|
182
|
+
"source_url": plan.url,
|
|
183
|
+
"summary_source": metadata.get("metadata_source") or metadata.get("source"),
|
|
184
|
+
"overview": "",
|
|
185
|
+
"overview_source": None,
|
|
186
|
+
"content": _build_full_text_content(merged, content),
|
|
187
|
+
"attempts": attempts,
|
|
188
|
+
"guidance": "Use web search for discovery. Use `artifact.arxiv(...)` after you already know the arXiv paper id.",
|
|
189
|
+
}
|
|
190
|
+
except Exception as exc: # noqa: BLE001
|
|
191
|
+
attempts.append(
|
|
192
|
+
{
|
|
193
|
+
"source": plan.name,
|
|
194
|
+
"url": plan.url,
|
|
195
|
+
"ok": False,
|
|
196
|
+
"error": _format_error(exc),
|
|
197
|
+
}
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
return {
|
|
201
|
+
**merged,
|
|
202
|
+
"content_mode": "abstract",
|
|
203
|
+
"source": metadata.get("source"),
|
|
204
|
+
"source_url": metadata.get("source_url"),
|
|
205
|
+
"summary_source": metadata.get("metadata_source") or metadata.get("source"),
|
|
206
|
+
"overview": "",
|
|
207
|
+
"overview_source": None,
|
|
208
|
+
"content": _build_overview_content(merged, None),
|
|
209
|
+
"attempts": attempts,
|
|
210
|
+
"guidance": "Use web search for discovery. Use `artifact.arxiv(...)` after you already know the arXiv paper id.",
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
overview_text = ""
|
|
214
|
+
overview_markdown = ""
|
|
215
|
+
overview_source: str | None = None
|
|
216
|
+
overview_url: str | None = None
|
|
217
|
+
for plan in _overview_plans(normalized_id):
|
|
218
|
+
try:
|
|
219
|
+
payload = _fetch_text(plan.url, timeout=plan.timeout)
|
|
220
|
+
parsed = plan.parser(normalized_id, payload, plan.url)
|
|
221
|
+
candidate = str(parsed.get("abstract") or parsed.get("content") or "").strip()
|
|
222
|
+
candidate_markdown = str(parsed.get("overview_markdown") or parsed.get("content") or "").strip()
|
|
223
|
+
if not candidate:
|
|
224
|
+
attempts.append(
|
|
225
|
+
{
|
|
226
|
+
"source": plan.name,
|
|
227
|
+
"url": plan.url,
|
|
228
|
+
"ok": False,
|
|
229
|
+
"error": "Empty response.",
|
|
230
|
+
}
|
|
231
|
+
)
|
|
232
|
+
continue
|
|
233
|
+
attempts.append(
|
|
234
|
+
{
|
|
235
|
+
"source": plan.name,
|
|
236
|
+
"url": plan.url,
|
|
237
|
+
"ok": True,
|
|
238
|
+
"content_mode": "overview",
|
|
239
|
+
}
|
|
240
|
+
)
|
|
241
|
+
overview_text = candidate
|
|
242
|
+
overview_markdown = candidate_markdown
|
|
243
|
+
overview_source = plan.name
|
|
244
|
+
overview_url = plan.url
|
|
245
|
+
break
|
|
246
|
+
except Exception as exc: # noqa: BLE001
|
|
247
|
+
attempts.append(
|
|
248
|
+
{
|
|
249
|
+
"source": plan.name,
|
|
250
|
+
"url": plan.url,
|
|
251
|
+
"ok": False,
|
|
252
|
+
"error": _format_error(exc),
|
|
253
|
+
}
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
return {
|
|
257
|
+
**merged,
|
|
258
|
+
"content_mode": "overview" if overview_text else "abstract",
|
|
259
|
+
"source": overview_source or metadata.get("source"),
|
|
260
|
+
"source_url": overview_url or metadata.get("source_url"),
|
|
261
|
+
"summary_source": overview_source or metadata.get("metadata_source") or metadata.get("source"),
|
|
262
|
+
"overview": overview_text,
|
|
263
|
+
"overview_markdown": overview_markdown,
|
|
264
|
+
"overview_source": overview_source,
|
|
265
|
+
"content": _build_overview_content(merged, overview_text or None),
|
|
266
|
+
"attempts": attempts,
|
|
267
|
+
"guidance": "Use web search for discovery. Use `artifact.arxiv(...)` after you already know the arXiv paper id.",
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def fetch_arxiv_metadata(paper_id: str) -> dict[str, Any]:
|
|
272
|
+
normalized_id = normalize_arxiv_id(paper_id)
|
|
273
|
+
if not normalized_id:
|
|
274
|
+
return {
|
|
275
|
+
"ok": False,
|
|
276
|
+
"paper_id": str(paper_id or "").strip(),
|
|
277
|
+
"error": "Invalid arXiv paper id.",
|
|
278
|
+
"attempts": [],
|
|
279
|
+
}
|
|
280
|
+
|
|
118
281
|
attempts: list[dict[str, Any]] = []
|
|
119
|
-
|
|
120
|
-
for plan in plans:
|
|
282
|
+
for plan in _metadata_plans(normalized_id):
|
|
121
283
|
try:
|
|
122
284
|
payload = _fetch_text(plan.url, timeout=plan.timeout)
|
|
123
285
|
parsed = plan.parser(normalized_id, payload, plan.url)
|
|
124
|
-
|
|
125
|
-
|
|
286
|
+
title = str(parsed.get("title") or "").strip()
|
|
287
|
+
abstract = str(parsed.get("abstract") or "").strip()
|
|
288
|
+
if not title and not abstract:
|
|
126
289
|
attempts.append(
|
|
127
290
|
{
|
|
128
291
|
"source": plan.name,
|
|
@@ -140,19 +303,30 @@ def read_arxiv_content(paper_id: str, *, full_text: bool = False) -> dict[str, A
|
|
|
140
303
|
"content_mode": plan.content_mode,
|
|
141
304
|
}
|
|
142
305
|
)
|
|
143
|
-
|
|
306
|
+
canonical_id = str(parsed.get("paper_id") or normalized_id).strip() or normalized_id
|
|
307
|
+
primary_class = str(parsed.get("primary_class") or "").strip()
|
|
308
|
+
published_at = str(parsed.get("published_at") or "").strip()
|
|
309
|
+
version = parsed.get("version")
|
|
310
|
+
metadata = {
|
|
144
311
|
"ok": True,
|
|
145
|
-
"paper_id":
|
|
146
|
-
"requested_full_text": full_text,
|
|
147
|
-
"content_mode": plan.content_mode,
|
|
312
|
+
"paper_id": canonical_id,
|
|
148
313
|
"source": plan.name,
|
|
149
314
|
"source_url": plan.url,
|
|
150
|
-
"
|
|
315
|
+
"metadata_source": plan.name,
|
|
316
|
+
"title": title or canonical_id,
|
|
151
317
|
"authors": parsed.get("authors") or [],
|
|
152
|
-
"
|
|
318
|
+
"categories": parsed.get("categories") or ([] if not primary_class else [primary_class]),
|
|
319
|
+
"abstract": abstract,
|
|
320
|
+
"published_at": published_at,
|
|
321
|
+
"version": version if isinstance(version, int) else _parse_arxiv_version(canonical_id),
|
|
322
|
+
"primary_class": primary_class or ((parsed.get("categories") or [None])[0] or ""),
|
|
323
|
+
"abs_url": str(parsed.get("abs_url") or f"https://arxiv.org/abs/{canonical_id}"),
|
|
324
|
+
"pdf_url": str(parsed.get("pdf_url") or f"https://arxiv.org/pdf/{canonical_id}.pdf"),
|
|
153
325
|
"attempts": attempts,
|
|
154
|
-
"guidance": "Use web search for discovery. Use `artifact.arxiv(...)` after you already know the arXiv paper id.",
|
|
155
326
|
}
|
|
327
|
+
metadata["bibtex"] = _build_bibtex(metadata)
|
|
328
|
+
metadata["content"] = _build_metadata_content(metadata)
|
|
329
|
+
return metadata
|
|
156
330
|
except Exception as exc: # noqa: BLE001
|
|
157
331
|
attempts.append(
|
|
158
332
|
{
|
|
@@ -163,14 +337,11 @@ def read_arxiv_content(paper_id: str, *, full_text: bool = False) -> dict[str, A
|
|
|
163
337
|
}
|
|
164
338
|
)
|
|
165
339
|
|
|
166
|
-
mode = "full text" if full_text else "overview"
|
|
167
340
|
return {
|
|
168
341
|
"ok": False,
|
|
169
342
|
"paper_id": normalized_id,
|
|
170
|
-
"
|
|
171
|
-
"error": f"Unable to fetch arXiv {mode} content for `{normalized_id}`.",
|
|
343
|
+
"error": f"Unable to fetch arXiv metadata for `{normalized_id}`.",
|
|
172
344
|
"attempts": attempts,
|
|
173
|
-
"guidance": "Use web search to confirm the paper id or try again later.",
|
|
174
345
|
}
|
|
175
346
|
|
|
176
347
|
|
|
@@ -201,18 +372,6 @@ def _overview_plans(paper_id: str) -> list[_FetchPlan]:
|
|
|
201
372
|
parser=_parse_markdown,
|
|
202
373
|
timeout=4,
|
|
203
374
|
),
|
|
204
|
-
_FetchPlan(
|
|
205
|
-
name="arxiv_abstract",
|
|
206
|
-
url=f"https://arxiv.org/abs/{paper_id}",
|
|
207
|
-
content_mode="abstract",
|
|
208
|
-
parser=_parse_arxiv_abstract_html,
|
|
209
|
-
),
|
|
210
|
-
_FetchPlan(
|
|
211
|
-
name="alphaxiv_full_text",
|
|
212
|
-
url=f"https://www.alphaxiv.org/abs/{paper_id}.md",
|
|
213
|
-
content_mode="full_text",
|
|
214
|
-
parser=_parse_markdown,
|
|
215
|
-
),
|
|
216
375
|
]
|
|
217
376
|
|
|
218
377
|
|
|
@@ -251,6 +410,24 @@ def _full_text_plans(paper_id: str) -> list[_FetchPlan]:
|
|
|
251
410
|
]
|
|
252
411
|
|
|
253
412
|
|
|
413
|
+
def _metadata_plans(paper_id: str) -> list[_FetchPlan]:
|
|
414
|
+
return [
|
|
415
|
+
_FetchPlan(
|
|
416
|
+
name="arxiv_api",
|
|
417
|
+
url=ARXIV_API_URL.format(paper_id=paper_id),
|
|
418
|
+
content_mode="abstract",
|
|
419
|
+
parser=_parse_arxiv_atom,
|
|
420
|
+
timeout=8,
|
|
421
|
+
),
|
|
422
|
+
_FetchPlan(
|
|
423
|
+
name="arxiv_abstract",
|
|
424
|
+
url=f"https://arxiv.org/abs/{paper_id}",
|
|
425
|
+
content_mode="abstract",
|
|
426
|
+
parser=_parse_arxiv_abstract_html,
|
|
427
|
+
),
|
|
428
|
+
]
|
|
429
|
+
|
|
430
|
+
|
|
254
431
|
def _fetch_text(url: str, *, timeout: int) -> str:
|
|
255
432
|
request = Request(
|
|
256
433
|
url,
|
|
@@ -270,18 +447,89 @@ def _parse_markdown(paper_id: str, payload: str, url: str) -> dict[str, Any]:
|
|
|
270
447
|
return {"content": ""}
|
|
271
448
|
title_match = re.search(r"^#\s+(.+)$", content, re.MULTILINE)
|
|
272
449
|
title = title_match.group(1).strip() if title_match else _first_nonempty_line(content)
|
|
450
|
+
abstract = _markdown_to_text(content, title=title)
|
|
273
451
|
return {
|
|
274
452
|
"title": title,
|
|
275
453
|
"authors": [],
|
|
454
|
+
"categories": [],
|
|
455
|
+
"abstract": abstract,
|
|
456
|
+
"overview_markdown": content,
|
|
276
457
|
"content": content,
|
|
277
458
|
}
|
|
278
459
|
|
|
279
460
|
|
|
461
|
+
def _parse_arxiv_atom(paper_id: str, payload: str, url: str) -> dict[str, Any]:
|
|
462
|
+
root = ET.fromstring(payload)
|
|
463
|
+
entry = root.find("atom:entry", ARXIV_XML_NAMESPACES)
|
|
464
|
+
if entry is None:
|
|
465
|
+
return {"content": ""}
|
|
466
|
+
|
|
467
|
+
title = _clean_inline_text(entry.findtext("atom:title", default="", namespaces=ARXIV_XML_NAMESPACES))
|
|
468
|
+
abstract = _clean_inline_text(entry.findtext("atom:summary", default="", namespaces=ARXIV_XML_NAMESPACES))
|
|
469
|
+
published_at = _clean_inline_text(
|
|
470
|
+
entry.findtext("atom:published", default="", namespaces=ARXIV_XML_NAMESPACES)
|
|
471
|
+
)
|
|
472
|
+
authors: list[str] = []
|
|
473
|
+
for author in entry.findall("atom:author", ARXIV_XML_NAMESPACES):
|
|
474
|
+
author_name = _clean_inline_text(
|
|
475
|
+
author.findtext("atom:name", default="", namespaces=ARXIV_XML_NAMESPACES)
|
|
476
|
+
)
|
|
477
|
+
if author_name:
|
|
478
|
+
authors.append(author_name)
|
|
479
|
+
|
|
480
|
+
categories: list[str] = []
|
|
481
|
+
primary_class = ""
|
|
482
|
+
primary_node = entry.find("arxiv:primary_category", ARXIV_XML_NAMESPACES)
|
|
483
|
+
if primary_node is not None:
|
|
484
|
+
primary_class = _clean_inline_text(primary_node.attrib.get("term", ""))
|
|
485
|
+
if primary_class:
|
|
486
|
+
categories.append(primary_class)
|
|
487
|
+
for category in entry.findall("atom:category", ARXIV_XML_NAMESPACES):
|
|
488
|
+
term = _clean_inline_text(category.attrib.get("term", ""))
|
|
489
|
+
if term and term not in categories:
|
|
490
|
+
categories.append(term)
|
|
491
|
+
|
|
492
|
+
entry_id = _clean_inline_text(entry.findtext("atom:id", default="", namespaces=ARXIV_XML_NAMESPACES))
|
|
493
|
+
entry_id_normalized = normalize_arxiv_id(entry_id) or paper_id
|
|
494
|
+
canonical_id = normalize_arxiv_id(paper_id) or _strip_arxiv_version(entry_id_normalized) or paper_id
|
|
495
|
+
version = _parse_arxiv_version(entry_id_normalized)
|
|
496
|
+
abs_url = f"https://arxiv.org/abs/{canonical_id}"
|
|
497
|
+
pdf_url = f"https://arxiv.org/pdf/{canonical_id}.pdf"
|
|
498
|
+
return {
|
|
499
|
+
"paper_id": canonical_id,
|
|
500
|
+
"title": title,
|
|
501
|
+
"authors": authors,
|
|
502
|
+
"categories": categories,
|
|
503
|
+
"primary_class": primary_class or (categories[0] if categories else ""),
|
|
504
|
+
"published_at": _normalize_published_at(published_at),
|
|
505
|
+
"version": version,
|
|
506
|
+
"abstract": abstract,
|
|
507
|
+
"abs_url": abs_url,
|
|
508
|
+
"pdf_url": pdf_url,
|
|
509
|
+
"content": _build_metadata_content(
|
|
510
|
+
{
|
|
511
|
+
"paper_id": canonical_id,
|
|
512
|
+
"title": title,
|
|
513
|
+
"authors": authors,
|
|
514
|
+
"categories": categories,
|
|
515
|
+
"primary_class": primary_class or (categories[0] if categories else ""),
|
|
516
|
+
"published_at": _normalize_published_at(published_at),
|
|
517
|
+
"version": version,
|
|
518
|
+
"abstract": abstract,
|
|
519
|
+
"abs_url": abs_url,
|
|
520
|
+
"pdf_url": pdf_url,
|
|
521
|
+
}
|
|
522
|
+
),
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
|
|
280
526
|
def _parse_arxiv_abstract_html(paper_id: str, payload: str, url: str) -> dict[str, Any]:
|
|
281
527
|
title = _match_first(payload, r'<meta name="citation_title" content="([^"]+)"')
|
|
282
528
|
if not title:
|
|
283
529
|
title = _match_first(payload, r"<title>(.*?)</title>", flags=re.IGNORECASE | re.DOTALL)
|
|
284
530
|
authors = re.findall(r'<meta name="citation_author" content="([^"]+)"', payload)
|
|
531
|
+
categories = _parse_arxiv_categories(payload)
|
|
532
|
+
published_at = _normalize_published_at(_match_first(payload, r'<meta name="citation_date" content="([^"]+)"'))
|
|
285
533
|
abstract = _match_first(
|
|
286
534
|
payload,
|
|
287
535
|
r'<span class="descriptor">Abstract:</span>(.*?)</blockquote>',
|
|
@@ -290,18 +538,22 @@ def _parse_arxiv_abstract_html(paper_id: str, payload: str, url: str) -> dict[st
|
|
|
290
538
|
abstract = _clean_inline_text(abstract)
|
|
291
539
|
if not abstract:
|
|
292
540
|
abstract = _clean_inline_text(_extract_text(payload))
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
lines.append(f"- paper_id: {paper_id}")
|
|
297
|
-
lines.append("- source: arXiv abstract page")
|
|
298
|
-
if authors:
|
|
299
|
-
lines.append(f"- authors: {', '.join(_clean_inline_text(author) for author in authors)}")
|
|
300
|
-
lines.extend(["", "## Abstract", "", abstract or "Abstract unavailable."])
|
|
301
|
-
return {
|
|
541
|
+
primary_class = categories[0] if categories else ""
|
|
542
|
+
metadata = {
|
|
543
|
+
"paper_id": paper_id,
|
|
302
544
|
"title": _clean_inline_text(title),
|
|
303
545
|
"authors": [_clean_inline_text(author) for author in authors if _clean_inline_text(author)],
|
|
304
|
-
"
|
|
546
|
+
"categories": categories,
|
|
547
|
+
"abstract": abstract,
|
|
548
|
+
"published_at": published_at,
|
|
549
|
+
"version": _parse_arxiv_version(paper_id),
|
|
550
|
+
"primary_class": primary_class,
|
|
551
|
+
"abs_url": f"https://arxiv.org/abs/{paper_id}",
|
|
552
|
+
"pdf_url": f"https://arxiv.org/pdf/{paper_id}.pdf",
|
|
553
|
+
}
|
|
554
|
+
return {
|
|
555
|
+
**metadata,
|
|
556
|
+
"content": _build_metadata_content(metadata),
|
|
305
557
|
}
|
|
306
558
|
|
|
307
559
|
|
|
@@ -324,6 +576,8 @@ def _parse_article_html(paper_id: str, payload: str, url: str) -> dict[str, Any]
|
|
|
324
576
|
return {
|
|
325
577
|
"title": cleaned_title,
|
|
326
578
|
"authors": [],
|
|
579
|
+
"categories": [],
|
|
580
|
+
"abstract": _summarize_text(text),
|
|
327
581
|
"content": "\n".join(lines).strip(),
|
|
328
582
|
}
|
|
329
583
|
|
|
@@ -361,6 +615,199 @@ def _first_nonempty_line(text: str) -> str:
|
|
|
361
615
|
return ""
|
|
362
616
|
|
|
363
617
|
|
|
618
|
+
def _markdown_to_text(content: str, *, title: str | None = None) -> str:
|
|
619
|
+
text = re.sub(r"```.*?```", " ", content, flags=re.DOTALL)
|
|
620
|
+
text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
|
|
621
|
+
text = re.sub(r"^\s*[-*+]\s+", "", text, flags=re.MULTILINE)
|
|
622
|
+
text = re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", text)
|
|
623
|
+
cleaned = _clean_inline_text(text)
|
|
624
|
+
if title:
|
|
625
|
+
title_prefix = _clean_inline_text(title)
|
|
626
|
+
if cleaned.lower().startswith(title_prefix.lower()):
|
|
627
|
+
cleaned = cleaned[len(title_prefix) :].strip(" :-")
|
|
628
|
+
return _summarize_text(cleaned)
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _summarize_text(text: str, *, limit: int = 1600) -> str:
|
|
632
|
+
cleaned = _clean_inline_text(text)
|
|
633
|
+
if len(cleaned) <= limit:
|
|
634
|
+
return cleaned
|
|
635
|
+
return f"{cleaned[: max(0, limit - 1)].rstrip()}…"
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def _parse_arxiv_categories(payload: str) -> list[str]:
|
|
639
|
+
raw = _match_first(
|
|
640
|
+
payload,
|
|
641
|
+
r'<td class="tablecell subjects">(.*?)</td>',
|
|
642
|
+
flags=re.IGNORECASE | re.DOTALL,
|
|
643
|
+
)
|
|
644
|
+
cleaned = _clean_inline_text(raw)
|
|
645
|
+
if not cleaned:
|
|
646
|
+
return []
|
|
647
|
+
parts = [part.strip() for part in cleaned.split(";") if part.strip()]
|
|
648
|
+
return parts
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def _normalize_published_at(value: str) -> str:
|
|
652
|
+
raw = _clean_inline_text(value)
|
|
653
|
+
if not raw:
|
|
654
|
+
return ""
|
|
655
|
+
if "T" in raw:
|
|
656
|
+
return raw.split("T", 1)[0]
|
|
657
|
+
if re.fullmatch(r"\d{4}-\d{2}-\d{2}", raw):
|
|
658
|
+
return raw
|
|
659
|
+
month_match = re.search(r"([A-Za-z]{3,9})\s+(\d{1,2}),\s*(\d{4})", raw)
|
|
660
|
+
if month_match:
|
|
661
|
+
month_lookup = {
|
|
662
|
+
"jan": "01",
|
|
663
|
+
"feb": "02",
|
|
664
|
+
"mar": "03",
|
|
665
|
+
"apr": "04",
|
|
666
|
+
"may": "05",
|
|
667
|
+
"jun": "06",
|
|
668
|
+
"jul": "07",
|
|
669
|
+
"aug": "08",
|
|
670
|
+
"sep": "09",
|
|
671
|
+
"oct": "10",
|
|
672
|
+
"nov": "11",
|
|
673
|
+
"dec": "12",
|
|
674
|
+
}
|
|
675
|
+
month = month_lookup.get(month_match.group(1)[:3].lower())
|
|
676
|
+
if month:
|
|
677
|
+
return f"{month_match.group(3)}-{month}-{int(month_match.group(2)):02d}"
|
|
678
|
+
year_match = re.search(r"\b(\d{4})\b", raw)
|
|
679
|
+
return year_match.group(1) if year_match else raw
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def _parse_arxiv_version(paper_id: str) -> int | None:
|
|
683
|
+
match = re.search(r"v(\d+)$", str(paper_id or "").strip(), re.IGNORECASE)
|
|
684
|
+
if not match:
|
|
685
|
+
return None
|
|
686
|
+
try:
|
|
687
|
+
return int(match.group(1))
|
|
688
|
+
except ValueError:
|
|
689
|
+
return None
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def _strip_arxiv_version(paper_id: str) -> str:
|
|
693
|
+
return re.sub(r"v\d+$", "", str(paper_id or "").strip(), flags=re.IGNORECASE)
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def _bibtex_year(published_at: str) -> str:
|
|
697
|
+
match = re.search(r"\b(\d{4})\b", str(published_at or "").strip())
|
|
698
|
+
return match.group(1) if match else ""
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
def _bibtex_key_author(authors: list[str]) -> str:
|
|
702
|
+
if not authors:
|
|
703
|
+
return "unknown"
|
|
704
|
+
parts = re.split(r"[\s,]+", authors[0].strip())
|
|
705
|
+
cleaned = [part for part in parts if part]
|
|
706
|
+
if not cleaned:
|
|
707
|
+
return "unknown"
|
|
708
|
+
return re.sub(r"[^a-z0-9]+", "", cleaned[-1].lower()) or "unknown"
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
def _citation_key(paper_id: str, authors: list[str], published_at: str) -> str:
|
|
712
|
+
year = _bibtex_year(published_at) or "0000"
|
|
713
|
+
normalized_paper_id = re.sub(r"v\d+$", "", str(paper_id or "").lower())
|
|
714
|
+
base_id = re.sub(r"[^a-z0-9]+", "", normalized_paper_id)
|
|
715
|
+
if not base_id:
|
|
716
|
+
base_id = "arxiv"
|
|
717
|
+
return f"{_bibtex_key_author(authors)}{year}{base_id}"
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _build_bibtex(metadata: dict[str, Any]) -> str:
    """Render an ``@misc`` BibTeX entry from a paper metadata mapping.

    Reads ``paper_id``, ``title``, ``authors``, ``published_at`` and
    ``primary_class`` from ``metadata``; missing or falsy values become empty
    strings (or an empty author list). The year falls back to ``"0000"`` when
    none can be extracted from ``published_at``. ``primaryClass`` is emitted
    only when ``primary_class`` is non-empty.

    Values are inserted verbatim; no BibTeX escaping is applied.
    """
    paper_id = str(metadata.get("paper_id") or "").strip()
    title = str(metadata.get("title") or "").strip()
    authors = [str(item).strip() for item in (metadata.get("authors") or []) if str(item).strip()]
    published_at = str(metadata.get("published_at") or "").strip()
    primary_class = str(metadata.get("primary_class") or "").strip()
    year = _bibtex_year(published_at) or "0000"

    fields = [
        f" title={{{title}}}",
        f" author={{{' and '.join(authors)}}}",
        f" year={{{year}}}",
        f" eprint={{{paper_id}}}",
        " archivePrefix={arXiv}",
    ]
    if primary_class:
        fields.append(f" primaryClass={{{primary_class}}}")

    header = f"@misc{{{_citation_key(paper_id, authors, published_at)},"
    # Joining with ",\n" puts a separator after every field except the last,
    # without touching commas that are part of a field's value.
    return "\n".join([header, ",\n".join(fields), "}"])
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
def _build_metadata_lines(metadata: dict[str, Any]) -> list[str]:
|
|
743
|
+
paper_id = str(metadata.get("paper_id") or "").strip()
|
|
744
|
+
title = str(metadata.get("title") or "").strip() or paper_id
|
|
745
|
+
authors = [str(item).strip() for item in (metadata.get("authors") or []) if str(item).strip()]
|
|
746
|
+
categories = [str(item).strip() for item in (metadata.get("categories") or []) if str(item).strip()]
|
|
747
|
+
published_at = str(metadata.get("published_at") or "").strip()
|
|
748
|
+
version = metadata.get("version")
|
|
749
|
+
lines = [f"# {title}", "", f"- paper_id: {paper_id}"]
|
|
750
|
+
if authors:
|
|
751
|
+
lines.append(f"- authors: {', '.join(authors)}")
|
|
752
|
+
if categories:
|
|
753
|
+
lines.append(f"- categories: {', '.join(categories)}")
|
|
754
|
+
if published_at:
|
|
755
|
+
lines.append(f"- published_at: {published_at}")
|
|
756
|
+
if isinstance(version, int):
|
|
757
|
+
lines.append(f"- version: v{version}")
|
|
758
|
+
lines.append(f"- abs_url: {str(metadata.get('abs_url') or f'https://arxiv.org/abs/{paper_id}')}")
|
|
759
|
+
return lines
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def _build_metadata_content(metadata: dict[str, Any]) -> str:
    """Render the metadata header followed by an ``## Abstract`` section.

    When ``metadata["abstract"]`` is missing or blank, the section body is the
    placeholder ``"Abstract unavailable."``.
    """
    header_lines = _build_metadata_lines(metadata)
    abstract_text = str(metadata.get("abstract") or "").strip()
    if not abstract_text:
        abstract_text = "Abstract unavailable."
    document = [*header_lines, "", "## Abstract", "", abstract_text]
    return "\n".join(document).strip()
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def _build_overview_content(metadata: dict[str, Any], overview_text: str | None) -> str:
    """Render the metadata header plus summary and/or abstract sections.

    * With a non-empty cleaned overview: emit ``## Summary``, then
      ``## Abstract`` only if the abstract is non-empty and its cleaned,
      lowercased text differs from the summary's.
    * Without an overview: emit ``## Abstract`` with the abstract, or the
      placeholder ``"Abstract unavailable."``.
    """
    sections = _build_metadata_lines(metadata)
    summary = _clean_inline_text(overview_text or "")
    abstract_text = str(metadata.get("abstract") or "").strip()

    if not summary:
        sections.extend(["", "## Abstract", "", abstract_text or "Abstract unavailable."])
        return "\n".join(sections).strip()

    sections.extend(["", "## Summary", "", summary])
    # Skip the abstract when it merely repeats the summary.
    repeats_summary = (not abstract_text) or _clean_inline_text(abstract_text).lower() == summary.lower()
    if not repeats_summary:
        sections.extend(["", "## Abstract", "", abstract_text])
    return "\n".join(sections).strip()
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
def _strip_duplicate_heading(content: str, title: str) -> str:
    """Drop leading blank lines and leading lines that repeat ``title``.

    A leading line counts as a repeat when, after removing any ``#`` heading
    markers, its cleaned text equals the cleaned title case-insensitively.
    Scanning stops at the first line that is neither blank nor a repeat; the
    remainder is returned stripped. Empty ``content`` yields ``""``.
    """
    if not content:
        return ""
    rows = content.splitlines()
    wanted = _clean_inline_text(title)

    keep_from = 0
    for row in rows:
        text = row.strip()
        if text:
            heading_text = re.sub(r"^#+\s*", "", text)
            is_repeat = wanted and _clean_inline_text(heading_text).lower() == wanted.lower()
            if not is_repeat:
                break
        # Blank line or repeated title: skip it and keep scanning.
        keep_from += 1
    return "\n".join(rows[keep_from:]).strip()
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
def _build_full_text_content(metadata: dict[str, Any], raw_content: str) -> str:
    """Render the metadata header, optional abstract and optional full text.

    ``## Abstract`` is emitted only for a non-empty abstract. ``## Full Text``
    is emitted only when ``raw_content`` is non-empty after
    ``_strip_duplicate_heading`` removes a leading copy of the title.
    """
    document = _build_metadata_lines(metadata)

    abstract_text = str(metadata.get("abstract") or "").strip()
    if abstract_text:
        document += ["", "## Abstract", "", abstract_text]

    paper_title = str(metadata.get("title") or "")
    body_text = _strip_duplicate_heading(raw_content, paper_title)
    if body_text:
        document += ["", "## Full Text", "", body_text]

    return "\n".join(document).strip()
|
|
809
|
+
|
|
810
|
+
|
|
364
811
|
def _format_error(exc: Exception) -> str:
|
|
365
812
|
message = str(exc).strip()
|
|
366
813
|
return message or exc.__class__.__name__
|
|
@@ -506,9 +506,7 @@ def _normalize_metric_entry(metric: object, *, fallback_id: str | None = None) -
|
|
|
506
506
|
metric_id = as_metric_id(
|
|
507
507
|
metric.get("metric_id") or metric.get("id") or metric.get("name") or fallback_id,
|
|
508
508
|
)
|
|
509
|
-
direction =
|
|
510
|
-
if direction not in {"maximize", "minimize"}:
|
|
511
|
-
direction = infer_metric_direction(metric_id)
|
|
509
|
+
direction = normalize_metric_direction(metric.get("direction"), metric_id=metric_id)
|
|
512
510
|
decimals_raw = metric.get("decimals")
|
|
513
511
|
decimals = int(decimals_raw) if isinstance(decimals_raw, int) else None
|
|
514
512
|
chart_group = str(metric.get("chart_group") or "default").strip() or "default"
|