deepresearch-flow 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,686 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+ import re
8
+ from typing import Any
9
+
10
+ import httpx
11
+ from starlette.applications import Starlette
12
+ from starlette.middleware import Middleware
13
+ from starlette.middleware.base import BaseHTTPMiddleware
14
+ from starlette.requests import Request
15
+ from starlette.responses import Response
16
+ from starlette.routing import Mount
17
+
18
+ from fastmcp import FastMCP
19
+
20
+ from deepresearch_flow.paper.snapshot.common import ApiLimits, _open_ro_conn
21
+ from deepresearch_flow.paper.snapshot.text import merge_adjacent_markers, remove_cjk_spaces, rewrite_search_query
22
+
23
# Values accepted in the `mcp-protocol-version` request header; any other
# non-empty value is rejected by McpRequestGuardMiddleware with HTTP 400.
_SUPPORTED_PROTOCOL_VERSIONS = {"2025-03-26", "2025-06-18"}
# Default for McpSnapshotConfig.max_chars_default (character budget used when a
# tool call does not pass max_chars).
_DEFAULT_MAX_CHARS = 50_000
# Default for McpSnapshotConfig.http_timeout, passed as `timeout=` to httpx.Client.
_DEFAULT_TIMEOUT = 10.0
# Allowed paper ID alphabet: ASCII letters, digits, underscore and hyphen.
# NOTE(review): with `.match()`, `$` also matches just before a trailing
# newline; use `.fullmatch()` when a strict whole-string check is required.
_PAPER_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]+$')
27
+
28
+
29
class McpToolError(Exception):
    """Structured failure raised by the MCP tools and resources.

    Carries a machine-readable ``code``, a human-readable ``message`` and any
    extra keyword ``details`` supplied at the raise site. The original author
    notes that FastMCP is expected to catch this exception and turn it into a
    JSON-RPC error response for the client.
    """

    def __init__(self, code: str, message: str, **details):
        # Only the message becomes the exception text; code/details stay as attributes.
        super().__init__(message)
        self.code = code
        self.message = message
        self.details = details

    def to_dict(self) -> dict[str, Any]:
        """Return a flat dict: ``error``, ``message``, then every detail key."""
        payload: dict[str, Any] = {"error": self.code, "message": self.message}
        # Details are merged last, so a detail named "error" replaces the code.
        payload.update(self.details)
        return payload
45
+
46
+
47
@dataclass(frozen=True)
class McpSnapshotConfig:
    """Immutable settings for the snapshot-backed MCP server."""

    snapshot_db: Path  # database path handed to _open_ro_conn
    static_base_url: str  # base URL used when an asset is fetched over HTTP
    static_export_dir: Path | None  # optional local directory tried before HTTP
    limits: ApiLimits
    origin_allowlist: list[str]
    max_chars_default: int = _DEFAULT_MAX_CHARS
    http_timeout: float = _DEFAULT_TIMEOUT
    max_paper_id_length: int = 64
    # Lazily created HTTP client. Because the dataclass is frozen,
    # get_http_client() stores it with object.__setattr__ rather than a
    # plain attribute assignment.
    _http_client: httpx.Client | None = field(default=None, repr=False, compare=False)

    def get_http_client(self) -> httpx.Client:
        """Return the shared httpx client, building it on first use."""
        client = self._http_client
        if client is not None:
            return client
        pool_limits = httpx.Limits(max_keepalive_connections=10, max_connections=20)
        client = httpx.Client(
            timeout=self.http_timeout,
            follow_redirects=True,
            limits=pool_limits,
        )
        object.__setattr__(self, "_http_client", client)
        return client
76
+
77
+
78
class McpRequestGuardMiddleware(BaseHTTPMiddleware):
    """Reject requests the MCP endpoint should not serve before they reach it.

    Checks, in order: HTTP method, ``Origin`` header against the allowlist,
    and the ``mcp-protocol-version`` header against the supported versions.
    """

    # Methods let through to the wrapped app; also advertised in the Allow
    # header of every 405 response.
    _ALLOWED_METHODS = ("POST", "OPTIONS")

    def __init__(self, app, *, origin_allowlist: list[str]) -> None:
        super().__init__(app)
        # Origins are compared case-insensitively.
        self._allowlist = [origin.lower() for origin in origin_allowlist]

    async def dispatch(self, request: Request, call_next):  # type: ignore[override]
        """Return 405/403/400 for disallowed requests, else forward the request."""
        if request.method not in self._ALLOWED_METHODS:
            # A 405 response must tell the client which methods are supported.
            return Response(
                "Method Not Allowed",
                status_code=405,
                headers={"Allow": ", ".join(self._ALLOWED_METHODS)},
            )
        origin = request.headers.get("origin")
        # Requests without an Origin header are not origin-checked.
        if origin and not self._is_allowed_origin(origin):
            return Response("Forbidden", status_code=403)
        protocol = request.headers.get("mcp-protocol-version")
        # A missing protocol header is accepted; only an unknown value is rejected.
        if protocol and protocol not in _SUPPORTED_PROTOCOL_VERSIONS:
            return Response("Bad Request", status_code=400)
        return await call_next(request)

    def _is_allowed_origin(self, origin: str) -> bool:
        """Return True when the allowlist is empty, contains ``*``, or lists ``origin``."""
        if not self._allowlist or "*" in self._allowlist:
            return True
        return origin.lower() in self._allowlist
100
+
101
+
102
# Active server configuration; remains None until configure() stores one.
_CONFIG: McpSnapshotConfig | None = None
# Module-level FastMCP server on which this module's tools and resources are registered.
mcp = FastMCP("Paper DB MCP")
104
+
105
+
106
def configure(config: McpSnapshotConfig) -> None:
    """Store ``config`` as the module-wide configuration read by ``_get_config``.

    Calling it again replaces the previously stored configuration.
    """
    global _CONFIG
    _CONFIG = config
109
+
110
+
111
def create_mcp_app(config: McpSnapshotConfig) -> tuple[Starlette, Any]:
    """Build the guarded MCP ASGI app and expose the inner app's lifespan.

    Stores ``config`` module-wide, creates the FastMCP HTTP app, and mounts it
    at ``/`` inside a Starlette app protected by McpRequestGuardMiddleware.

    Returns:
        Tuple of (wrapped_app, lifespan_context) for use by parent Starlette.
    """
    configure(config)
    inner_app = mcp.http_app(path="/", stateless_http=True)
    guard = Middleware(
        McpRequestGuardMiddleware,
        origin_allowlist=config.origin_allowlist,
    )
    outer_app = Starlette(routes=[Mount("/", app=inner_app)], middleware=[guard])
    return outer_app, inner_app.lifespan
126
+
127
+
128
def _get_config() -> McpSnapshotConfig:
    """Return the configuration stored by ``configure``.

    Raises:
        RuntimeError: If ``configure`` has not been called yet.
    """
    active = _CONFIG
    if active is not None:
        return active
    raise RuntimeError("MCP server not configured")
132
+
133
+
134
def _validate_query(query: str, cfg: McpSnapshotConfig) -> str:
    """Check a search query and return it with surrounding whitespace removed.

    The length limit is applied to the query as received, before stripping.

    Raises:
        McpToolError: If the query is empty/blank or longer than
            ``cfg.limits.max_query_length``.
    """
    stripped = query.strip() if query else ""
    if not stripped:
        raise McpToolError("invalid_query", "Query cannot be empty")

    max_length = cfg.limits.max_query_length
    received_length = len(query)
    if received_length > max_length:
        raise McpToolError(
            "query_too_long",
            f"Query exceeds maximum length of {max_length}",
            length=received_length,
            max_length=max_length,
        )
    return stripped
150
+
151
+
152
def _validate_paper_id(paper_id: str, cfg: McpSnapshotConfig) -> str:
    """Validate a paper ID and return it unchanged.

    Args:
        paper_id: Identifier supplied by the client.
        cfg: Active configuration; provides ``max_paper_id_length``.

    Returns:
        The same ``paper_id`` when it passes every check.

    Raises:
        McpToolError: ``invalid_paper_id`` when empty, ``paper_id_too_long``
            when over the configured length, ``invalid_paper_id_format`` when
            it contains anything but letters, digits, hyphens and underscores.
    """
    if not paper_id:
        raise McpToolError("invalid_paper_id", "Paper ID cannot be empty")
    if len(paper_id) > cfg.max_paper_id_length:
        raise McpToolError(
            "paper_id_too_long",
            f"Paper ID exceeds maximum length of {cfg.max_paper_id_length}",
            length=len(paper_id),
            max_length=cfg.max_paper_id_length,
        )
    # fullmatch, not match: with match() the pattern's `$` also matches just
    # before a trailing newline, so an ID such as "abc\n" would be accepted.
    if not _PAPER_ID_PATTERN.fullmatch(paper_id):
        raise McpToolError(
            "invalid_paper_id_format",
            "Paper ID must contain only alphanumeric characters, hyphens, and underscores",
            paper_id=paper_id,
        )
    return paper_id
174
+
175
+
176
def _truncate(text: str, max_chars: int | None) -> str:
    """Cut ``text`` to ``max_chars`` characters and append a truncation marker.

    ``None`` or a non-positive ``max_chars`` disables truncation. The marker is
    appended after the kept characters, so a truncated result is longer than
    ``max_chars``.
    """
    unlimited = max_chars is None or max_chars <= 0
    if unlimited or len(text) <= max_chars:
        return text
    kept = text[:max_chars]
    dropped = len(text) - max_chars
    return f"{kept}\n[truncated: {dropped} more chars]"
184
+
185
+
186
def _read_static_text(rel_path: str) -> str | None:
    """Read a static asset from the local export directory, if possible.

    Args:
        rel_path: Asset path relative to the export directory. Leading slashes
            are ignored, matching how ``_fetch_static_text`` builds its URL.

    Returns:
        The file content decoded as UTF-8, or ``None`` when no export directory
        is configured, the path would leave the export directory, or no regular
        file exists at that location.
    """
    cfg = _get_config()
    export_dir = cfg.static_export_dir
    if not export_dir:
        return None

    relative = Path(rel_path.lstrip("/"))
    # An anchored (absolute / drive-qualified) path would replace the export
    # directory when joined, and ".." segments could climb out of it; neither
    # may be served from the local filesystem.
    if relative.anchor or ".." in relative.parts:
        return None

    candidate = export_dir / relative
    # is_file() instead of exists(): a directory at this path must not reach
    # read_text(), which would raise instead of falling back to HTTP.
    if not candidate.is_file():
        return None
    return candidate.read_text(encoding="utf-8")
194
+
195
+
196
def _fetch_static_text(rel_path: str) -> str:
    """Download a static asset from ``static_base_url`` and return its text.

    Raises:
        FileNotFoundError: If no ``static_base_url`` is configured.
        httpx.HTTPStatusError: Raised by ``raise_for_status`` on an error response.
    """
    cfg = _get_config()
    if not cfg.static_base_url:
        raise FileNotFoundError("static_base_url not configured")

    # Join with exactly one slash between base and path.
    url = "/".join((cfg.static_base_url.rstrip("/"), rel_path.lstrip("/")))
    response = cfg.get_http_client().get(url)
    response.raise_for_status()
    return response.text
207
+
208
+
209
def _load_static_text(rel_path: str) -> str:
    """Return asset text from the local export directory, else over HTTP.

    Raises:
        RuntimeError: ``asset_fetch_failed:<reason>`` where the reason is the
            HTTP status code, ``request_error`` or ``not_configured``; the
            underlying exception is chained as the cause.
    """
    try:
        local_text = _read_static_text(rel_path)
        return _fetch_static_text(rel_path) if local_text is None else local_text
    except (httpx.HTTPStatusError, httpx.RequestError, FileNotFoundError) as exc:
        # Map each failure type onto the reason suffix callers see in the message.
        if isinstance(exc, httpx.HTTPStatusError):
            reason = f"{exc.response.status_code}"
        elif isinstance(exc, httpx.RequestError):
            reason = "request_error"
        else:
            reason = "not_configured"
        raise RuntimeError(f"asset_fetch_failed:{reason}") from exc
222
+
223
+
224
def _load_summary_json(paper_id: str, template: str | None) -> tuple[str | None, list[str] | None]:
    """Load a paper's summary text and list the templates it offers.

    Returns:
        ``(None, None)`` when the paper row does not exist;
        ``(None, templates)`` when the requested (or preferred) template has no
        asset path; otherwise ``(summary_text, templates)``.
    """
    conn = _open_ro_conn(_get_config().snapshot_db)
    try:
        record = conn.execute(
            "SELECT preferred_summary_template, summary_asset_paths_json FROM paper WHERE paper_id = ?",
            (paper_id,),
        ).fetchone()
        if not record:
            return None, None

        # Mapping of template name -> asset path; a NULL/empty column means no templates.
        paths_by_template = json.loads(record["summary_asset_paths_json"] or "{}")
        template_names = sorted(paths_by_template.keys())

        # An explicit template wins; otherwise use the paper's preferred one.
        chosen = template or record["preferred_summary_template"]
        if not chosen or chosen not in paths_by_template:
            return None, template_names
        return _load_static_text(paths_by_template[chosen]), template_names
    finally:
        conn.close()
245
+
246
+
247
def _load_source_markdown(paper_id: str) -> str | None:
    """Return the paper's source markdown, or ``None`` when none is recorded.

    ``None`` covers both a missing paper row and an empty
    ``source_md_content_hash`` column.
    """
    conn = _open_ro_conn(_get_config().snapshot_db)
    try:
        record = conn.execute(
            "SELECT source_md_content_hash FROM paper WHERE paper_id = ?",
            (paper_id,),
        ).fetchone()
        content_hash = record["source_md_content_hash"] if record else None
        if not content_hash:
            return None
        # Source markdown assets are addressed by content hash under md/.
        return _load_static_text(f"md/{content_hash}.md")
    finally:
        conn.close()
262
+
263
+
264
def _load_translation_markdown(paper_id: str, lang: str) -> str | None:
    """Return the paper's translated markdown for ``lang``, if one is recorded.

    ``None`` is returned when the paper row is missing, has no
    ``translations_json``, or has no entry for ``lang``.
    """
    conn = _open_ro_conn(_get_config().snapshot_db)
    try:
        record = conn.execute(
            "SELECT translations_json FROM paper WHERE paper_id = ?",
            (paper_id,),
        ).fetchone()
        raw_translations = record["translations_json"] if record else None
        if not raw_translations:
            return None
        # translations_json is looked up by language key to get an asset path.
        asset_path = json.loads(raw_translations).get(lang)
        if not asset_path:
            return None
        return _load_static_text(asset_path)
    finally:
        conn.close()
282
+
283
+
284
+ # ==================== MCP Tools ====================
285
+
286
@mcp.tool()
def search_papers(query: str, limit: int = 10) -> list[dict[str, Any]]:
    """Full-text search for papers (relevance-ranked).

    Use when you only have topic keywords.
    Returns paper_id, title, year, venue, snippet_markdown.
    """
    cfg = _get_config()
    query = _validate_query(query, cfg)
    # Clamp the requested size into [1, max_page_size].
    limit = min(max(1, int(limit)), cfg.limits.max_page_size)

    def to_result(row) -> dict[str, Any]:
        # The snippet is the abstract with CJK spacing removed and adjacent
        # markers merged; the marker list itself is not returned.
        text = remove_cjk_spaces(str(row["abstract"] or ""))
        text, _ = merge_adjacent_markers(text)
        return {
            "paper_id": str(row["paper_id"]),
            "title": str(row["title"]),
            "year": str(row["year"]),
            "venue": str(row["venue"]),
            "snippet_markdown": text,
        }

    conn = _open_ro_conn(cfg.snapshot_db)
    try:
        matches = conn.execute(
            """
            SELECT paper_id, title, year, venue, abstract
            FROM paper_search
            WHERE paper_search MATCH ?
            ORDER BY rank
            LIMIT ?
            """,
            (rewrite_search_query(query), limit),
        ).fetchall()
        return [to_result(row) for row in matches]
    finally:
        conn.close()
325
+
326
+
327
@mcp.tool()
def search_papers_by_keyword(keyword: str, limit: int = 10) -> list[dict[str, Any]]:
    """Search papers by keyword/tag (substring match).

    Use when you know specific keywords or tags.
    Matches any paper with a keyword that contains the given text.
    Returns paper_id, title, year, venue, snippet_markdown, newest first.
    """
    cfg = _get_config()
    # Clamp the requested size into [1, max_page_size].
    limit = min(max(1, int(limit)), cfg.limits.max_page_size)

    conn = _open_ro_conn(cfg.snapshot_db)
    try:
        # The keyword is wrapped in % on both sides, so this is a LIKE
        # containment test, not an equality test. LIKE wildcards inside the
        # keyword itself (% and _) are passed through unescaped.
        rows = conn.execute(
            """
            SELECT DISTINCT p.paper_id, p.title, p.year, p.venue, p.abstract
            FROM paper p
            JOIN paper_keyword pk ON pk.paper_id = p.paper_id
            JOIN keyword k ON k.keyword_id = pk.keyword_id
            WHERE k.value LIKE ?
            ORDER BY p.year DESC, p.title ASC
            LIMIT ?
            """,
            (f"%{keyword}%", limit),
        ).fetchall()
        results: list[dict[str, Any]] = []
        for row in rows:
            snippet = remove_cjk_spaces(str(row["abstract"] or ""))
            # Only the merged text is used; the returned marker list is discarded.
            snippet, _ = merge_adjacent_markers(snippet)
            results.append({
                "paper_id": str(row["paper_id"]),
                "title": str(row["title"]),
                "year": str(row["year"]),
                "venue": str(row["venue"]),
                "snippet_markdown": snippet,
            })
        return results
    finally:
        conn.close()
365
+
366
+
367
@mcp.tool()
def get_paper_metadata(paper_id: str) -> dict[str, Any]:
    """Get paper metadata and available summary templates.

    Call this first before requesting a summary to discover available templates.
    """
    cfg = _get_config()
    paper_id = _validate_paper_id(paper_id, cfg)

    conn = _open_ro_conn(cfg.snapshot_db)
    try:
        record = conn.execute(
            """
            SELECT paper_id, title, year, venue, doi, arxiv_id, openreview_id, paper_pw_url,
                   preferred_summary_template, summary_asset_paths_json
            FROM paper WHERE paper_id = ?
            """,
            (paper_id,),
        ).fetchone()
        if not record:
            raise McpToolError("not_found", "paper not found", paper_id=paper_id)

        # Core fields are stringified; external identifiers are passed through as stored.
        metadata: dict[str, Any] = {
            name: str(record[name]) for name in ("paper_id", "title", "year", "venue")
        }
        for name in ("doi", "arxiv_id", "openreview_id", "paper_pw_url", "preferred_summary_template"):
            metadata[name] = record[name]

        # Template names are the keys of the stored asset-path mapping.
        template_paths = json.loads(record["summary_asset_paths_json"] or "{}")
        metadata["available_summary_templates"] = sorted(template_paths.keys())
        return metadata
    finally:
        conn.close()
405
+
406
+
407
@mcp.tool()
def get_paper_summary(paper_id: str, template: str | None = None, max_chars: int | None = None) -> str:
    """Get summary JSON as raw string.

    Uses preferred template if template is not specified.
    Returns the full JSON content (not a URL).
    """
    cfg = _get_config()
    paper_id = _validate_paper_id(paper_id, cfg)
    max_chars = max_chars if max_chars is not None else cfg.max_chars_default

    try:
        payload, available = _load_summary_json(paper_id, template)
    except RuntimeError as exc:
        raise McpToolError(
            "asset_fetch_failed",
            "Failed to fetch summary asset",
            paper_id=paper_id,
            template=template,
            detail=str(exc),
        ) from exc

    if payload is None:
        # _load_summary_json returns no template list only when the paper row
        # is missing; report that the same way get_paper_metadata does rather
        # than as an unavailable template.
        if available is None:
            raise McpToolError("not_found", "paper not found", paper_id=paper_id)
        raise McpToolError(
            "template_not_available",
            "Template not available",
            paper_id=paper_id,
            template=template,
            available_summary_templates=available,
        )

    # Truncation may cut the JSON text mid-document; a marker line is appended.
    return _truncate(payload, max_chars)
439
+
440
+
441
@mcp.tool()
def get_paper_source(paper_id: str, max_chars: int | None = None) -> str:
    """Get source markdown text.

    Content may be large; use max_chars to limit size.
    """
    cfg = _get_config()
    paper_id = _validate_paper_id(paper_id, cfg)
    # Fall back to the configured default budget when the caller gives none.
    budget = cfg.max_chars_default if max_chars is None else max_chars

    try:
        markdown = _load_source_markdown(paper_id)
    except RuntimeError as exc:
        raise McpToolError(
            "asset_fetch_failed",
            "Failed to fetch source asset",
            paper_id=paper_id,
            detail=str(exc),
        ) from exc

    if markdown is not None:
        return _truncate(markdown, budget)
    raise McpToolError(
        "source_not_available",
        "Source markdown not available",
        paper_id=paper_id,
    )
469
+
470
+
471
@mcp.tool()
def get_database_stats() -> dict[str, Any]:
    """Get database statistics.

    Returns totals, year/month distributions, and top facets
    (authors, venues, keywords, institutions, tags).
    """

    def as_counts(rows) -> list[dict[str, Any]]:
        # Normalise (value, paper_count) rows into plain JSON-friendly dicts.
        return [{"value": str(r["value"]), "paper_count": int(r["paper_count"])} for r in rows]

    conn = _open_ro_conn(_get_config().snapshot_db)
    try:
        def top(table: str, limit: int = 20) -> list[dict[str, Any]]:
            # `table` is only ever one of the literal names passed below.
            sql = f"SELECT value, paper_count FROM {table} ORDER BY paper_count DESC, value ASC LIMIT ?"
            return as_counts(conn.execute(sql, (limit,)).fetchall())

        count_row = conn.execute("SELECT COUNT(*) AS c FROM paper").fetchone()
        total = int(count_row["c"]) if count_row else 0

        # Four-digit years sort first (newest first); anything else follows.
        year_rows = conn.execute(
            """
            SELECT year AS value, paper_count
            FROM year_count
            ORDER BY CASE WHEN year GLOB '[0-9][0-9][0-9][0-9]' THEN 0 ELSE 1 END,
                     CAST(year AS INT) DESC, year ASC
            LIMIT 50
            """,
        ).fetchall()
        # Two-digit months sort first in ascending order; anything else follows.
        month_rows = conn.execute(
            """
            SELECT month AS value, paper_count
            FROM month_count
            ORDER BY CASE WHEN month GLOB '[0-1][0-9]' THEN 0 ELSE 1 END,
                     CAST(month AS INT) ASC, month ASC
            """,
        ).fetchall()

        stats: dict[str, Any] = {
            "total": total,
            "years": as_counts(year_rows),
            "months": as_counts(month_rows),
        }
        for key, table in (
            ("authors", "author"),
            ("venues", "venue"),
            ("institutions", "institution"),
            ("keywords", "keyword"),
            ("tags", "tag"),
        ):
            stats[key] = top(table)
        return stats
    finally:
        conn.close()
521
+
522
+
523
@mcp.tool()
def list_top_facets(category: str, limit: int = 20) -> list[dict[str, Any]]:
    """List top facet values.

    Category: author | venue | keyword | institution | tag
    """
    # Whitelist of category -> table name; the table name is interpolated into
    # the SQL below, so it must never come from the caller directly.
    table_map = {
        "author": "author",
        "venue": "venue",
        "keyword": "keyword",
        "institution": "institution",
        "tag": "tag",
    }
    table = table_map.get((category or "").strip().lower())
    if not table:
        raise McpToolError(
            "invalid_category",
            f"Invalid category: {category}. Must be one of: {', '.join(table_map.keys())}",
            category=category,
        )

    cfg = _get_config()
    # Clamp into [1, max_page_size] like the other tools that accept a limit,
    # so a single call cannot request an unbounded number of rows.
    limit = min(max(1, int(limit)), cfg.limits.max_page_size)

    conn = _open_ro_conn(cfg.snapshot_db)
    try:
        rows = conn.execute(
            f"SELECT value, paper_count FROM {table} ORDER BY paper_count DESC, value ASC LIMIT ?",
            (limit,),
        ).fetchall()
        return [{"value": str(r["value"]), "paper_count": int(r["paper_count"])} for r in rows]
    finally:
        conn.close()
555
+
556
+
557
@mcp.tool()
def filter_papers(
    author: str | None = None,
    venue: str | None = None,
    year: str | None = None,
    keyword: str | None = None,
    tag: str | None = None,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Filter papers by structured fields.

    Use for precise filtering by author, venue, year, keyword, or tag.
    """
    cfg = _get_config()
    limit = min(max(1, int(limit)), cfg.limits.max_page_size)

    joins: list[str] = []
    conditions: list[str] = []
    params: list[Any] = []

    # Filters that live in a linked table: (value, JOIN clauses, condition).
    linked_filters = (
        (
            author,
            ("JOIN paper_author pa ON pa.paper_id = p.paper_id", "JOIN author a ON a.author_id = pa.author_id"),
            "a.value LIKE ?",
        ),
        (
            keyword,
            ("JOIN paper_keyword pk ON pk.paper_id = p.paper_id", "JOIN keyword k ON k.keyword_id = pk.keyword_id"),
            "k.value LIKE ?",
        ),
        (
            tag,
            ("JOIN paper_tag pt ON pt.paper_id = p.paper_id", "JOIN tag t ON t.tag_id = pt.tag_id"),
            "t.value LIKE ?",
        ),
    )
    for value, join_clauses, condition in linked_filters:
        if value:
            joins.extend(join_clauses)
            conditions.append(condition)
            params.append(f"%{value}%")

    # Filters on columns of the paper table itself.
    if venue:
        conditions.append("p.venue LIKE ?")
        params.append(f"%{venue}%")
    if year:
        conditions.append("p.year = ?")
        params.append(str(year))

    # Assemble the statement; every user value travels as a bound parameter.
    sql_parts = ["SELECT DISTINCT p.paper_id, p.title, p.year, p.venue FROM paper p"]
    sql_parts.extend(joins)
    if conditions:
        sql_parts.append("WHERE " + " AND ".join(conditions))
    sql_parts.append("ORDER BY p.year DESC, p.title ASC LIMIT ?")
    query = " ".join(sql_parts)
    params.append(limit)

    conn = _open_ro_conn(cfg.snapshot_db)
    try:
        columns = ("paper_id", "title", "year", "venue")
        return [
            {column: str(row[column]) for column in columns}
            for row in conn.execute(query, tuple(params)).fetchall()
        ]
    finally:
        conn.close()
621
+
622
+
623
+ # ==================== MCP Resources ====================
624
+
625
@mcp.resource("paper://{paper_id}/metadata")
def resource_metadata(paper_id: str) -> str:
    """Resource: paper metadata serialised as a JSON string (non-ASCII kept as-is)."""
    # NOTE(review): get_paper_metadata is wrapped by @mcp.tool(); confirm the
    # decorated name is still directly callable with the installed FastMCP.
    return json.dumps(get_paper_metadata(paper_id), ensure_ascii=False)
630
+
631
+
632
@mcp.resource("paper://{paper_id}/summary")
def resource_summary_default(paper_id: str) -> str:
    """Resource: summary JSON string for the paper's preferred template."""
    # get_paper_summary already returns the JSON text, so it is passed through.
    # NOTE(review): get_paper_summary is wrapped by @mcp.tool(); confirm the
    # decorated name is still directly callable with the installed FastMCP.
    return get_paper_summary(paper_id)
637
+
638
+
639
@mcp.resource("paper://{paper_id}/summary/{template}")
def resource_summary_template(paper_id: str, template: str) -> str:
    """Resource: summary JSON string for an explicitly named template."""
    # get_paper_summary already returns the JSON text, so it is passed through.
    # NOTE(review): get_paper_summary is wrapped by @mcp.tool(); confirm the
    # decorated name is still directly callable with the installed FastMCP.
    return get_paper_summary(paper_id, template=template)
644
+
645
+
646
@mcp.resource("paper://{paper_id}/source")
def resource_source(paper_id: str) -> str:
    """Resource: the paper's source markdown, truncated to the default budget."""
    # NOTE(review): get_paper_source is wrapped by @mcp.tool(); confirm the
    # decorated name is still directly callable with the installed FastMCP.
    return get_paper_source(paper_id)
651
+
652
+
653
@mcp.resource("paper://{paper_id}/translation/{lang}")
def resource_translation(paper_id: str, lang: str) -> str:
    """Resource: translated markdown for ``lang``, truncated to the default budget.

    Raises:
        McpToolError: ``asset_fetch_failed`` when loading the asset fails,
            ``translation_not_available`` when no translation is recorded,
            or a validation error for a malformed ``paper_id``.
    """
    cfg = _get_config()
    paper_id = _validate_paper_id(paper_id, cfg)

    try:
        # The lookup uses the lower-cased language; errors echo the caller's spelling.
        translated = _load_translation_markdown(paper_id, lang.lower())
    except RuntimeError as exc:
        raise McpToolError(
            "asset_fetch_failed",
            "Failed to fetch translation asset",
            paper_id=paper_id,
            lang=lang,
            detail=str(exc),
        ) from exc

    if translated is not None:
        return _truncate(translated, cfg.max_chars_default)
    raise McpToolError(
        "translation_not_available",
        "Translation not available",
        paper_id=paper_id,
        lang=lang,
    )
679
+
680
+
681
def resolve_static_export_dir() -> Path | None:
    """Return ``PAPER_DB_STATIC_EXPORT_DIR`` as a Path, or None if unset or empty."""
    raw = os.environ.get("PAPER_DB_STATIC_EXPORT_DIR")
    return Path(raw) if raw else None