deepresearch-flow 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -424,6 +424,87 @@ def register_db_commands(db_group: click.Group) -> None:
424
424
  click.echo(f"Wrote snapshot DB: {opts.output_db}")
425
425
  click.echo(f"Wrote static export: {opts.static_export_dir}")
426
426
 
427
+ @snapshot_group.group("unpack")
428
+ def snapshot_unpack_group() -> None:
429
+ """Unpack snapshot artifacts."""
430
+
431
+ @snapshot_unpack_group.command("md")
432
+ @click.option("--snapshot-db", "snapshot_db", required=True, help="Path to snapshot database")
433
+ @click.option(
434
+ "--static-export-dir",
435
+ "static_export_dir",
436
+ required=True,
437
+ help="Path to static export directory",
438
+ )
439
+ @click.option(
440
+ "--pdf-root",
441
+ "pdf_roots",
442
+ multiple=True,
443
+ required=True,
444
+ help="PDF root directories for name alignment (repeatable)",
445
+ )
446
+ @click.option("--md-output-dir", "md_output_dir", required=True, help="Output directory for Markdown")
447
+ @click.option(
448
+ "--md-translated-output-dir",
449
+ "md_translated_output_dir",
450
+ required=True,
451
+ help="Output directory for translated Markdown",
452
+ )
453
+ def snapshot_unpack_md(
454
+ snapshot_db: str,
455
+ static_export_dir: str,
456
+ pdf_roots: tuple[str, ...],
457
+ md_output_dir: str,
458
+ md_translated_output_dir: str,
459
+ ) -> None:
460
+ """Unpack source/translated markdown and align filenames to PDFs."""
461
+ from deepresearch_flow.paper.snapshot.unpacker import SnapshotUnpackMdOptions, unpack_md
462
+
463
+ opts = SnapshotUnpackMdOptions(
464
+ snapshot_db=Path(snapshot_db),
465
+ static_export_dir=Path(static_export_dir),
466
+ pdf_roots=[Path(path) for path in pdf_roots],
467
+ md_output_dir=Path(md_output_dir),
468
+ md_translated_output_dir=Path(md_translated_output_dir),
469
+ )
470
+ unpack_md(opts)
471
+
472
+ @snapshot_unpack_group.command("info")
473
+ @click.option("--snapshot-db", "snapshot_db", required=True, help="Path to snapshot database")
474
+ @click.option(
475
+ "--static-export-dir",
476
+ "static_export_dir",
477
+ required=True,
478
+ help="Path to static export directory",
479
+ )
480
+ @click.option(
481
+ "--pdf-root",
482
+ "pdf_roots",
483
+ multiple=True,
484
+ required=True,
485
+ help="PDF root directories for name alignment (repeatable)",
486
+ )
487
+ @click.option("--template", "template", required=True, help="Summary template tag")
488
+ @click.option("--output-json", "output_json", required=True, help="Output JSON file path")
489
+ def snapshot_unpack_info(
490
+ snapshot_db: str,
491
+ static_export_dir: str,
492
+ pdf_roots: tuple[str, ...],
493
+ template: str,
494
+ output_json: str,
495
+ ) -> None:
496
+ """Unpack aggregated paper_infos.json from snapshot summaries."""
497
+ from deepresearch_flow.paper.snapshot.unpacker import SnapshotUnpackInfoOptions, unpack_info
498
+
499
+ opts = SnapshotUnpackInfoOptions(
500
+ snapshot_db=Path(snapshot_db),
501
+ static_export_dir=Path(static_export_dir),
502
+ pdf_roots=[Path(path) for path in pdf_roots],
503
+ template=template,
504
+ output_json=Path(output_json),
505
+ )
506
+ unpack_info(opts)
507
+
427
508
  @db_group.group("api")
428
509
  def api_group() -> None:
429
510
  """Read-only JSON API server backed by a snapshot DB."""
@@ -11,8 +11,9 @@ from starlette.applications import Starlette
11
11
  from starlette.middleware.cors import CORSMiddleware
12
12
  from starlette.requests import Request
13
13
  from starlette.responses import JSONResponse, Response
14
- from starlette.routing import Route
14
+ from starlette.routing import Mount, Route
15
15
 
16
+ from deepresearch_flow.paper.snapshot.common import ApiLimits, _open_ro_conn
16
17
  from deepresearch_flow.paper.snapshot.text import merge_adjacent_markers, remove_cjk_spaces, rewrite_search_query
17
18
 
18
19
  _WHITESPACE_RE = re.compile(r"\s+")
@@ -87,13 +88,6 @@ _FACET_TYPE_TO_KEY = {
87
88
  }
88
89
 
89
90
 
90
- @dataclass(frozen=True)
91
- class ApiLimits:
92
- max_query_length: int = 500
93
- max_page_size: int = 100
94
- max_pagination_offset: int = 10_000 # page * page_size
95
-
96
-
97
91
  @dataclass(frozen=True)
98
92
  class SnapshotApiConfig:
99
93
  snapshot_db: Path
@@ -110,12 +104,6 @@ def _json_error(status_code: int, *, error: str, detail: str) -> JSONResponse:
110
104
  return JSONResponse({"error": error, "detail": detail}, status_code=status_code)
111
105
 
112
106
 
113
- def _open_ro_conn(db_path: Path) -> sqlite3.Connection:
114
- uri = f"file:{db_path.as_posix()}?mode=ro"
115
- conn = sqlite3.connect(uri, uri=True)
116
- conn.row_factory = sqlite3.Row
117
- conn.execute("PRAGMA query_only=ON;")
118
- return conn
119
107
 
120
108
 
121
109
  def _snapshot_build_id(conn: sqlite3.Connection) -> str:
@@ -917,6 +905,22 @@ def create_app(
917
905
  limits=limits or ApiLimits(),
918
906
  )
919
907
 
908
+ # Lazy import to avoid circular dependency
909
+ from deepresearch_flow.paper.snapshot.mcp_server import (
910
+ McpSnapshotConfig,
911
+ create_mcp_app,
912
+ resolve_static_export_dir,
913
+ )
914
+
915
+ mcp_config = McpSnapshotConfig(
916
+ snapshot_db=snapshot_db,
917
+ static_base_url=_normalize_base_url(static_base_url),
918
+ static_export_dir=resolve_static_export_dir(),
919
+ limits=limits or ApiLimits(),
920
+ origin_allowlist=cors_allowed_origins or ["*"],
921
+ )
922
+ mcp_app, mcp_lifespan = create_mcp_app(mcp_config)
923
+
920
924
  routes = [
921
925
  Route("/api/v1/config", _api_config, methods=["GET"]),
922
926
  Route("/api/v1/search", _api_search, methods=["GET"]),
@@ -927,9 +931,15 @@ def create_app(
927
931
  Route("/api/v1/facets/{facet:str}/{facet_id:str}/stats", _api_facet_stats, methods=["GET"]),
928
932
  Route("/api/v1/facets/{facet:str}/by-value/{value:str}/papers", _api_facet_by_value_papers, methods=["GET"]),
929
933
  Route("/api/v1/facets/{facet:str}/by-value/{value:str}/stats", _api_facet_by_value_stats, methods=["GET"]),
934
+ Mount("/mcp", app=mcp_app),
930
935
  ]
931
936
 
932
- app = Starlette(routes=routes)
937
+ # Pass MCP lifespan to ensure session manager initializes properly
938
+ # https://gofastmcp.com/deployment/http#mounting-in-starlette
939
+ app = Starlette(
940
+ routes=routes,
941
+ lifespan=mcp_lifespan,
942
+ )
933
943
  if cfg.cors_allowed_origins:
934
944
  app.add_middleware(
935
945
  CORSMiddleware,
@@ -0,0 +1,34 @@
1
+ """Shared utilities for snapshot API and MCP server.
2
+
3
+ This module contains common types, configuration, and utilities used by both
4
+ the snapshot REST API and the MCP server to avoid circular imports.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+ import sqlite3
12
+
13
+
14
+ @dataclass(frozen=True)
15
+ class ApiLimits:
16
+ """API rate and size limits."""
17
+
18
+ max_query_length: int = 500
19
+ max_page_size: int = 100
20
+ max_pagination_offset: int = 10_000
21
+
22
+
23
+ def _open_ro_conn(db_path: Path) -> sqlite3.Connection:
24
+ """Open a read-only SQLite connection with Row factory.
25
+
26
+ Args:
27
+ db_path: Path to the SQLite database file.
28
+
29
+ Returns:
30
+ sqlite3.Connection: A read-only connection with row_factory set to Row.
31
+ """
32
+ conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, check_same_thread=False)
33
+ conn.row_factory = sqlite3.Row
34
+ return conn
@@ -0,0 +1,686 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+ import re
8
+ from typing import Any
9
+
10
+ import httpx
11
+ from starlette.applications import Starlette
12
+ from starlette.middleware import Middleware
13
+ from starlette.middleware.base import BaseHTTPMiddleware
14
+ from starlette.requests import Request
15
+ from starlette.responses import Response
16
+ from starlette.routing import Mount
17
+
18
+ from fastmcp import FastMCP
19
+
20
+ from deepresearch_flow.paper.snapshot.common import ApiLimits, _open_ro_conn
21
+ from deepresearch_flow.paper.snapshot.text import merge_adjacent_markers, remove_cjk_spaces, rewrite_search_query
22
+
23
+ _SUPPORTED_PROTOCOL_VERSIONS = {"2025-03-26", "2025-06-18"}
24
+ _DEFAULT_MAX_CHARS = 50_000
25
+ _DEFAULT_TIMEOUT = 10.0
26
+ _PAPER_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]+$')
27
+
28
+
29
+ class McpToolError(Exception):
30
+ """MCP tool exception for standardized error handling.
31
+
32
+ FastMCP will catch this exception and convert it to a proper
33
+ JSON-RPC error response that the client can understand.
34
+ """
35
+
36
+ def __init__(self, code: str, message: str, **details):
37
+ self.code = code
38
+ self.message = message
39
+ self.details = details
40
+ super().__init__(message)
41
+
42
+ def to_dict(self) -> dict[str, Any]:
43
+ """Convert to error dictionary format."""
44
+ return {"error": self.code, "message": self.message, **self.details}
45
+
46
+
47
+ @dataclass(frozen=True)
48
+ class McpSnapshotConfig:
49
+ snapshot_db: Path
50
+ static_base_url: str
51
+ static_export_dir: Path | None
52
+ limits: ApiLimits
53
+ origin_allowlist: list[str]
54
+ max_chars_default: int = _DEFAULT_MAX_CHARS
55
+ http_timeout: float = _DEFAULT_TIMEOUT
56
+ max_paper_id_length: int = 64
57
+ # HTTP client stored in object __dict__ to avoid dataclass frozen restriction
58
+ _http_client: httpx.Client | None = field(default=None, repr=False, compare=False)
59
+
60
+ def get_http_client(self) -> httpx.Client:
61
+ """Get or create a shared HTTP client with connection pooling."""
62
+ if self._http_client is None:
63
+ object.__setattr__(
64
+ self,
65
+ '_http_client',
66
+ httpx.Client(
67
+ timeout=self.http_timeout,
68
+ follow_redirects=True,
69
+ limits=httpx.Limits(
70
+ max_keepalive_connections=10,
71
+ max_connections=20
72
+ )
73
+ )
74
+ )
75
+ return self._http_client
76
+
77
+
78
+ class McpRequestGuardMiddleware(BaseHTTPMiddleware):
79
+ def __init__(self, app, *, origin_allowlist: list[str]) -> None:
80
+ super().__init__(app)
81
+ self._allowlist = [origin.lower() for origin in origin_allowlist]
82
+
83
+ async def dispatch(self, request: Request, call_next): # type: ignore[override]
84
+ if request.method == "GET":
85
+ return Response("Method Not Allowed", status_code=405)
86
+ if request.method not in {"POST", "OPTIONS"}:
87
+ return Response("Method Not Allowed", status_code=405)
88
+ origin = request.headers.get("origin")
89
+ if origin and not self._is_allowed_origin(origin):
90
+ return Response("Forbidden", status_code=403)
91
+ protocol = request.headers.get("mcp-protocol-version")
92
+ if protocol and protocol not in _SUPPORTED_PROTOCOL_VERSIONS:
93
+ return Response("Bad Request", status_code=400)
94
+ return await call_next(request)
95
+
96
+ def _is_allowed_origin(self, origin: str) -> bool:
97
+ if not self._allowlist or "*" in self._allowlist:
98
+ return True
99
+ return origin.lower() in self._allowlist
100
+
101
+
102
+ _CONFIG: McpSnapshotConfig | None = None
103
+ mcp = FastMCP("Paper DB MCP")
104
+
105
+
106
+ def configure(config: McpSnapshotConfig) -> None:
107
+ global _CONFIG
108
+ _CONFIG = config
109
+
110
+
111
+ def create_mcp_app(config: McpSnapshotConfig) -> tuple[Starlette, Any]:
112
+ """Create MCP app with middleware and return it along with lifespan.
113
+
114
+ Returns:
115
+ Tuple of (wrapped_app, lifespan_context) for use by parent Starlette.
116
+ """
117
+ configure(config)
118
+ mcp_app = mcp.http_app(path="/", stateless_http=True)
119
+ wrapped = Starlette(
120
+ routes=[Mount("/", app=mcp_app)],
121
+ middleware=[
122
+ Middleware(McpRequestGuardMiddleware, origin_allowlist=config.origin_allowlist),
123
+ ],
124
+ )
125
+ return wrapped, mcp_app.lifespan
126
+
127
+
128
+ def _get_config() -> McpSnapshotConfig:
129
+ if _CONFIG is None:
130
+ raise RuntimeError("MCP server not configured")
131
+ return _CONFIG
132
+
133
+
134
+ def _validate_query(query: str, cfg: McpSnapshotConfig) -> str:
135
+ """Validate search query string.
136
+
137
+ Raises:
138
+ McpToolError: If query is invalid or too long.
139
+ """
140
+ if not query or not query.strip():
141
+ raise McpToolError("invalid_query", "Query cannot be empty")
142
+ if len(query) > cfg.limits.max_query_length:
143
+ raise McpToolError(
144
+ "query_too_long",
145
+ f"Query exceeds maximum length of {cfg.limits.max_query_length}",
146
+ length=len(query),
147
+ max_length=cfg.limits.max_query_length
148
+ )
149
+ return query.strip()
150
+
151
+
152
+ def _validate_paper_id(paper_id: str, cfg: McpSnapshotConfig) -> str:
153
+ """Validate paper ID format.
154
+
155
+ Raises:
156
+ McpToolError: If paper_id is invalid.
157
+ """
158
+ if not paper_id:
159
+ raise McpToolError("invalid_paper_id", "Paper ID cannot be empty")
160
+ if len(paper_id) > cfg.max_paper_id_length:
161
+ raise McpToolError(
162
+ "paper_id_too_long",
163
+ f"Paper ID exceeds maximum length of {cfg.max_paper_id_length}",
164
+ length=len(paper_id),
165
+ max_length=cfg.max_paper_id_length
166
+ )
167
+ if not _PAPER_ID_PATTERN.match(paper_id):
168
+ raise McpToolError(
169
+ "invalid_paper_id_format",
170
+ "Paper ID must contain only alphanumeric characters, hyphens, and underscores",
171
+ paper_id=paper_id
172
+ )
173
+ return paper_id
174
+
175
+
176
+ def _truncate(text: str, max_chars: int | None) -> str:
177
+ """Truncate text with marker."""
178
+ if max_chars is None or max_chars <= 0:
179
+ return text
180
+ if len(text) <= max_chars:
181
+ return text
182
+ remaining = len(text) - max_chars
183
+ return f"{text[:max_chars]}\n[truncated: {remaining} more chars]"
184
+
185
+
186
+ def _read_static_text(rel_path: str) -> str | None:
187
+ """Read static text from local export directory if available."""
188
+ cfg = _get_config()
189
+ if cfg.static_export_dir:
190
+ path = cfg.static_export_dir / rel_path
191
+ if path.exists():
192
+ return path.read_text(encoding="utf-8")
193
+ return None
194
+
195
+
196
+ def _fetch_static_text(rel_path: str) -> str:
197
+ """Fetch static text from HTTP remote."""
198
+ cfg = _get_config()
199
+ if cfg.static_base_url:
200
+ base = cfg.static_base_url.rstrip("/")
201
+ url = f"{base}/{rel_path.lstrip('/')}"
202
+ client = cfg.get_http_client()
203
+ response = client.get(url)
204
+ response.raise_for_status()
205
+ return response.text
206
+ raise FileNotFoundError("static_base_url not configured")
207
+
208
+
209
+ def _load_static_text(rel_path: str) -> str:
210
+ """Load static text with fallback: local first, then HTTP."""
211
+ try:
212
+ text = _read_static_text(rel_path)
213
+ if text is not None:
214
+ return text
215
+ return _fetch_static_text(rel_path)
216
+ except httpx.HTTPStatusError as exc:
217
+ raise RuntimeError(f"asset_fetch_failed:{exc.response.status_code}") from exc
218
+ except httpx.RequestError as exc:
219
+ raise RuntimeError("asset_fetch_failed:request_error") from exc
220
+ except FileNotFoundError as exc:
221
+ raise RuntimeError("asset_fetch_failed:not_configured") from exc
222
+
223
+
224
+ def _load_summary_json(paper_id: str, template: str | None) -> tuple[str | None, list[str] | None]:
225
+ """Load summary JSON content and return available templates list."""
226
+ cfg = _get_config()
227
+ conn = _open_ro_conn(cfg.snapshot_db)
228
+ try:
229
+ row = conn.execute(
230
+ "SELECT preferred_summary_template, summary_asset_paths_json FROM paper WHERE paper_id = ?",
231
+ (paper_id,),
232
+ ).fetchone()
233
+ if not row:
234
+ return None, None
235
+ preferred = row["preferred_summary_template"]
236
+ asset_paths = json.loads(row["summary_asset_paths_json"] or "{}")
237
+ available = sorted(asset_paths.keys())
238
+ selected = template if template else preferred
239
+ if not selected or selected not in asset_paths:
240
+ return None, available
241
+ rel_path = asset_paths[selected]
242
+ return _load_static_text(rel_path), available
243
+ finally:
244
+ conn.close()
245
+
246
+
247
+ def _load_source_markdown(paper_id: str) -> str | None:
248
+ """Load source markdown for paper."""
249
+ cfg = _get_config()
250
+ conn = _open_ro_conn(cfg.snapshot_db)
251
+ try:
252
+ row = conn.execute(
253
+ "SELECT source_md_content_hash FROM paper WHERE paper_id = ?",
254
+ (paper_id,),
255
+ ).fetchone()
256
+ if not row or not row["source_md_content_hash"]:
257
+ return None
258
+ rel_path = f"md/{row['source_md_content_hash']}.md"
259
+ return _load_static_text(rel_path)
260
+ finally:
261
+ conn.close()
262
+
263
+
264
+ def _load_translation_markdown(paper_id: str, lang: str) -> str | None:
265
+ """Load translation markdown for paper and language."""
266
+ cfg = _get_config()
267
+ conn = _open_ro_conn(cfg.snapshot_db)
268
+ try:
269
+ row = conn.execute(
270
+ "SELECT translations_json FROM paper WHERE paper_id = ?",
271
+ (paper_id,),
272
+ ).fetchone()
273
+ if not row or not row["translations_json"]:
274
+ return None
275
+ translations = json.loads(row["translations_json"])
276
+ rel_path = translations.get(lang)
277
+ if not rel_path:
278
+ return None
279
+ return _load_static_text(rel_path)
280
+ finally:
281
+ conn.close()
282
+
283
+
284
+ # ==================== MCP Tools ====================
285
+
286
+ @mcp.tool()
287
+ def search_papers(query: str, limit: int = 10) -> list[dict[str, Any]]:
288
+ """Full-text search for papers (relevance-ranked).
289
+
290
+ Use when you only have topic keywords.
291
+ Returns paper_id, title, year, venue, snippet_markdown.
292
+ """
293
+ cfg = _get_config()
294
+ query = _validate_query(query, cfg)
295
+ limit = min(max(1, int(limit)), cfg.limits.max_page_size)
296
+
297
+ conn = _open_ro_conn(cfg.snapshot_db)
298
+ try:
299
+ cur = conn.execute(
300
+ """
301
+ SELECT paper_id, title, year, venue, abstract
302
+ FROM paper_search
303
+ WHERE paper_search MATCH ?
304
+ ORDER BY rank
305
+ LIMIT ?
306
+ """,
307
+ (rewrite_search_query(query), limit),
308
+ )
309
+ rows = cur.fetchall()
310
+ results: list[dict[str, Any]] = []
311
+ for row in rows:
312
+ snippet = str(row["abstract"] or "")
313
+ snippet = remove_cjk_spaces(snippet)
314
+ snippet, markers = merge_adjacent_markers(snippet)
315
+ results.append({
316
+ "paper_id": str(row["paper_id"]),
317
+ "title": str(row["title"]),
318
+ "year": str(row["year"]),
319
+ "venue": str(row["venue"]),
320
+ "snippet_markdown": snippet,
321
+ })
322
+ return results
323
+ finally:
324
+ conn.close()
325
+
326
+
327
+ @mcp.tool()
328
+ def search_papers_by_keyword(keyword: str, limit: int = 10) -> list[dict[str, Any]]:
329
+ """Search papers by keyword/tag (exact match).
330
+
331
+ Use when you know specific keywords or tags.
332
+ """
333
+ cfg = _get_config()
334
+ limit = min(max(1, int(limit)), cfg.limits.max_page_size)
335
+
336
+ conn = _open_ro_conn(cfg.snapshot_db)
337
+ try:
338
+ rows = conn.execute(
339
+ """
340
+ SELECT DISTINCT p.paper_id, p.title, p.year, p.venue, p.abstract
341
+ FROM paper p
342
+ JOIN paper_keyword pk ON pk.paper_id = p.paper_id
343
+ JOIN keyword k ON k.keyword_id = pk.keyword_id
344
+ WHERE k.value LIKE ?
345
+ ORDER BY p.year DESC, p.title ASC
346
+ LIMIT ?
347
+ """,
348
+ (f"%{keyword}%", limit),
349
+ ).fetchall()
350
+ results: list[dict[str, Any]] = []
351
+ for row in rows:
352
+ snippet = str(row["abstract"] or "")
353
+ snippet = remove_cjk_spaces(snippet)
354
+ snippet, markers = merge_adjacent_markers(snippet)
355
+ results.append({
356
+ "paper_id": str(row["paper_id"]),
357
+ "title": str(row["title"]),
358
+ "year": str(row["year"]),
359
+ "venue": str(row["venue"]),
360
+ "snippet_markdown": snippet,
361
+ })
362
+ return results
363
+ finally:
364
+ conn.close()
365
+
366
+
367
+ @mcp.tool()
368
+ def get_paper_metadata(paper_id: str) -> dict[str, Any]:
369
+ """Get paper metadata and available summary templates.
370
+
371
+ Call this first before requesting a summary to discover available templates.
372
+ """
373
+ cfg = _get_config()
374
+ paper_id = _validate_paper_id(paper_id, cfg)
375
+
376
+ conn = _open_ro_conn(cfg.snapshot_db)
377
+ try:
378
+ row = conn.execute(
379
+ """
380
+ SELECT paper_id, title, year, venue, doi, arxiv_id, openreview_id, paper_pw_url,
381
+ preferred_summary_template, summary_asset_paths_json
382
+ FROM paper WHERE paper_id = ?
383
+ """,
384
+ (paper_id,),
385
+ ).fetchone()
386
+ if not row:
387
+ raise McpToolError("not_found", "paper not found", paper_id=paper_id)
388
+
389
+ asset_paths = json.loads(row["summary_asset_paths_json"] or "{}")
390
+ available = sorted(asset_paths.keys())
391
+ return {
392
+ "paper_id": str(row["paper_id"]),
393
+ "title": str(row["title"]),
394
+ "year": str(row["year"]),
395
+ "venue": str(row["venue"]),
396
+ "doi": row["doi"],
397
+ "arxiv_id": row["arxiv_id"],
398
+ "openreview_id": row["openreview_id"],
399
+ "paper_pw_url": row["paper_pw_url"],
400
+ "preferred_summary_template": row["preferred_summary_template"],
401
+ "available_summary_templates": available,
402
+ }
403
+ finally:
404
+ conn.close()
405
+
406
+
407
+ @mcp.tool()
408
+ def get_paper_summary(paper_id: str, template: str | None = None, max_chars: int | None = None) -> str:
409
+ """Get summary JSON as raw string.
410
+
411
+ Uses preferred template if template is not specified.
412
+ Returns the full JSON content (not a URL).
413
+ """
414
+ cfg = _get_config()
415
+ paper_id = _validate_paper_id(paper_id, cfg)
416
+ max_chars = max_chars if max_chars is not None else cfg.max_chars_default
417
+
418
+ try:
419
+ payload, available = _load_summary_json(paper_id, template)
420
+ except RuntimeError as exc:
421
+ raise McpToolError(
422
+ "asset_fetch_failed",
423
+ "Failed to fetch summary asset",
424
+ paper_id=paper_id,
425
+ template=template,
426
+ detail=str(exc),
427
+ ) from exc
428
+
429
+ if payload is None:
430
+ raise McpToolError(
431
+ "template_not_available",
432
+ "Template not available",
433
+ paper_id=paper_id,
434
+ template=template,
435
+ available_summary_templates=available,
436
+ )
437
+
438
+ return _truncate(payload, max_chars)
439
+
440
+
441
+ @mcp.tool()
442
+ def get_paper_source(paper_id: str, max_chars: int | None = None) -> str:
443
+ """Get source markdown text.
444
+
445
+ Content may be large; use max_chars to limit size.
446
+ """
447
+ cfg = _get_config()
448
+ paper_id = _validate_paper_id(paper_id, cfg)
449
+ max_chars = max_chars if max_chars is not None else cfg.max_chars_default
450
+
451
+ try:
452
+ content = _load_source_markdown(paper_id)
453
+ except RuntimeError as exc:
454
+ raise McpToolError(
455
+ "asset_fetch_failed",
456
+ "Failed to fetch source asset",
457
+ paper_id=paper_id,
458
+ detail=str(exc),
459
+ ) from exc
460
+
461
+ if content is None:
462
+ raise McpToolError(
463
+ "source_not_available",
464
+ "Source markdown not available",
465
+ paper_id=paper_id
466
+ )
467
+
468
+ return _truncate(content, max_chars)
469
+
470
+
471
+ @mcp.tool()
472
+ def get_database_stats() -> dict[str, Any]:
473
+ """Get database statistics.
474
+
475
+ Returns totals, year/month distributions, and top facets
476
+ (authors, venues, keywords, institutions, tags).
477
+ """
478
+ cfg = _get_config()
479
+ conn = _open_ro_conn(cfg.snapshot_db)
480
+ try:
481
+ total_row = conn.execute("SELECT COUNT(*) AS c FROM paper").fetchone()
482
+ total = int(total_row["c"]) if total_row else 0
483
+
484
+ def top(table: str, limit: int = 20) -> list[dict[str, Any]]:
485
+ rows = conn.execute(
486
+ f"SELECT value, paper_count FROM {table} ORDER BY paper_count DESC, value ASC LIMIT ?",
487
+ (limit,),
488
+ ).fetchall()
489
+ return [{"value": str(r["value"]), "paper_count": int(r["paper_count"])} for r in rows]
490
+
491
+ years = conn.execute(
492
+ """
493
+ SELECT year AS value, paper_count
494
+ FROM year_count
495
+ ORDER BY CASE WHEN year GLOB '[0-9][0-9][0-9][0-9]' THEN 0 ELSE 1 END,
496
+ CAST(year AS INT) DESC, year ASC
497
+ LIMIT 50
498
+ """,
499
+ ).fetchall()
500
+ months = conn.execute(
501
+ """
502
+ SELECT month AS value, paper_count
503
+ FROM month_count
504
+ ORDER BY CASE WHEN month GLOB '[0-1][0-9]' THEN 0 ELSE 1 END,
505
+ CAST(month AS INT) ASC, month ASC
506
+ """,
507
+ ).fetchall()
508
+
509
+ return {
510
+ "total": total,
511
+ "years": [{"value": str(r["value"]), "paper_count": int(r["paper_count"])} for r in years],
512
+ "months": [{"value": str(r["value"]), "paper_count": int(r["paper_count"])} for r in months],
513
+ "authors": top("author"),
514
+ "venues": top("venue"),
515
+ "institutions": top("institution"),
516
+ "keywords": top("keyword"),
517
+ "tags": top("tag"),
518
+ }
519
+ finally:
520
+ conn.close()
521
+
522
+
523
+ @mcp.tool()
524
+ def list_top_facets(category: str, limit: int = 20) -> list[dict[str, Any]]:
525
+ """List top facet values.
526
+
527
+ Category: author | venue | keyword | institution | tag
528
+ """
529
+ table_map = {
530
+ "author": "author",
531
+ "venue": "venue",
532
+ "keyword": "keyword",
533
+ "institution": "institution",
534
+ "tag": "tag",
535
+ }
536
+ table = table_map.get((category or "").strip().lower())
537
+ if not table:
538
+ raise McpToolError(
539
+ "invalid_category",
540
+ f"Invalid category: {category}. Must be one of: {', '.join(table_map.keys())}",
541
+ category=category
542
+ )
543
+
544
+ limit = max(1, int(limit))
545
+ cfg = _get_config()
546
+ conn = _open_ro_conn(cfg.snapshot_db)
547
+ try:
548
+ rows = conn.execute(
549
+ f"SELECT value, paper_count FROM {table} ORDER BY paper_count DESC, value ASC LIMIT ?",
550
+ (limit,),
551
+ ).fetchall()
552
+ return [{"value": str(r["value"]), "paper_count": int(r["paper_count"])} for r in rows]
553
+ finally:
554
+ conn.close()
555
+
556
+
557
+ @mcp.tool()
558
+ def filter_papers(
559
+ author: str | None = None,
560
+ venue: str | None = None,
561
+ year: str | None = None,
562
+ keyword: str | None = None,
563
+ tag: str | None = None,
564
+ limit: int = 10,
565
+ ) -> list[dict[str, Any]]:
566
+ """Filter papers by structured fields.
567
+
568
+ Use for precise filtering by author, venue, year, keyword, or tag.
569
+ """
570
+ cfg = _get_config()
571
+ limit = min(max(1, int(limit)), cfg.limits.max_page_size)
572
+
573
+ query = "SELECT DISTINCT p.paper_id, p.title, p.year, p.venue FROM paper p"
574
+ joins: list[str] = []
575
+ conditions: list[str] = []
576
+ params: list[Any] = []
577
+
578
+ if author:
579
+ joins.append("JOIN paper_author pa ON pa.paper_id = p.paper_id")
580
+ joins.append("JOIN author a ON a.author_id = pa.author_id")
581
+ conditions.append("a.value LIKE ?")
582
+ params.append(f"%{author}%")
583
+ if keyword:
584
+ joins.append("JOIN paper_keyword pk ON pk.paper_id = p.paper_id")
585
+ joins.append("JOIN keyword k ON k.keyword_id = pk.keyword_id")
586
+ conditions.append("k.value LIKE ?")
587
+ params.append(f"%{keyword}%")
588
+ if tag:
589
+ joins.append("JOIN paper_tag pt ON pt.paper_id = p.paper_id")
590
+ joins.append("JOIN tag t ON t.tag_id = pt.tag_id")
591
+ conditions.append("t.value LIKE ?")
592
+ params.append(f"%{tag}%")
593
+ if venue:
594
+ conditions.append("p.venue LIKE ?")
595
+ params.append(f"%{venue}%")
596
+ if year:
597
+ conditions.append("p.year = ?")
598
+ params.append(str(year))
599
+
600
+ if joins:
601
+ query += " " + " ".join(joins)
602
+ if conditions:
603
+ query += " WHERE " + " AND ".join(conditions)
604
+ query += " ORDER BY p.year DESC, p.title ASC LIMIT ?"
605
+ params.append(limit)
606
+
607
+ conn = _open_ro_conn(cfg.snapshot_db)
608
+ try:
609
+ rows = conn.execute(query, tuple(params)).fetchall()
610
+ return [
611
+ {
612
+ "paper_id": str(row["paper_id"]),
613
+ "title": str(row["title"]),
614
+ "year": str(row["year"]),
615
+ "venue": str(row["venue"]),
616
+ }
617
+ for row in rows
618
+ ]
619
+ finally:
620
+ conn.close()
621
+
622
+
623
+ # ==================== MCP Resources ====================
624
+
625
+ @mcp.resource("paper://{paper_id}/metadata")
626
+ def resource_metadata(paper_id: str) -> str:
627
+ """Resource: metadata as JSON string."""
628
+ payload = get_paper_metadata(paper_id)
629
+ return json.dumps(payload, ensure_ascii=False)
630
+
631
+
632
+ @mcp.resource("paper://{paper_id}/summary")
633
+ def resource_summary_default(paper_id: str) -> str:
634
+ """Resource: preferred summary JSON string."""
635
+ payload = get_paper_summary(paper_id)
636
+ return payload # Already a JSON string
637
+
638
+
639
+ @mcp.resource("paper://{paper_id}/summary/{template}")
640
+ def resource_summary_template(paper_id: str, template: str) -> str:
641
+ """Resource: summary JSON string for a specific template."""
642
+ payload = get_paper_summary(paper_id, template=template)
643
+ return payload # Already a JSON string
644
+
645
+
646
+ @mcp.resource("paper://{paper_id}/source")
647
+ def resource_source(paper_id: str) -> str:
648
+ """Resource: source markdown text."""
649
+ payload = get_paper_source(paper_id)
650
+ return payload
651
+
652
+
653
+ @mcp.resource("paper://{paper_id}/translation/{lang}")
654
+ def resource_translation(paper_id: str, lang: str) -> str:
655
+ """Resource: translated markdown text."""
656
+ cfg = _get_config()
657
+ paper_id = _validate_paper_id(paper_id, cfg)
658
+
659
+ try:
660
+ content = _load_translation_markdown(paper_id, lang.lower())
661
+ except RuntimeError as exc:
662
+ raise McpToolError(
663
+ "asset_fetch_failed",
664
+ "Failed to fetch translation asset",
665
+ paper_id=paper_id,
666
+ lang=lang,
667
+ detail=str(exc),
668
+ ) from exc
669
+
670
+ if content is None:
671
+ raise McpToolError(
672
+ "translation_not_available",
673
+ "Translation not available",
674
+ paper_id=paper_id,
675
+ lang=lang,
676
+ )
677
+
678
+ return _truncate(content, cfg.max_chars_default)
679
+
680
+
681
+ def resolve_static_export_dir() -> Path | None:
682
+ """Resolve static export directory from environment variable."""
683
+ value = os.getenv("PAPER_DB_STATIC_EXPORT_DIR")
684
+ if not value:
685
+ return None
686
+ return Path(value)
@@ -0,0 +1,259 @@
1
+ """Unpack snapshot to recover original files with readable names.
2
+
3
+ This is the reverse operation of builder.build_snapshot().
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ import hashlib
10
+ import json
11
+ from pathlib import Path
12
+ import re
13
+ import sqlite3
14
+ from typing import Any, Iterable
15
+
16
+ from rich.console import Console
17
+ from rich.table import Table
18
+
19
+
20
@dataclass(frozen=True)
class SnapshotUnpackBaseOptions:
    """Options shared by all snapshot-unpack operations."""

    # Path to the snapshot SQLite database (the reverse of builder output).
    snapshot_db: Path
    # Directory holding the content-addressed static export (md/, summary/, ...).
    static_export_dir: Path
    # PDF roots scanned to align output filenames with original PDF names.
    pdf_roots: list[Path]
25
+
26
+
27
@dataclass(frozen=True)
class SnapshotUnpackMdOptions(SnapshotUnpackBaseOptions):
    """Options for unpacking source and translated markdown files."""

    # Destination directory for recovered source markdown files.
    md_output_dir: Path
    # Destination directory for recovered translated markdown files.
    md_translated_output_dir: Path
31
+
32
+
33
@dataclass(frozen=True)
class SnapshotUnpackInfoOptions(SnapshotUnpackBaseOptions):
    """Options for exporting aggregated summary info as JSON."""

    # Summary template name; locates summary/<paper_id>/<template>.json.
    template: str
    # Output path for the aggregated JSON document.
    output_json: Path
37
+
38
+
39
@dataclass
class UnpackCounts:
    """Mutable counters reported in the summary table of an unpack run."""

    total: int = 0  # papers examined
    succeeded: int = 0  # primary artifacts written
    failed: int = 0  # primary artifacts missing or unwritable
    missing_pdf: int = 0  # papers without a matching PDF in the roots
    translated_succeeded: int = 0  # translated markdown files written
    translated_failed: int = 0  # translations missing or unwritable
47
+
48
+
49
+ def _sanitize_filename(title: str) -> str:
50
+ """Convert title to safe filename."""
51
+ sanitized = re.sub(r'[<>:"/\\|?*]', "_", title)
52
+ if len(sanitized) > 200:
53
+ sanitized = sanitized[:200]
54
+ sanitized = sanitized.strip()
55
+ if not sanitized:
56
+ sanitized = "untitled"
57
+ return sanitized
58
+
59
+
60
+ def _hash_file(path: Path) -> str:
61
+ digest = hashlib.sha256()
62
+ with path.open("rb") as handle:
63
+ for chunk in iter(lambda: handle.read(1024 * 1024), b""):
64
+ digest.update(chunk)
65
+ return digest.hexdigest()
66
+
67
+
68
def _build_pdf_hash_index(pdf_roots: Iterable[Path]) -> dict[str, Path]:
    """Map SHA-256 content hashes to PDF paths found under the given roots.

    A root may be a single ``.pdf`` file or a directory (searched
    recursively). The first path seen for a given hash wins.
    """
    index: dict[str, Path] = {}

    def record(pdf_path: Path) -> None:
        # setdefault keeps the first occurrence for duplicate content.
        index.setdefault(_hash_file(pdf_path), pdf_path)

    for root in pdf_roots:
        if root.is_file():
            if root.suffix.lower() == ".pdf":
                record(root)
        elif root.is_dir():
            for candidate in root.rglob("*.pdf"):
                if candidate.is_file():
                    record(candidate)
    return index
83
+
84
+
85
+ def _unique_base_name(base: str, paper_id: str, used: set[str]) -> str:
86
+ candidate = base
87
+ if candidate in used:
88
+ candidate = f"{base}_{paper_id}"
89
+ counter = 1
90
+ while candidate in used:
91
+ candidate = f"{base}_{paper_id}_{counter}"
92
+ counter += 1
93
+ used.add(candidate)
94
+ return candidate
95
+
96
+
97
+ def _open_snapshot_db(path: Path) -> sqlite3.Connection:
98
+ conn = sqlite3.connect(path)
99
+ conn.row_factory = sqlite3.Row
100
+ return conn
101
+
102
+
103
def _print_summary(title: str, counts: UnpackCounts) -> None:
    """Render the unpack counters as a rich table on stdout."""
    table = Table(title=title, header_style="bold cyan", title_style="bold magenta")
    table.add_column("Metric", style="cyan", no_wrap=True)
    table.add_column("Value", style="white", overflow="fold")
    rows: list[tuple[str, int]] = [
        ("Total", counts.total),
        ("Succeeded", counts.succeeded),
        ("Failed", counts.failed),
        ("Missing PDF", counts.missing_pdf),
    ]
    # Translation rows only appear when any translation was attempted.
    if counts.translated_succeeded or counts.translated_failed:
        rows.append(("Translated succeeded", counts.translated_succeeded))
        rows.append(("Translated failed", counts.translated_failed))
    for metric, value in rows:
        table.add_row(metric, str(value))
    Console().print(table)
115
+
116
+
117
def unpack_md(opts: SnapshotUnpackMdOptions) -> None:
    """Unpack source/translated markdown and align filenames to PDFs.

    For every paper in the snapshot DB, a readable base filename is chosen
    from the matching PDF (by content hash) or from the sanitized title,
    then the content-addressed markdown files are copied out of the static
    export under that name. A summary table is printed when done.
    """
    opts.md_output_dir.mkdir(parents=True, exist_ok=True)
    opts.md_translated_output_dir.mkdir(parents=True, exist_ok=True)

    pdf_index = _build_pdf_hash_index(opts.pdf_roots)
    used_names: set[str] = set()
    counts = UnpackCounts()

    conn = _open_snapshot_db(opts.snapshot_db)
    try:
        cursor = conn.execute(
            """
            SELECT
                paper_id,
                title,
                source_hash,
                pdf_content_hash,
                source_md_content_hash
            FROM paper
            ORDER BY paper_index, title
            """
        )
        for row in cursor.fetchall():
            counts.total += 1
            paper_id = str(row["paper_id"])
            pdf_hash = row["pdf_content_hash"]

            # Prefer the original PDF's stem; fall back to the paper title.
            if pdf_hash and pdf_hash in pdf_index:
                base = pdf_index[pdf_hash].stem
            else:
                counts.missing_pdf += 1
                base = _sanitize_filename(str(row["title"] or ""))
            base = _unique_base_name(base, paper_id, used_names)

            _export_source_md(opts, row["source_md_content_hash"], base, counts)
            _export_translations(opts, conn, paper_id, base, counts)
    finally:
        conn.close()

    _print_summary("snapshot unpack md summary", counts)


def _export_source_md(
    opts: SnapshotUnpackMdOptions, md_hash: Any, base: str, counts: UnpackCounts
) -> None:
    """Copy the content-addressed source markdown to <base>.md, updating counts."""
    if not md_hash:
        counts.failed += 1
        return
    src_md = opts.static_export_dir / "md" / f"{md_hash}.md"
    if not src_md.exists():
        counts.failed += 1
        return
    dst_md = opts.md_output_dir / f"{base}.md"
    try:
        # A corrupt (non-UTF-8) export raises UnicodeDecodeError; count it as
        # a failure for this paper instead of aborting the whole run.
        dst_md.write_text(src_md.read_text(encoding="utf-8"), encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        counts.failed += 1
    else:
        counts.succeeded += 1


def _export_translations(
    opts: SnapshotUnpackMdOptions,
    conn: sqlite3.Connection,
    paper_id: str,
    base: str,
    counts: UnpackCounts,
) -> None:
    """Copy every available translation to <base>.<lang>.md, updating counts."""
    for tr_row in conn.execute(
        "SELECT lang, md_content_hash FROM paper_translation WHERE paper_id = ?",
        (paper_id,),
    ):
        lang = str(tr_row["lang"] or "").lower()
        tr_hash = tr_row["md_content_hash"]
        if not lang or not tr_hash:
            counts.translated_failed += 1
            continue
        src_tr = opts.static_export_dir / "md_translate" / lang / f"{tr_hash}.md"
        if not src_tr.exists():
            counts.translated_failed += 1
            continue
        dst_tr = opts.md_translated_output_dir / f"{base}.{lang}.md"
        try:
            dst_tr.write_text(src_tr.read_text(encoding="utf-8"), encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            counts.translated_failed += 1
        else:
            counts.translated_succeeded += 1
193
+
194
+
195
def unpack_info(opts: SnapshotUnpackInfoOptions) -> None:
    """Unpack aggregated paper_infos.json from snapshot summaries.

    Builds one JSON array entry per paper whose summary exists in the
    static export, enriched with identity fields and a ``source_path``
    that matches the filenames produced by ``unpack_md`` (including its
    collision handling, which the previous version did not apply).
    """
    pdf_index = _build_pdf_hash_index(opts.pdf_roots)
    used_names: set[str] = set()
    counts = UnpackCounts()
    items: list[dict[str, Any]] = []

    conn = _open_snapshot_db(opts.snapshot_db)
    try:
        cursor = conn.execute(
            """
            SELECT
                paper_id,
                title,
                source_hash,
                pdf_content_hash
            FROM paper
            ORDER BY paper_index, title
            """
        )
        for row in cursor.fetchall():
            counts.total += 1
            paper_id = str(row["paper_id"])
            pdf_hash = row["pdf_content_hash"]

            # Derive the base name exactly like unpack_md does (PDF stem or
            # sanitized title, then uniquified for every row in the same
            # order) so source_path matches the actually unpacked files.
            if pdf_hash and pdf_hash in pdf_index:
                base = pdf_index[pdf_hash].stem
            else:
                counts.missing_pdf += 1
                base = _sanitize_filename(str(row["title"] or ""))
            base = _unique_base_name(base, paper_id, used_names)

            summary_path = opts.static_export_dir / "summary" / paper_id / f"{opts.template}.json"
            fallback_path = opts.static_export_dir / "summary" / f"{paper_id}.json"
            used_fallback = not summary_path.exists()
            target_path = fallback_path if used_fallback else summary_path
            if not target_path.exists():
                counts.failed += 1
                continue
            try:
                # OSError (unreadable file) is as fatal per-paper as bad JSON.
                payload = json.loads(target_path.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                counts.failed += 1
                continue
            if not isinstance(payload, dict):
                counts.failed += 1
                continue

            payload["paper_id"] = paper_id
            payload["paper_title"] = str(row["title"] or "")
            payload["source_path"] = f"{base}.md"
            payload["source_hash"] = str(row["source_hash"] or "")

            # Fallback summaries do not match the requested template; count
            # them as failures but still include their data in the output.
            if used_fallback:
                counts.failed += 1
            else:
                counts.succeeded += 1
            items.append(payload)
    finally:
        conn.close()

    opts.output_json.parent.mkdir(parents=True, exist_ok=True)
    opts.output_json.write_text(json.dumps(items, ensure_ascii=False, indent=2), encoding="utf-8")
    _print_summary("snapshot unpack info summary", counts)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepresearch-flow
3
- Version: 0.6.1
3
+ Version: 0.7.0
4
4
  Summary: Workflow tools for paper extraction, review, and research automation.
5
5
  Author-email: DengQi <dengqi935@gmail.com>
6
6
  License: MIT License
@@ -49,6 +49,7 @@ Requires-Dist: jinja2>=3.1.3
49
49
  Requires-Dist: json-repair>=0.55.1
50
50
  Requires-Dist: jsonschema>=4.26.0
51
51
  Requires-Dist: markdown-it-py>=3.0.0
52
+ Requires-Dist: fastmcp>=3.0.0b1
52
53
  Requires-Dist: mdit-py-plugins>=0.4.0
53
54
  Requires-Dist: pypdf>=6.6.2
54
55
  Requires-Dist: pylatexenc>=2.10
@@ -56,7 +57,7 @@ Requires-Dist: pybtex>=0.24.0
56
57
  Requires-Dist: rich>=14.3.1
57
58
  Requires-Dist: rumdl>=0.1.6
58
59
  Requires-Dist: starlette>=0.52.1
59
- Requires-Dist: tqdm>=4.66.4
60
+ Requires-Dist: tqdm>=4.67.2
60
61
  Requires-Dist: uvicorn>=0.27.1
61
62
  Dynamic: license-file
62
63
 
@@ -4,7 +4,7 @@ deepresearch_flow/cli.py,sha256=t4oowCNWldL0DrVJ4d0UlRkuGU2qHej_G0mAc_quteQ,455
4
4
  deepresearch_flow/paper/__init__.py,sha256=sunaOkcgAJBrfmcaJTumcWbPGVUSGWvOv2a2Yidzy0A,43
5
5
  deepresearch_flow/paper/cli.py,sha256=68d-yccScU0yL6d7eqZVdudPO6i_in8F4v-hKDWILMo,13647
6
6
  deepresearch_flow/paper/config.py,sha256=V7z4ApPXCV1acSl2FU3nZGq6nt8uisMhm0GtOq5zzmg,12021
7
- deepresearch_flow/paper/db.py,sha256=Bxhrd8NCaPZ9Ijtp1uiOplwh2Uy0n2Dyn1bO0d4A9bE,91780
7
+ deepresearch_flow/paper/db.py,sha256=RvUN9jeoaEgLNvf8NhWYD-cgIIMZwdZRK3cq17pNWZI,94727
8
8
  deepresearch_flow/paper/db_ops.py,sha256=cb64jn2ax39i3tCS-0DYmlsJdGX3uBS2u5ncUIbUBic,73980
9
9
  deepresearch_flow/paper/extract.py,sha256=78ASAyNLfCl1AsAk2o_v1vskZCNZuayaCHgr0S4V2Vs,87632
10
10
  deepresearch_flow/paper/llm.py,sha256=mHfs5IkT3Q6BOh46MDlfUmgVTX24WRf0IKKoOnN8nV8,4007
@@ -43,11 +43,14 @@ deepresearch_flow/paper/schemas/default_paper_schema.json,sha256=6h_2ayHolJj8JMn
43
43
  deepresearch_flow/paper/schemas/eight_questions_schema.json,sha256=VFKKpdZkgPdQkYIW5jyrZQ7c2TlQZwB4svVWfoiwxdg,1005
44
44
  deepresearch_flow/paper/schemas/three_pass_schema.json,sha256=8aNr4EdRiilxszIRBCC4hRNXrfIOcdnVW4Qhe6Fnh0o,689
45
45
  deepresearch_flow/paper/snapshot/__init__.py,sha256=1VLO36xxDB3J5Yoo-HH9vyI-4ev2HcivXN0sNLg8O5k,102
46
- deepresearch_flow/paper/snapshot/api.py,sha256=WgkOgS7n_2Fx-Bl4KnLrh5nhRJAsWJaPjXu7vX5ubxY,36960
46
+ deepresearch_flow/paper/snapshot/api.py,sha256=F_qehvCjxTBTGj9FmqP4NnJQayUPJm0N5e_8mm5JlDQ,37405
47
47
  deepresearch_flow/paper/snapshot/builder.py,sha256=HbRcfNteMoP4RnQ4y2onZCm9XfnIvzXLn_EwsLZsDzY,38692
48
+ deepresearch_flow/paper/snapshot/common.py,sha256=KAhlGlPgabOCe9Faps8BoDqin71qpkCfaL_ADCr_9vg,917
48
49
  deepresearch_flow/paper/snapshot/identity.py,sha256=k9x1EZPFBU1qgxzkTGvwVtDjLgcosmM_udPuvRLl0uI,7748
50
+ deepresearch_flow/paper/snapshot/mcp_server.py,sha256=lvgbXmuZCZ_zaQMdZEMjN-OChHPdoZ9MmuuQ-7ORias,22901
49
51
  deepresearch_flow/paper/snapshot/schema.py,sha256=DcVmAklLYyEeDoVV9jYw7hoMHnHd9Eziivl-LP2busY,8991
50
52
  deepresearch_flow/paper/snapshot/text.py,sha256=0RnxLowa6AdirdLsUYym6BhWbjwiP2Qj2oZeA-pjmdE,4368
53
+ deepresearch_flow/paper/snapshot/unpacker.py,sha256=ScKSFdrQLJHrITHe9KAxgAEH-vAAnXLolvW9zeJ3wsc,8575
51
54
  deepresearch_flow/paper/snapshot/tests/__init__.py,sha256=G0IowrxHjGUIaqxcw6SvlcLFAtE5ZsleG6ECgd-sIdk,52
52
55
  deepresearch_flow/paper/snapshot/tests/test_identity.py,sha256=KDFixAUU9l68KOum7gf1IrD0Oy18dBCSXG7RbJTqflA,4520
53
56
  deepresearch_flow/paper/templates/__init__.py,sha256=p8W6kINvrf-T2X6Ow4GMr28syVOorFuMn0pbmieVzAw,35
@@ -463,9 +466,9 @@ deepresearch_flow/translator/placeholder.py,sha256=mEgqA-dPdOsIhno0h_hzfpXpY2asb
463
466
  deepresearch_flow/translator/prompts.py,sha256=EvfBvBIpQXARDj4m87GAyFXJGL8EJeahj_rOmp9mv68,5556
464
467
  deepresearch_flow/translator/protector.py,sha256=yUMuS2FgVofK_MRXrcauLRiwNvdCCjNAnh6CcNd686o,11777
465
468
  deepresearch_flow/translator/segment.py,sha256=rBFMCLTrvm2GrPc_hNFymi-8Ih2DAtUQlZHCRE9nLaM,5146
466
- deepresearch_flow-0.6.1.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
467
- deepresearch_flow-0.6.1.dist-info/METADATA,sha256=y_CHy1YJ-3P31W43Q_fd8dEkznj7LKLRrCF6F-sGHaQ,26696
468
- deepresearch_flow-0.6.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
469
- deepresearch_flow-0.6.1.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
470
- deepresearch_flow-0.6.1.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
471
- deepresearch_flow-0.6.1.dist-info/RECORD,,
469
+ deepresearch_flow-0.7.0.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
470
+ deepresearch_flow-0.7.0.dist-info/METADATA,sha256=aluWW1CXPeSWCLKopChdbgl_GHEQHByua1fBohr6Mzg,26728
471
+ deepresearch_flow-0.7.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
472
+ deepresearch_flow-0.7.0.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
473
+ deepresearch_flow-0.7.0.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
474
+ deepresearch_flow-0.7.0.dist-info/RECORD,,