deepresearch-flow 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepresearch_flow/paper/snapshot/api.py +4 -3
- deepresearch_flow/paper/snapshot/mcp_server.py +84 -45
- deepresearch_flow/paper/snapshot/tests/test_mcp_server_schema_compat.py +184 -0
- deepresearch_flow/paper/snapshot/tests/test_mcp_transport.py +52 -0
- {deepresearch_flow-0.7.1.dist-info → deepresearch_flow-0.7.3.dist-info}/METADATA +25 -6
- {deepresearch_flow-0.7.1.dist-info → deepresearch_flow-0.7.3.dist-info}/RECORD +10 -8
- {deepresearch_flow-0.7.1.dist-info → deepresearch_flow-0.7.3.dist-info}/WHEEL +0 -0
- {deepresearch_flow-0.7.1.dist-info → deepresearch_flow-0.7.3.dist-info}/entry_points.txt +0 -0
- {deepresearch_flow-0.7.1.dist-info → deepresearch_flow-0.7.3.dist-info}/licenses/LICENSE +0 -0
- {deepresearch_flow-0.7.1.dist-info → deepresearch_flow-0.7.3.dist-info}/top_level.txt +0 -0
|
@@ -908,7 +908,7 @@ def create_app(
|
|
|
908
908
|
# Lazy import to avoid circular dependency
|
|
909
909
|
from deepresearch_flow.paper.snapshot.mcp_server import (
|
|
910
910
|
McpSnapshotConfig,
|
|
911
|
-
|
|
911
|
+
create_mcp_apps,
|
|
912
912
|
resolve_static_export_dir,
|
|
913
913
|
)
|
|
914
914
|
|
|
@@ -919,7 +919,7 @@ def create_app(
|
|
|
919
919
|
limits=limits or ApiLimits(),
|
|
920
920
|
origin_allowlist=cors_allowed_origins or ["*"],
|
|
921
921
|
)
|
|
922
|
-
|
|
922
|
+
mcp_apps, mcp_lifespan = create_mcp_apps(mcp_config)
|
|
923
923
|
|
|
924
924
|
routes = [
|
|
925
925
|
Route("/api/v1/config", _api_config, methods=["GET"]),
|
|
@@ -931,7 +931,8 @@ def create_app(
|
|
|
931
931
|
Route("/api/v1/facets/{facet:str}/{facet_id:str}/stats", _api_facet_stats, methods=["GET"]),
|
|
932
932
|
Route("/api/v1/facets/{facet:str}/by-value/{value:str}/papers", _api_facet_by_value_papers, methods=["GET"]),
|
|
933
933
|
Route("/api/v1/facets/{facet:str}/by-value/{value:str}/stats", _api_facet_by_value_stats, methods=["GET"]),
|
|
934
|
-
Mount("/mcp", app=
|
|
934
|
+
Mount("/mcp", app=mcp_apps["streamable-http"]),
|
|
935
|
+
Mount("/mcp-sse", app=mcp_apps["sse"]),
|
|
935
936
|
]
|
|
936
937
|
|
|
937
938
|
# Pass MCP lifespan to ensure session manager initializes properly
|
|
@@ -5,7 +5,7 @@ import json
|
|
|
5
5
|
import os
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
import re
|
|
8
|
-
from typing import Any
|
|
8
|
+
from typing import Any, Literal
|
|
9
9
|
|
|
10
10
|
import httpx
|
|
11
11
|
from starlette.applications import Starlette
|
|
@@ -76,14 +76,13 @@ class McpSnapshotConfig:
|
|
|
76
76
|
|
|
77
77
|
|
|
78
78
|
class McpRequestGuardMiddleware(BaseHTTPMiddleware):
|
|
79
|
-
def __init__(self, app, *, origin_allowlist: list[str]) -> None:
|
|
79
|
+
def __init__(self, app, *, origin_allowlist: list[str], allowed_methods: set[str] | None = None) -> None:
|
|
80
80
|
super().__init__(app)
|
|
81
81
|
self._allowlist = [origin.lower() for origin in origin_allowlist]
|
|
82
|
+
self._allowed_methods = {method.upper() for method in (allowed_methods or {"POST", "OPTIONS"})}
|
|
82
83
|
|
|
83
84
|
async def dispatch(self, request: Request, call_next): # type: ignore[override]
|
|
84
|
-
if request.method
|
|
85
|
-
return Response("Method Not Allowed", status_code=405)
|
|
86
|
-
if request.method not in {"POST", "OPTIONS"}:
|
|
85
|
+
if request.method.upper() not in self._allowed_methods:
|
|
87
86
|
return Response("Method Not Allowed", status_code=405)
|
|
88
87
|
origin = request.headers.get("origin")
|
|
89
88
|
if origin and not self._is_allowed_origin(origin):
|
|
@@ -108,23 +107,49 @@ def configure(config: McpSnapshotConfig) -> None:
|
|
|
108
107
|
_CONFIG = config
|
|
109
108
|
|
|
110
109
|
|
|
111
|
-
def
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
110
|
+
def _allowed_methods_for_transport(transport: Literal["streamable-http", "sse"]) -> set[str]:
|
|
111
|
+
if transport == "sse":
|
|
112
|
+
return {"GET", "POST", "OPTIONS"}
|
|
113
|
+
return {"POST", "OPTIONS"}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def create_mcp_transport_app(
|
|
117
|
+
config: McpSnapshotConfig,
|
|
118
|
+
*,
|
|
119
|
+
transport: Literal["streamable-http", "sse"] = "streamable-http",
|
|
120
|
+
) -> tuple[Starlette, Any]:
|
|
121
|
+
"""Create MCP app for a specific transport with transport-aware method guard."""
|
|
117
122
|
configure(config)
|
|
118
|
-
mcp_app = mcp.http_app(path="/", stateless_http=
|
|
123
|
+
mcp_app = mcp.http_app(path="/", transport=transport, stateless_http=(transport == "streamable-http"))
|
|
119
124
|
wrapped = Starlette(
|
|
120
125
|
routes=[Mount("/", app=mcp_app)],
|
|
121
126
|
middleware=[
|
|
122
|
-
Middleware(
|
|
127
|
+
Middleware(
|
|
128
|
+
McpRequestGuardMiddleware,
|
|
129
|
+
origin_allowlist=config.origin_allowlist,
|
|
130
|
+
allowed_methods=_allowed_methods_for_transport(transport),
|
|
131
|
+
),
|
|
123
132
|
],
|
|
124
133
|
)
|
|
125
134
|
return wrapped, mcp_app.lifespan
|
|
126
135
|
|
|
127
136
|
|
|
137
|
+
def create_mcp_apps(config: McpSnapshotConfig) -> tuple[dict[str, Starlette], Any]:
|
|
138
|
+
"""Create streamable-http and sse MCP apps.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
A tuple of (apps_by_transport, lifespan_context).
|
|
142
|
+
"""
|
|
143
|
+
streamable_app, lifespan = create_mcp_transport_app(config, transport="streamable-http")
|
|
144
|
+
sse_app, _ = create_mcp_transport_app(config, transport="sse")
|
|
145
|
+
return {"streamable-http": streamable_app, "sse": sse_app}, lifespan
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def create_mcp_app(config: McpSnapshotConfig) -> tuple[Starlette, Any]:
|
|
149
|
+
"""Backward-compatible helper returning streamable-http MCP app."""
|
|
150
|
+
return create_mcp_transport_app(config, transport="streamable-http")
|
|
151
|
+
|
|
152
|
+
|
|
128
153
|
def _get_config() -> McpSnapshotConfig:
|
|
129
154
|
if _CONFIG is None:
|
|
130
155
|
raise RuntimeError("MCP server not configured")
|
|
@@ -227,18 +252,24 @@ def _load_summary_json(paper_id: str, template: str | None) -> tuple[str | None,
|
|
|
227
252
|
conn = _open_ro_conn(cfg.snapshot_db)
|
|
228
253
|
try:
|
|
229
254
|
row = conn.execute(
|
|
230
|
-
"SELECT preferred_summary_template
|
|
255
|
+
"SELECT preferred_summary_template FROM paper WHERE paper_id = ?",
|
|
231
256
|
(paper_id,),
|
|
232
257
|
).fetchone()
|
|
233
258
|
if not row:
|
|
234
259
|
return None, None
|
|
235
|
-
preferred = row["preferred_summary_template"]
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
260
|
+
preferred = str(row["preferred_summary_template"] or "")
|
|
261
|
+
template_rows = conn.execute(
|
|
262
|
+
"SELECT template_tag FROM paper_summary WHERE paper_id = ?",
|
|
263
|
+
(paper_id,),
|
|
264
|
+
).fetchall()
|
|
265
|
+
available = sorted((str(item["template_tag"]) for item in template_rows), key=str.lower)
|
|
266
|
+
selected = (template or preferred).strip()
|
|
267
|
+
if not selected or selected not in set(available):
|
|
240
268
|
return None, available
|
|
241
|
-
|
|
269
|
+
if template:
|
|
270
|
+
rel_path = f"summary/{paper_id}/{selected}.json"
|
|
271
|
+
else:
|
|
272
|
+
rel_path = f"summary/{paper_id}.json"
|
|
242
273
|
return _load_static_text(rel_path), available
|
|
243
274
|
finally:
|
|
244
275
|
conn.close()
|
|
@@ -267,15 +298,12 @@ def _load_translation_markdown(paper_id: str, lang: str) -> str | None:
|
|
|
267
298
|
conn = _open_ro_conn(cfg.snapshot_db)
|
|
268
299
|
try:
|
|
269
300
|
row = conn.execute(
|
|
270
|
-
"SELECT
|
|
271
|
-
(paper_id,),
|
|
301
|
+
"SELECT md_content_hash FROM paper_translation WHERE paper_id = ? AND lang = ?",
|
|
302
|
+
(paper_id, lang),
|
|
272
303
|
).fetchone()
|
|
273
|
-
if not row or not row["
|
|
274
|
-
return None
|
|
275
|
-
translations = json.loads(row["translations_json"])
|
|
276
|
-
rel_path = translations.get(lang)
|
|
277
|
-
if not rel_path:
|
|
304
|
+
if not row or not row["md_content_hash"]:
|
|
278
305
|
return None
|
|
306
|
+
rel_path = f"md_translate/{lang}/{row['md_content_hash']}.md"
|
|
279
307
|
return _load_static_text(rel_path)
|
|
280
308
|
finally:
|
|
281
309
|
conn.close()
|
|
@@ -296,22 +324,32 @@ def search_papers(query: str, limit: int = 10) -> list[dict[str, Any]]:
|
|
|
296
324
|
|
|
297
325
|
conn = _open_ro_conn(cfg.snapshot_db)
|
|
298
326
|
try:
|
|
327
|
+
match_expr = rewrite_search_query(query)
|
|
328
|
+
if not match_expr:
|
|
329
|
+
return []
|
|
299
330
|
cur = conn.execute(
|
|
300
331
|
"""
|
|
301
|
-
SELECT
|
|
302
|
-
|
|
303
|
-
|
|
332
|
+
SELECT
|
|
333
|
+
p.paper_id,
|
|
334
|
+
p.title,
|
|
335
|
+
p.year,
|
|
336
|
+
p.venue,
|
|
337
|
+
snippet(paper_fts, -1, '[[[', ']]]', '…', 30) AS snippet_markdown,
|
|
338
|
+
bm25(paper_fts, 5.0, 3.0, 1.0, 1.0, 2.0) AS rank
|
|
339
|
+
FROM paper_fts
|
|
340
|
+
JOIN paper p ON p.paper_id = paper_fts.paper_id
|
|
341
|
+
WHERE paper_fts MATCH ?
|
|
304
342
|
ORDER BY rank
|
|
305
343
|
LIMIT ?
|
|
306
344
|
""",
|
|
307
|
-
(
|
|
345
|
+
(match_expr, limit),
|
|
308
346
|
)
|
|
309
347
|
rows = cur.fetchall()
|
|
310
348
|
results: list[dict[str, Any]] = []
|
|
311
349
|
for row in rows:
|
|
312
|
-
snippet = str(row["
|
|
350
|
+
snippet = str(row["snippet_markdown"] or "")
|
|
313
351
|
snippet = remove_cjk_spaces(snippet)
|
|
314
|
-
snippet
|
|
352
|
+
snippet = merge_adjacent_markers(snippet)
|
|
315
353
|
results.append({
|
|
316
354
|
"paper_id": str(row["paper_id"]),
|
|
317
355
|
"title": str(row["title"]),
|
|
@@ -337,7 +375,7 @@ def search_papers_by_keyword(keyword: str, limit: int = 10) -> list[dict[str, An
|
|
|
337
375
|
try:
|
|
338
376
|
rows = conn.execute(
|
|
339
377
|
"""
|
|
340
|
-
SELECT DISTINCT p.paper_id, p.title, p.year, p.venue, p.
|
|
378
|
+
SELECT DISTINCT p.paper_id, p.title, p.year, p.venue, p.summary_preview
|
|
341
379
|
FROM paper p
|
|
342
380
|
JOIN paper_keyword pk ON pk.paper_id = p.paper_id
|
|
343
381
|
JOIN keyword k ON k.keyword_id = pk.keyword_id
|
|
@@ -349,9 +387,9 @@ def search_papers_by_keyword(keyword: str, limit: int = 10) -> list[dict[str, An
|
|
|
349
387
|
).fetchall()
|
|
350
388
|
results: list[dict[str, Any]] = []
|
|
351
389
|
for row in rows:
|
|
352
|
-
snippet = str(row["
|
|
390
|
+
snippet = str(row["summary_preview"] or "")
|
|
353
391
|
snippet = remove_cjk_spaces(snippet)
|
|
354
|
-
snippet
|
|
392
|
+
snippet = merge_adjacent_markers(snippet)
|
|
355
393
|
results.append({
|
|
356
394
|
"paper_id": str(row["paper_id"]),
|
|
357
395
|
"title": str(row["title"]),
|
|
@@ -377,26 +415,27 @@ def get_paper_metadata(paper_id: str) -> dict[str, Any]:
|
|
|
377
415
|
try:
|
|
378
416
|
row = conn.execute(
|
|
379
417
|
"""
|
|
380
|
-
SELECT paper_id, title, year, venue,
|
|
381
|
-
preferred_summary_template, summary_asset_paths_json
|
|
418
|
+
SELECT paper_id, title, year, venue, preferred_summary_template
|
|
382
419
|
FROM paper WHERE paper_id = ?
|
|
383
420
|
""",
|
|
384
421
|
(paper_id,),
|
|
385
422
|
).fetchone()
|
|
386
423
|
if not row:
|
|
387
424
|
raise McpToolError("not_found", "paper not found", paper_id=paper_id)
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
425
|
+
template_rows = conn.execute(
|
|
426
|
+
"SELECT template_tag FROM paper_summary WHERE paper_id = ?",
|
|
427
|
+
(paper_id,),
|
|
428
|
+
).fetchall()
|
|
429
|
+
available = sorted((str(item["template_tag"]) for item in template_rows), key=str.lower)
|
|
391
430
|
return {
|
|
392
431
|
"paper_id": str(row["paper_id"]),
|
|
393
432
|
"title": str(row["title"]),
|
|
394
433
|
"year": str(row["year"]),
|
|
395
434
|
"venue": str(row["venue"]),
|
|
396
|
-
"doi":
|
|
397
|
-
"arxiv_id":
|
|
398
|
-
"openreview_id":
|
|
399
|
-
"paper_pw_url":
|
|
435
|
+
"doi": None,
|
|
436
|
+
"arxiv_id": None,
|
|
437
|
+
"openreview_id": None,
|
|
438
|
+
"paper_pw_url": None,
|
|
400
439
|
"preferred_summary_template": row["preferred_summary_template"],
|
|
401
440
|
"available_summary_templates": available,
|
|
402
441
|
}
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import tempfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import unittest
|
|
7
|
+
|
|
8
|
+
from deepresearch_flow.paper.snapshot.common import ApiLimits
|
|
9
|
+
from deepresearch_flow.paper.snapshot.mcp_server import (
|
|
10
|
+
McpSnapshotConfig,
|
|
11
|
+
McpToolError,
|
|
12
|
+
configure,
|
|
13
|
+
get_paper_metadata,
|
|
14
|
+
get_paper_source,
|
|
15
|
+
get_paper_summary,
|
|
16
|
+
resource_translation,
|
|
17
|
+
search_papers,
|
|
18
|
+
search_papers_by_keyword,
|
|
19
|
+
)
|
|
20
|
+
from deepresearch_flow.paper.snapshot.schema import init_snapshot_db
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestMcpServerSchemaCompat(unittest.TestCase):
|
|
24
|
+
@classmethod
|
|
25
|
+
def setUpClass(cls) -> None:
|
|
26
|
+
cls.tmpdir = tempfile.TemporaryDirectory()
|
|
27
|
+
root = Path(cls.tmpdir.name)
|
|
28
|
+
cls.db_path = root / "snapshot.db"
|
|
29
|
+
cls.static_dir = root / "static"
|
|
30
|
+
cls.paper_id = "eb87c02de5b908dff9f91edda47364a5"
|
|
31
|
+
|
|
32
|
+
(cls.static_dir / "summary" / cls.paper_id).mkdir(parents=True, exist_ok=True)
|
|
33
|
+
(cls.static_dir / "md").mkdir(parents=True, exist_ok=True)
|
|
34
|
+
(cls.static_dir / "md_translate" / "zh").mkdir(parents=True, exist_ok=True)
|
|
35
|
+
(cls.static_dir / "summary" / f"{cls.paper_id}.json").write_text(
|
|
36
|
+
'{"template_tag":"deep_read","summary":"default summary"}',
|
|
37
|
+
encoding="utf-8",
|
|
38
|
+
)
|
|
39
|
+
(cls.static_dir / "summary" / cls.paper_id / "deep_read.json").write_text(
|
|
40
|
+
'{"template_tag":"deep_read","summary":"deep summary"}',
|
|
41
|
+
encoding="utf-8",
|
|
42
|
+
)
|
|
43
|
+
(cls.static_dir / "summary" / cls.paper_id / "simple.json").write_text(
|
|
44
|
+
'{"template_tag":"simple","summary":"simple summary"}',
|
|
45
|
+
encoding="utf-8",
|
|
46
|
+
)
|
|
47
|
+
(cls.static_dir / "md" / "sourcehash.md").write_text(
|
|
48
|
+
"# source body",
|
|
49
|
+
encoding="utf-8",
|
|
50
|
+
)
|
|
51
|
+
(cls.static_dir / "md_translate" / "zh" / "trhash.md").write_text(
|
|
52
|
+
"# 翻译内容",
|
|
53
|
+
encoding="utf-8",
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
conn = sqlite3.connect(str(cls.db_path))
|
|
57
|
+
try:
|
|
58
|
+
init_snapshot_db(conn)
|
|
59
|
+
conn.execute(
|
|
60
|
+
"""
|
|
61
|
+
INSERT INTO paper(
|
|
62
|
+
paper_id, paper_key, paper_key_type, title, year, month, publication_date,
|
|
63
|
+
venue, preferred_summary_template, summary_preview, paper_index, source_hash,
|
|
64
|
+
output_language, provider, model, prompt_template, extracted_at,
|
|
65
|
+
pdf_content_hash, source_md_content_hash
|
|
66
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
67
|
+
""",
|
|
68
|
+
(
|
|
69
|
+
cls.paper_id,
|
|
70
|
+
"meta:key",
|
|
71
|
+
"meta",
|
|
72
|
+
"Graph Neural Networks",
|
|
73
|
+
"2024",
|
|
74
|
+
"01",
|
|
75
|
+
"2024-01-01",
|
|
76
|
+
"ICLR",
|
|
77
|
+
"deep_read",
|
|
78
|
+
"Graph methods preview",
|
|
79
|
+
1,
|
|
80
|
+
"sourcekey",
|
|
81
|
+
"en",
|
|
82
|
+
"provider-x",
|
|
83
|
+
"model-y",
|
|
84
|
+
"deep_read",
|
|
85
|
+
"2025-01-01T00:00:00Z",
|
|
86
|
+
"pdfhash",
|
|
87
|
+
"sourcehash",
|
|
88
|
+
),
|
|
89
|
+
)
|
|
90
|
+
conn.execute(
|
|
91
|
+
"INSERT INTO paper_summary(paper_id, template_tag) VALUES (?, ?)",
|
|
92
|
+
(cls.paper_id, "deep_read"),
|
|
93
|
+
)
|
|
94
|
+
conn.execute(
|
|
95
|
+
"INSERT INTO paper_summary(paper_id, template_tag) VALUES (?, ?)",
|
|
96
|
+
(cls.paper_id, "simple"),
|
|
97
|
+
)
|
|
98
|
+
conn.execute(
|
|
99
|
+
"INSERT INTO paper_translation(paper_id, lang, md_content_hash) VALUES (?, ?, ?)",
|
|
100
|
+
(cls.paper_id, "zh", "trhash"),
|
|
101
|
+
)
|
|
102
|
+
conn.execute("INSERT INTO keyword(value) VALUES (?)", ("machine learning",))
|
|
103
|
+
keyword_row = conn.execute(
|
|
104
|
+
"SELECT keyword_id FROM keyword WHERE value = ?",
|
|
105
|
+
("machine learning",),
|
|
106
|
+
).fetchone()
|
|
107
|
+
conn.execute(
|
|
108
|
+
"INSERT INTO paper_keyword(paper_id, keyword_id) VALUES (?, ?)",
|
|
109
|
+
(cls.paper_id, int(keyword_row[0])),
|
|
110
|
+
)
|
|
111
|
+
conn.execute(
|
|
112
|
+
"""
|
|
113
|
+
INSERT INTO paper_fts(paper_id, title, summary, source, translated, metadata)
|
|
114
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
115
|
+
""",
|
|
116
|
+
(
|
|
117
|
+
cls.paper_id,
|
|
118
|
+
"graph neural networks",
|
|
119
|
+
"graph representation learning",
|
|
120
|
+
"source text",
|
|
121
|
+
"translated text",
|
|
122
|
+
"machine learning iclr",
|
|
123
|
+
),
|
|
124
|
+
)
|
|
125
|
+
conn.commit()
|
|
126
|
+
finally:
|
|
127
|
+
conn.close()
|
|
128
|
+
|
|
129
|
+
configure(
|
|
130
|
+
McpSnapshotConfig(
|
|
131
|
+
snapshot_db=cls.db_path,
|
|
132
|
+
static_base_url="",
|
|
133
|
+
static_export_dir=cls.static_dir,
|
|
134
|
+
limits=ApiLimits(),
|
|
135
|
+
origin_allowlist=["*"],
|
|
136
|
+
)
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
@classmethod
|
|
140
|
+
def tearDownClass(cls) -> None:
|
|
141
|
+
cls.tmpdir.cleanup()
|
|
142
|
+
|
|
143
|
+
def test_get_paper_metadata_with_new_schema(self) -> None:
|
|
144
|
+
payload = get_paper_metadata(self.paper_id)
|
|
145
|
+
self.assertEqual(payload["paper_id"], self.paper_id)
|
|
146
|
+
self.assertEqual(payload["preferred_summary_template"], "deep_read")
|
|
147
|
+
self.assertEqual(payload["available_summary_templates"], ["deep_read", "simple"])
|
|
148
|
+
self.assertIsNone(payload["doi"])
|
|
149
|
+
self.assertIsNone(payload["arxiv_id"])
|
|
150
|
+
self.assertIsNone(payload["openreview_id"])
|
|
151
|
+
self.assertIsNone(payload["paper_pw_url"])
|
|
152
|
+
|
|
153
|
+
def test_get_paper_summary_default_and_template(self) -> None:
|
|
154
|
+
default_summary = get_paper_summary(self.paper_id)
|
|
155
|
+
deep_read_summary = get_paper_summary(self.paper_id, template="deep_read")
|
|
156
|
+
self.assertIn("default summary", default_summary)
|
|
157
|
+
self.assertIn("deep summary", deep_read_summary)
|
|
158
|
+
|
|
159
|
+
def test_get_paper_summary_template_not_available(self) -> None:
|
|
160
|
+
with self.assertRaises(McpToolError) as ctx:
|
|
161
|
+
get_paper_summary(self.paper_id, template="unknown")
|
|
162
|
+
self.assertEqual(ctx.exception.code, "template_not_available")
|
|
163
|
+
self.assertEqual(
|
|
164
|
+
ctx.exception.details["available_summary_templates"],
|
|
165
|
+
["deep_read", "simple"],
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
def test_source_and_translation_loading(self) -> None:
|
|
169
|
+
source = get_paper_source(self.paper_id)
|
|
170
|
+
translated = resource_translation(self.paper_id, "zh")
|
|
171
|
+
self.assertIn("source body", source)
|
|
172
|
+
self.assertIn("翻译内容", translated)
|
|
173
|
+
|
|
174
|
+
def test_search_tools_use_current_schema(self) -> None:
|
|
175
|
+
fts_hits = search_papers("graph", limit=5)
|
|
176
|
+
facet_hits = search_papers_by_keyword("machine", limit=5)
|
|
177
|
+
self.assertGreaterEqual(len(fts_hits), 1)
|
|
178
|
+
self.assertGreaterEqual(len(facet_hits), 1)
|
|
179
|
+
self.assertEqual(fts_hits[0]["paper_id"], self.paper_id)
|
|
180
|
+
self.assertEqual(facet_hits[0]["paper_id"], self.paper_id)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
if __name__ == "__main__":
|
|
184
|
+
unittest.main()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tempfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from deepresearch_flow.paper.snapshot.api import create_app
|
|
8
|
+
from deepresearch_flow.paper.snapshot.common import ApiLimits
|
|
9
|
+
from deepresearch_flow.paper.snapshot.mcp_server import (
|
|
10
|
+
McpSnapshotConfig,
|
|
11
|
+
_allowed_methods_for_transport,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestMcpTransport(unittest.TestCase):
|
|
16
|
+
@classmethod
|
|
17
|
+
def setUpClass(cls) -> None:
|
|
18
|
+
cls.tmpdir = tempfile.TemporaryDirectory()
|
|
19
|
+
cls.snapshot_db = Path(cls.tmpdir.name) / "snapshot.db"
|
|
20
|
+
cls.snapshot_db.touch()
|
|
21
|
+
cls.cfg = McpSnapshotConfig(
|
|
22
|
+
snapshot_db=cls.snapshot_db,
|
|
23
|
+
static_base_url="",
|
|
24
|
+
static_export_dir=None,
|
|
25
|
+
limits=ApiLimits(),
|
|
26
|
+
origin_allowlist=["*"],
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def tearDownClass(cls) -> None:
|
|
31
|
+
cls.tmpdir.cleanup()
|
|
32
|
+
|
|
33
|
+
def test_streamable_transport_rejects_get(self) -> None:
|
|
34
|
+
self.assertNotIn("GET", _allowed_methods_for_transport("streamable-http"))
|
|
35
|
+
|
|
36
|
+
def test_sse_transport_allows_get(self) -> None:
|
|
37
|
+
self.assertIn("GET", _allowed_methods_for_transport("sse"))
|
|
38
|
+
|
|
39
|
+
def test_api_mounts_streamable_and_sse_endpoints(self) -> None:
|
|
40
|
+
app = create_app(
|
|
41
|
+
snapshot_db=self.snapshot_db,
|
|
42
|
+
static_base_url="",
|
|
43
|
+
cors_allowed_origins=["*"],
|
|
44
|
+
limits=ApiLimits(),
|
|
45
|
+
)
|
|
46
|
+
mount_paths = sorted(getattr(route, "path", "") for route in app.routes)
|
|
47
|
+
self.assertIn("/mcp", mount_paths)
|
|
48
|
+
self.assertIn("/mcp-sse", mount_paths)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
if __name__ == "__main__":
|
|
52
|
+
unittest.main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepresearch-flow
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.7.3
|
|
4
4
|
Summary: Workflow tools for paper extraction, review, and research automation.
|
|
5
5
|
Author-email: DengQi <dengqi935@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -532,6 +532,22 @@ server {
|
|
|
532
532
|
proxy_set_header X-Real-IP $remote_addr;
|
|
533
533
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|
534
534
|
}
|
|
535
|
+
|
|
536
|
+
# SSE transport for MCP clients that require Server-Sent Events
|
|
537
|
+
location ^~ /mcp-sse {
|
|
538
|
+
proxy_pass http://127.0.0.1:8001;
|
|
539
|
+
proxy_http_version 1.1;
|
|
540
|
+
proxy_set_header Connection "";
|
|
541
|
+
proxy_set_header Host $host;
|
|
542
|
+
proxy_set_header X-Real-IP $remote_addr;
|
|
543
|
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|
544
|
+
proxy_buffering off;
|
|
545
|
+
proxy_cache off;
|
|
546
|
+
proxy_read_timeout 3600s;
|
|
547
|
+
proxy_send_timeout 3600s;
|
|
548
|
+
chunked_transfer_encoding off;
|
|
549
|
+
add_header X-Accel-Buffering no;
|
|
550
|
+
}
|
|
535
551
|
}
|
|
536
552
|
|
|
537
553
|
# Static assets (separate domain)
|
|
@@ -562,12 +578,15 @@ uv run deepresearch-flow paper db api serve \
|
|
|
562
578
|
--host 0.0.0.0 --port 8001
|
|
563
579
|
```
|
|
564
580
|
|
|
565
|
-
### 3.1) MCP (FastMCP Streamable HTTP)
|
|
581
|
+
### 3.1) MCP (FastMCP Streamable HTTP + SSE)
|
|
566
582
|
|
|
567
|
-
This project exposes
|
|
583
|
+
This project exposes MCP servers mounted on the snapshot API:
|
|
568
584
|
|
|
569
|
-
-
|
|
570
|
-
-
|
|
585
|
+
- Streamable HTTP endpoint: `http://<host>:8001/mcp`
|
|
586
|
+
- SSE endpoint: `http://<host>:8001/mcp-sse`
|
|
587
|
+
- Transport behavior:
|
|
588
|
+
- `/mcp`: Streamable HTTP via `POST` only (`GET` returns 405)
|
|
589
|
+
- `/mcp-sse`: SSE-enabled transport (supports `GET` handshake)
|
|
571
590
|
- Protocol header: optional `mcp-protocol-version` (`2025-03-26` or `2025-06-18`)
|
|
572
591
|
- Static reads: summary/source/translation are served as **text content** by reading snapshot static assets (local-first via `PAPER_DB_STATIC_EXPORT_DIR`, HTTP fallback via `PAPER_DB_STATIC_BASE` / `PAPER_DB_STATIC_BASE_URL`)
|
|
573
592
|
|
|
@@ -959,7 +978,7 @@ docker run --rm -p 8899:8899 \
|
|
|
959
978
|
```
|
|
960
979
|
|
|
961
980
|
Notes:
|
|
962
|
-
- nginx listens on 8899 and proxies `/api
|
|
981
|
+
- nginx listens on 8899 and proxies `/api`, `/mcp`, and `/mcp-sse` to the internal API at `127.0.0.1:8000`.
|
|
963
982
|
- Mount your snapshot DB to `/db/papers.db` inside the container.
|
|
964
983
|
- Mount snapshot static assets to `/static` when serving assets from this container (default `PAPER_DB_STATIC_BASE` is `/static`).
|
|
965
984
|
- If `PAPER_DB_STATIC_BASE` is a full URL (e.g. `https://static.example.com`), nginx still serves the frontend locally, while API responses use that external static base for asset links.
|
|
@@ -43,16 +43,18 @@ deepresearch_flow/paper/schemas/default_paper_schema.json,sha256=6h_2ayHolJj8JMn
|
|
|
43
43
|
deepresearch_flow/paper/schemas/eight_questions_schema.json,sha256=VFKKpdZkgPdQkYIW5jyrZQ7c2TlQZwB4svVWfoiwxdg,1005
|
|
44
44
|
deepresearch_flow/paper/schemas/three_pass_schema.json,sha256=8aNr4EdRiilxszIRBCC4hRNXrfIOcdnVW4Qhe6Fnh0o,689
|
|
45
45
|
deepresearch_flow/paper/snapshot/__init__.py,sha256=1VLO36xxDB3J5Yoo-HH9vyI-4ev2HcivXN0sNLg8O5k,102
|
|
46
|
-
deepresearch_flow/paper/snapshot/api.py,sha256=
|
|
46
|
+
deepresearch_flow/paper/snapshot/api.py,sha256=z1TJmFeMKr5ZiNbZT1xTueVqUqQJD0WhBplDHKlFXRo,37476
|
|
47
47
|
deepresearch_flow/paper/snapshot/builder.py,sha256=HbRcfNteMoP4RnQ4y2onZCm9XfnIvzXLn_EwsLZsDzY,38692
|
|
48
48
|
deepresearch_flow/paper/snapshot/common.py,sha256=KAhlGlPgabOCe9Faps8BoDqin71qpkCfaL_ADCr_9vg,917
|
|
49
49
|
deepresearch_flow/paper/snapshot/identity.py,sha256=k9x1EZPFBU1qgxzkTGvwVtDjLgcosmM_udPuvRLl0uI,7748
|
|
50
|
-
deepresearch_flow/paper/snapshot/mcp_server.py,sha256=
|
|
50
|
+
deepresearch_flow/paper/snapshot/mcp_server.py,sha256=c_WrM7PIMGRmvLg_3759NXc8wH5iwLCa6REBAnngwRg,24491
|
|
51
51
|
deepresearch_flow/paper/snapshot/schema.py,sha256=DcVmAklLYyEeDoVV9jYw7hoMHnHd9Eziivl-LP2busY,8991
|
|
52
52
|
deepresearch_flow/paper/snapshot/text.py,sha256=0RnxLowa6AdirdLsUYym6BhWbjwiP2Qj2oZeA-pjmdE,4368
|
|
53
53
|
deepresearch_flow/paper/snapshot/unpacker.py,sha256=ScKSFdrQLJHrITHe9KAxgAEH-vAAnXLolvW9zeJ3wsc,8575
|
|
54
54
|
deepresearch_flow/paper/snapshot/tests/__init__.py,sha256=G0IowrxHjGUIaqxcw6SvlcLFAtE5ZsleG6ECgd-sIdk,52
|
|
55
55
|
deepresearch_flow/paper/snapshot/tests/test_identity.py,sha256=KDFixAUU9l68KOum7gf1IrD0Oy18dBCSXG7RbJTqflA,4520
|
|
56
|
+
deepresearch_flow/paper/snapshot/tests/test_mcp_server_schema_compat.py,sha256=T7FtkKkGpZx5M7Z278F4iaQFfwS0_XXce_tRdTArt5k,7076
|
|
57
|
+
deepresearch_flow/paper/snapshot/tests/test_mcp_transport.py,sha256=Qh91te1XgzssTUfgCJUpq6Xjnw4tzhhr78bcI3Z4DpA,1622
|
|
56
58
|
deepresearch_flow/paper/templates/__init__.py,sha256=p8W6kINvrf-T2X6Ow4GMr28syVOorFuMn0pbmieVzAw,35
|
|
57
59
|
deepresearch_flow/paper/templates/deep_read.md.j2,sha256=vwVSPOzMBFIS72ez5XFBaKrDZGz0z32L3VGP6mNk434,4780
|
|
58
60
|
deepresearch_flow/paper/templates/deep_read_phi.md.j2,sha256=6Yz2Kxk0czGDPkZiWX3b87glLYHwDU1afr6CkjS-dh8,1666
|
|
@@ -466,9 +468,9 @@ deepresearch_flow/translator/placeholder.py,sha256=mEgqA-dPdOsIhno0h_hzfpXpY2asb
|
|
|
466
468
|
deepresearch_flow/translator/prompts.py,sha256=EvfBvBIpQXARDj4m87GAyFXJGL8EJeahj_rOmp9mv68,5556
|
|
467
469
|
deepresearch_flow/translator/protector.py,sha256=yUMuS2FgVofK_MRXrcauLRiwNvdCCjNAnh6CcNd686o,11777
|
|
468
470
|
deepresearch_flow/translator/segment.py,sha256=rBFMCLTrvm2GrPc_hNFymi-8Ih2DAtUQlZHCRE9nLaM,5146
|
|
469
|
-
deepresearch_flow-0.7.
|
|
470
|
-
deepresearch_flow-0.7.
|
|
471
|
-
deepresearch_flow-0.7.
|
|
472
|
-
deepresearch_flow-0.7.
|
|
473
|
-
deepresearch_flow-0.7.
|
|
474
|
-
deepresearch_flow-0.7.
|
|
471
|
+
deepresearch_flow-0.7.3.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
|
|
472
|
+
deepresearch_flow-0.7.3.dist-info/METADATA,sha256=I8ERjmgnZ-IZ9WQvm6sPT_yKgozcL5g11sWgqh36Ti8,31955
|
|
473
|
+
deepresearch_flow-0.7.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
474
|
+
deepresearch_flow-0.7.3.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
|
|
475
|
+
deepresearch_flow-0.7.3.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
|
|
476
|
+
deepresearch_flow-0.7.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|