deepresearch-flow: deepresearch_flow-0.7.1-py3-none-any.whl → deepresearch_flow-0.7.2-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -227,18 +227,24 @@ def _load_summary_json(paper_id: str, template: str | None) -> tuple[str | None,
227
227
  conn = _open_ro_conn(cfg.snapshot_db)
228
228
  try:
229
229
  row = conn.execute(
230
- "SELECT preferred_summary_template, summary_asset_paths_json FROM paper WHERE paper_id = ?",
230
+ "SELECT preferred_summary_template FROM paper WHERE paper_id = ?",
231
231
  (paper_id,),
232
232
  ).fetchone()
233
233
  if not row:
234
234
  return None, None
235
- preferred = row["preferred_summary_template"]
236
- asset_paths = json.loads(row["summary_asset_paths_json"] or "{}")
237
- available = sorted(asset_paths.keys())
238
- selected = template if template else preferred
239
- if not selected or selected not in asset_paths:
235
+ preferred = str(row["preferred_summary_template"] or "")
236
+ template_rows = conn.execute(
237
+ "SELECT template_tag FROM paper_summary WHERE paper_id = ?",
238
+ (paper_id,),
239
+ ).fetchall()
240
+ available = sorted((str(item["template_tag"]) for item in template_rows), key=str.lower)
241
+ selected = (template or preferred).strip()
242
+ if not selected or selected not in set(available):
240
243
  return None, available
241
- rel_path = asset_paths[selected]
244
+ if template:
245
+ rel_path = f"summary/{paper_id}/{selected}.json"
246
+ else:
247
+ rel_path = f"summary/{paper_id}.json"
242
248
  return _load_static_text(rel_path), available
243
249
  finally:
244
250
  conn.close()
@@ -267,15 +273,12 @@ def _load_translation_markdown(paper_id: str, lang: str) -> str | None:
267
273
  conn = _open_ro_conn(cfg.snapshot_db)
268
274
  try:
269
275
  row = conn.execute(
270
- "SELECT translations_json FROM paper WHERE paper_id = ?",
271
- (paper_id,),
276
+ "SELECT md_content_hash FROM paper_translation WHERE paper_id = ? AND lang = ?",
277
+ (paper_id, lang),
272
278
  ).fetchone()
273
- if not row or not row["translations_json"]:
274
- return None
275
- translations = json.loads(row["translations_json"])
276
- rel_path = translations.get(lang)
277
- if not rel_path:
279
+ if not row or not row["md_content_hash"]:
278
280
  return None
281
+ rel_path = f"md_translate/{lang}/{row['md_content_hash']}.md"
279
282
  return _load_static_text(rel_path)
280
283
  finally:
281
284
  conn.close()
@@ -296,22 +299,32 @@ def search_papers(query: str, limit: int = 10) -> list[dict[str, Any]]:
296
299
 
297
300
  conn = _open_ro_conn(cfg.snapshot_db)
298
301
  try:
302
+ match_expr = rewrite_search_query(query)
303
+ if not match_expr:
304
+ return []
299
305
  cur = conn.execute(
300
306
  """
301
- SELECT paper_id, title, year, venue, abstract
302
- FROM paper_search
303
- WHERE paper_search MATCH ?
307
+ SELECT
308
+ p.paper_id,
309
+ p.title,
310
+ p.year,
311
+ p.venue,
312
+ snippet(paper_fts, -1, '[[[', ']]]', '…', 30) AS snippet_markdown,
313
+ bm25(paper_fts, 5.0, 3.0, 1.0, 1.0, 2.0) AS rank
314
+ FROM paper_fts
315
+ JOIN paper p ON p.paper_id = paper_fts.paper_id
316
+ WHERE paper_fts MATCH ?
304
317
  ORDER BY rank
305
318
  LIMIT ?
306
319
  """,
307
- (rewrite_search_query(query), limit),
320
+ (match_expr, limit),
308
321
  )
309
322
  rows = cur.fetchall()
310
323
  results: list[dict[str, Any]] = []
311
324
  for row in rows:
312
- snippet = str(row["abstract"] or "")
325
+ snippet = str(row["snippet_markdown"] or "")
313
326
  snippet = remove_cjk_spaces(snippet)
314
- snippet, markers = merge_adjacent_markers(snippet)
327
+ snippet = merge_adjacent_markers(snippet)
315
328
  results.append({
316
329
  "paper_id": str(row["paper_id"]),
317
330
  "title": str(row["title"]),
@@ -337,7 +350,7 @@ def search_papers_by_keyword(keyword: str, limit: int = 10) -> list[dict[str, An
337
350
  try:
338
351
  rows = conn.execute(
339
352
  """
340
- SELECT DISTINCT p.paper_id, p.title, p.year, p.venue, p.abstract
353
+ SELECT DISTINCT p.paper_id, p.title, p.year, p.venue, p.summary_preview
341
354
  FROM paper p
342
355
  JOIN paper_keyword pk ON pk.paper_id = p.paper_id
343
356
  JOIN keyword k ON k.keyword_id = pk.keyword_id
@@ -349,9 +362,9 @@ def search_papers_by_keyword(keyword: str, limit: int = 10) -> list[dict[str, An
349
362
  ).fetchall()
350
363
  results: list[dict[str, Any]] = []
351
364
  for row in rows:
352
- snippet = str(row["abstract"] or "")
365
+ snippet = str(row["summary_preview"] or "")
353
366
  snippet = remove_cjk_spaces(snippet)
354
- snippet, markers = merge_adjacent_markers(snippet)
367
+ snippet = merge_adjacent_markers(snippet)
355
368
  results.append({
356
369
  "paper_id": str(row["paper_id"]),
357
370
  "title": str(row["title"]),
@@ -377,26 +390,27 @@ def get_paper_metadata(paper_id: str) -> dict[str, Any]:
377
390
  try:
378
391
  row = conn.execute(
379
392
  """
380
- SELECT paper_id, title, year, venue, doi, arxiv_id, openreview_id, paper_pw_url,
381
- preferred_summary_template, summary_asset_paths_json
393
+ SELECT paper_id, title, year, venue, preferred_summary_template
382
394
  FROM paper WHERE paper_id = ?
383
395
  """,
384
396
  (paper_id,),
385
397
  ).fetchone()
386
398
  if not row:
387
399
  raise McpToolError("not_found", "paper not found", paper_id=paper_id)
388
-
389
- asset_paths = json.loads(row["summary_asset_paths_json"] or "{}")
390
- available = sorted(asset_paths.keys())
400
+ template_rows = conn.execute(
401
+ "SELECT template_tag FROM paper_summary WHERE paper_id = ?",
402
+ (paper_id,),
403
+ ).fetchall()
404
+ available = sorted((str(item["template_tag"]) for item in template_rows), key=str.lower)
391
405
  return {
392
406
  "paper_id": str(row["paper_id"]),
393
407
  "title": str(row["title"]),
394
408
  "year": str(row["year"]),
395
409
  "venue": str(row["venue"]),
396
- "doi": row["doi"],
397
- "arxiv_id": row["arxiv_id"],
398
- "openreview_id": row["openreview_id"],
399
- "paper_pw_url": row["paper_pw_url"],
410
+ "doi": None,
411
+ "arxiv_id": None,
412
+ "openreview_id": None,
413
+ "paper_pw_url": None,
400
414
  "preferred_summary_template": row["preferred_summary_template"],
401
415
  "available_summary_templates": available,
402
416
  }
@@ -0,0 +1,184 @@
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
+ import tempfile
5
+ from pathlib import Path
6
+ import unittest
7
+
8
+ from deepresearch_flow.paper.snapshot.common import ApiLimits
9
+ from deepresearch_flow.paper.snapshot.mcp_server import (
10
+ McpSnapshotConfig,
11
+ McpToolError,
12
+ configure,
13
+ get_paper_metadata,
14
+ get_paper_source,
15
+ get_paper_summary,
16
+ resource_translation,
17
+ search_papers,
18
+ search_papers_by_keyword,
19
+ )
20
+ from deepresearch_flow.paper.snapshot.schema import init_snapshot_db
21
+
22
+
23
+ class TestMcpServerSchemaCompat(unittest.TestCase):
24
+ @classmethod
25
+ def setUpClass(cls) -> None:
26
+ cls.tmpdir = tempfile.TemporaryDirectory()
27
+ root = Path(cls.tmpdir.name)
28
+ cls.db_path = root / "snapshot.db"
29
+ cls.static_dir = root / "static"
30
+ cls.paper_id = "eb87c02de5b908dff9f91edda47364a5"
31
+
32
+ (cls.static_dir / "summary" / cls.paper_id).mkdir(parents=True, exist_ok=True)
33
+ (cls.static_dir / "md").mkdir(parents=True, exist_ok=True)
34
+ (cls.static_dir / "md_translate" / "zh").mkdir(parents=True, exist_ok=True)
35
+ (cls.static_dir / "summary" / f"{cls.paper_id}.json").write_text(
36
+ '{"template_tag":"deep_read","summary":"default summary"}',
37
+ encoding="utf-8",
38
+ )
39
+ (cls.static_dir / "summary" / cls.paper_id / "deep_read.json").write_text(
40
+ '{"template_tag":"deep_read","summary":"deep summary"}',
41
+ encoding="utf-8",
42
+ )
43
+ (cls.static_dir / "summary" / cls.paper_id / "simple.json").write_text(
44
+ '{"template_tag":"simple","summary":"simple summary"}',
45
+ encoding="utf-8",
46
+ )
47
+ (cls.static_dir / "md" / "sourcehash.md").write_text(
48
+ "# source body",
49
+ encoding="utf-8",
50
+ )
51
+ (cls.static_dir / "md_translate" / "zh" / "trhash.md").write_text(
52
+ "# 翻译内容",
53
+ encoding="utf-8",
54
+ )
55
+
56
+ conn = sqlite3.connect(str(cls.db_path))
57
+ try:
58
+ init_snapshot_db(conn)
59
+ conn.execute(
60
+ """
61
+ INSERT INTO paper(
62
+ paper_id, paper_key, paper_key_type, title, year, month, publication_date,
63
+ venue, preferred_summary_template, summary_preview, paper_index, source_hash,
64
+ output_language, provider, model, prompt_template, extracted_at,
65
+ pdf_content_hash, source_md_content_hash
66
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
67
+ """,
68
+ (
69
+ cls.paper_id,
70
+ "meta:key",
71
+ "meta",
72
+ "Graph Neural Networks",
73
+ "2024",
74
+ "01",
75
+ "2024-01-01",
76
+ "ICLR",
77
+ "deep_read",
78
+ "Graph methods preview",
79
+ 1,
80
+ "sourcekey",
81
+ "en",
82
+ "provider-x",
83
+ "model-y",
84
+ "deep_read",
85
+ "2025-01-01T00:00:00Z",
86
+ "pdfhash",
87
+ "sourcehash",
88
+ ),
89
+ )
90
+ conn.execute(
91
+ "INSERT INTO paper_summary(paper_id, template_tag) VALUES (?, ?)",
92
+ (cls.paper_id, "deep_read"),
93
+ )
94
+ conn.execute(
95
+ "INSERT INTO paper_summary(paper_id, template_tag) VALUES (?, ?)",
96
+ (cls.paper_id, "simple"),
97
+ )
98
+ conn.execute(
99
+ "INSERT INTO paper_translation(paper_id, lang, md_content_hash) VALUES (?, ?, ?)",
100
+ (cls.paper_id, "zh", "trhash"),
101
+ )
102
+ conn.execute("INSERT INTO keyword(value) VALUES (?)", ("machine learning",))
103
+ keyword_row = conn.execute(
104
+ "SELECT keyword_id FROM keyword WHERE value = ?",
105
+ ("machine learning",),
106
+ ).fetchone()
107
+ conn.execute(
108
+ "INSERT INTO paper_keyword(paper_id, keyword_id) VALUES (?, ?)",
109
+ (cls.paper_id, int(keyword_row[0])),
110
+ )
111
+ conn.execute(
112
+ """
113
+ INSERT INTO paper_fts(paper_id, title, summary, source, translated, metadata)
114
+ VALUES (?, ?, ?, ?, ?, ?)
115
+ """,
116
+ (
117
+ cls.paper_id,
118
+ "graph neural networks",
119
+ "graph representation learning",
120
+ "source text",
121
+ "translated text",
122
+ "machine learning iclr",
123
+ ),
124
+ )
125
+ conn.commit()
126
+ finally:
127
+ conn.close()
128
+
129
+ configure(
130
+ McpSnapshotConfig(
131
+ snapshot_db=cls.db_path,
132
+ static_base_url="",
133
+ static_export_dir=cls.static_dir,
134
+ limits=ApiLimits(),
135
+ origin_allowlist=["*"],
136
+ )
137
+ )
138
+
139
+ @classmethod
140
+ def tearDownClass(cls) -> None:
141
+ cls.tmpdir.cleanup()
142
+
143
+ def test_get_paper_metadata_with_new_schema(self) -> None:
144
+ payload = get_paper_metadata(self.paper_id)
145
+ self.assertEqual(payload["paper_id"], self.paper_id)
146
+ self.assertEqual(payload["preferred_summary_template"], "deep_read")
147
+ self.assertEqual(payload["available_summary_templates"], ["deep_read", "simple"])
148
+ self.assertIsNone(payload["doi"])
149
+ self.assertIsNone(payload["arxiv_id"])
150
+ self.assertIsNone(payload["openreview_id"])
151
+ self.assertIsNone(payload["paper_pw_url"])
152
+
153
+ def test_get_paper_summary_default_and_template(self) -> None:
154
+ default_summary = get_paper_summary(self.paper_id)
155
+ deep_read_summary = get_paper_summary(self.paper_id, template="deep_read")
156
+ self.assertIn("default summary", default_summary)
157
+ self.assertIn("deep summary", deep_read_summary)
158
+
159
+ def test_get_paper_summary_template_not_available(self) -> None:
160
+ with self.assertRaises(McpToolError) as ctx:
161
+ get_paper_summary(self.paper_id, template="unknown")
162
+ self.assertEqual(ctx.exception.code, "template_not_available")
163
+ self.assertEqual(
164
+ ctx.exception.details["available_summary_templates"],
165
+ ["deep_read", "simple"],
166
+ )
167
+
168
+ def test_source_and_translation_loading(self) -> None:
169
+ source = get_paper_source(self.paper_id)
170
+ translated = resource_translation(self.paper_id, "zh")
171
+ self.assertIn("source body", source)
172
+ self.assertIn("翻译内容", translated)
173
+
174
+ def test_search_tools_use_current_schema(self) -> None:
175
+ fts_hits = search_papers("graph", limit=5)
176
+ facet_hits = search_papers_by_keyword("machine", limit=5)
177
+ self.assertGreaterEqual(len(fts_hits), 1)
178
+ self.assertGreaterEqual(len(facet_hits), 1)
179
+ self.assertEqual(fts_hits[0]["paper_id"], self.paper_id)
180
+ self.assertEqual(facet_hits[0]["paper_id"], self.paper_id)
181
+
182
+
183
+ if __name__ == "__main__":
184
+ unittest.main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepresearch-flow
3
- Version: 0.7.1
3
+ Version: 0.7.2
4
4
  Summary: Workflow tools for paper extraction, review, and research automation.
5
5
  Author-email: DengQi <dengqi935@gmail.com>
6
6
  License: MIT License
@@ -47,12 +47,13 @@ deepresearch_flow/paper/snapshot/api.py,sha256=F_qehvCjxTBTGj9FmqP4NnJQayUPJm0N5
47
47
  deepresearch_flow/paper/snapshot/builder.py,sha256=HbRcfNteMoP4RnQ4y2onZCm9XfnIvzXLn_EwsLZsDzY,38692
48
48
  deepresearch_flow/paper/snapshot/common.py,sha256=KAhlGlPgabOCe9Faps8BoDqin71qpkCfaL_ADCr_9vg,917
49
49
  deepresearch_flow/paper/snapshot/identity.py,sha256=k9x1EZPFBU1qgxzkTGvwVtDjLgcosmM_udPuvRLl0uI,7748
50
- deepresearch_flow/paper/snapshot/mcp_server.py,sha256=lvgbXmuZCZ_zaQMdZEMjN-OChHPdoZ9MmuuQ-7ORias,22901
50
+ deepresearch_flow/paper/snapshot/mcp_server.py,sha256=KGNCtOWiJ82wHQmrLNVhLwDugGtosqVvKWeLq4ZlBlg,23395
51
51
  deepresearch_flow/paper/snapshot/schema.py,sha256=DcVmAklLYyEeDoVV9jYw7hoMHnHd9Eziivl-LP2busY,8991
52
52
  deepresearch_flow/paper/snapshot/text.py,sha256=0RnxLowa6AdirdLsUYym6BhWbjwiP2Qj2oZeA-pjmdE,4368
53
53
  deepresearch_flow/paper/snapshot/unpacker.py,sha256=ScKSFdrQLJHrITHe9KAxgAEH-vAAnXLolvW9zeJ3wsc,8575
54
54
  deepresearch_flow/paper/snapshot/tests/__init__.py,sha256=G0IowrxHjGUIaqxcw6SvlcLFAtE5ZsleG6ECgd-sIdk,52
55
55
  deepresearch_flow/paper/snapshot/tests/test_identity.py,sha256=KDFixAUU9l68KOum7gf1IrD0Oy18dBCSXG7RbJTqflA,4520
56
+ deepresearch_flow/paper/snapshot/tests/test_mcp_server_schema_compat.py,sha256=T7FtkKkGpZx5M7Z278F4iaQFfwS0_XXce_tRdTArt5k,7076
56
57
  deepresearch_flow/paper/templates/__init__.py,sha256=p8W6kINvrf-T2X6Ow4GMr28syVOorFuMn0pbmieVzAw,35
57
58
  deepresearch_flow/paper/templates/deep_read.md.j2,sha256=vwVSPOzMBFIS72ez5XFBaKrDZGz0z32L3VGP6mNk434,4780
58
59
  deepresearch_flow/paper/templates/deep_read_phi.md.j2,sha256=6Yz2Kxk0czGDPkZiWX3b87glLYHwDU1afr6CkjS-dh8,1666
@@ -466,9 +467,9 @@ deepresearch_flow/translator/placeholder.py,sha256=mEgqA-dPdOsIhno0h_hzfpXpY2asb
466
467
  deepresearch_flow/translator/prompts.py,sha256=EvfBvBIpQXARDj4m87GAyFXJGL8EJeahj_rOmp9mv68,5556
467
468
  deepresearch_flow/translator/protector.py,sha256=yUMuS2FgVofK_MRXrcauLRiwNvdCCjNAnh6CcNd686o,11777
468
469
  deepresearch_flow/translator/segment.py,sha256=rBFMCLTrvm2GrPc_hNFymi-8Ih2DAtUQlZHCRE9nLaM,5146
469
- deepresearch_flow-0.7.1.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
470
- deepresearch_flow-0.7.1.dist-info/METADATA,sha256=rSmAZMSVrjhXLo6Dte3Gaf9AvVyznUaHd-Ahwn47Ne4,31331
471
- deepresearch_flow-0.7.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
472
- deepresearch_flow-0.7.1.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
473
- deepresearch_flow-0.7.1.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
474
- deepresearch_flow-0.7.1.dist-info/RECORD,,
470
+ deepresearch_flow-0.7.2.dist-info/licenses/LICENSE,sha256=hT8F2Py1pe6flxq3Ufdm2UKFk0B8CBm0aAQfsLXfvjw,1063
471
+ deepresearch_flow-0.7.2.dist-info/METADATA,sha256=2QzhwiS1G6q-XZyRXv7SGgzzsNueZ3PH5-q3g0jlP-Y,31331
472
+ deepresearch_flow-0.7.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
473
+ deepresearch_flow-0.7.2.dist-info/entry_points.txt,sha256=1uIKscs0YRMg_mFsg9NjsaTt4CvQqQ_-zGERUKhhL_Y,65
474
+ deepresearch_flow-0.7.2.dist-info/top_level.txt,sha256=qBl4RvPJNJUbL8CFfMNWxY0HpQLx5RlF_ko-z_aKpm0,18
475
+ deepresearch_flow-0.7.2.dist-info/RECORD,,