deepresearch-flow 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. deepresearch_flow/paper/db.py +184 -0
  2. deepresearch_flow/paper/db_ops.py +1939 -0
  3. deepresearch_flow/paper/web/app.py +38 -3705
  4. deepresearch_flow/paper/web/constants.py +23 -0
  5. deepresearch_flow/paper/web/filters.py +255 -0
  6. deepresearch_flow/paper/web/handlers/__init__.py +14 -0
  7. deepresearch_flow/paper/web/handlers/api.py +217 -0
  8. deepresearch_flow/paper/web/handlers/pages.py +334 -0
  9. deepresearch_flow/paper/web/markdown.py +549 -0
  10. deepresearch_flow/paper/web/static/css/main.css +857 -0
  11. deepresearch_flow/paper/web/static/js/detail.js +406 -0
  12. deepresearch_flow/paper/web/static/js/index.js +266 -0
  13. deepresearch_flow/paper/web/static/js/outline.js +58 -0
  14. deepresearch_flow/paper/web/static/js/stats.js +39 -0
  15. deepresearch_flow/paper/web/templates/base.html +43 -0
  16. deepresearch_flow/paper/web/templates/detail.html +332 -0
  17. deepresearch_flow/paper/web/templates/index.html +114 -0
  18. deepresearch_flow/paper/web/templates/stats.html +29 -0
  19. deepresearch_flow/paper/web/templates.py +85 -0
  20. deepresearch_flow/paper/web/text.py +68 -0
  21. deepresearch_flow/recognize/cli.py +805 -26
  22. deepresearch_flow/recognize/katex_check.js +29 -0
  23. deepresearch_flow/recognize/math.py +719 -0
  24. deepresearch_flow/recognize/mermaid.py +690 -0
  25. {deepresearch_flow-0.3.0.dist-info → deepresearch_flow-0.4.1.dist-info}/METADATA +78 -4
  26. {deepresearch_flow-0.3.0.dist-info → deepresearch_flow-0.4.1.dist-info}/RECORD +30 -9
  27. {deepresearch_flow-0.3.0.dist-info → deepresearch_flow-0.4.1.dist-info}/WHEEL +0 -0
  28. {deepresearch_flow-0.3.0.dist-info → deepresearch_flow-0.4.1.dist-info}/entry_points.txt +0 -0
  29. {deepresearch_flow-0.3.0.dist-info → deepresearch_flow-0.4.1.dist-info}/licenses/LICENSE +0 -0
  30. {deepresearch_flow-0.3.0.dist-info → deepresearch_flow-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,334 @@
1
+ """Page route handlers for paper web UI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import html
6
+ from urllib.parse import urlencode
7
+
8
+ from starlette.requests import Request
9
+ from starlette.responses import HTMLResponse, RedirectResponse, Response
10
+
11
+ from deepresearch_flow.paper.db_ops import PaperIndex
12
+ from deepresearch_flow.paper.web.markdown import (
13
+ create_md_renderer,
14
+ normalize_markdown_images,
15
+ render_markdown_with_math_placeholders,
16
+ render_paper_markdown,
17
+ select_template_tag,
18
+ )
19
+ from deepresearch_flow.paper.web.text import normalize_title
20
+ from deepresearch_flow.paper.web.templates import (
21
+ build_pdfjs_viewer_url,
22
+ render_template,
23
+ )
24
+
25
+
26
async def robots_txt(_: Request) -> Response:
    """Return a plain-text robots.txt that disallows every crawler.

    The incoming request is ignored; the body is a fixed string.
    """
    robots_body = "User-agent: *\nDisallow: /\n"
    return Response(robots_body, media_type="text/plain")
29
+
30
+
31
async def index_page(request: Request) -> HTMLResponse:
    """Render the main landing page with the search box and paper list.

    Reads the ``PaperIndex`` stored on ``request.app.state.index`` and hands
    its ``template_tags`` plus a filter-syntax help text to ``index.html``.

    Args:
        request: Incoming request; only ``request.app.state.index`` is read.

    Returns:
        The rendered ``index.html`` wrapped in an ``HTMLResponse``.
    """
    index: PaperIndex = request.app.state.index
    filter_help = (
        "Filters syntax:\n"
        "pdf:yes|no source:yes|no translated:yes|no summary:yes|no\n"
        "tmpl:<tag> or template:<tag>\n"
        "has:pdf / no:source aliases\n"
        "Content tags still use the search box (tag:fpga)."
    )
    # Convert newlines to the &#10; character reference so the multi-line
    # help text can be used as a tooltip.
    filter_help_escaped = filter_help.replace("\n", "&#10;")

    # render_template comes from the module-level import; no function-scope
    # re-import is needed.
    return HTMLResponse(
        render_template(
            "index.html",
            title="Paper DB",
            template_tags=index.template_tags,
            filter_help=filter_help_escaped,
        )
    )
54
+
55
+
56
async def stats_page(request: Request) -> HTMLResponse:
    """Render the statistics page (``stats.html``).

    Args:
        request: Incoming request; not inspected by this handler.

    Returns:
        The rendered ``stats.html`` wrapped in an ``HTMLResponse``.
    """
    # render_template comes from the module-level import; no function-scope
    # re-import is needed.
    return HTMLResponse(render_template("stats.html", title="Stats"))
61
+
62
+
63
def _read_markdown_text(path) -> str:
    """Read ``path`` as UTF-8, falling back to latin-1 on a decode error.

    ``path`` only needs to expose ``read_text(encoding=...)``.
    """
    try:
        return path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        return path.read_text(encoding="latin-1")


async def paper_detail(request: Request) -> Response:
    """Paper detail page with multiple views (summary, source, translated, PDF, etc).

    Renders ``detail.html`` through ``render_template``.

    Path parameters:
        source_hash: Key looked up in ``index.id_by_hash``.

    Query parameters:
        view: One of summary/source/translated/pdf/pdfjs/split. Only the PDF
            views and split are accepted for PDF-only papers. Unknown values
            fall back to the default view.
        template: Summary template tag, passed to ``select_template_tag``.
        lang: Translation language; lower-cased before lookup.
        embed: ``"1"`` adds the ``embed-view`` body class (ignored for split).
        left / right: Pane views for the split view. ``split`` itself is not
            accepted as a pane view.

    Returns:
        A redirect to ``/`` when ``source_hash`` is unknown, otherwise the
        rendered detail page as an ``HTMLResponse``.
    """
    index: PaperIndex = request.app.state.index
    source_hash = request.path_params["source_hash"]
    idx = index.id_by_hash.get(source_hash)
    if idx is None:
        return RedirectResponse("/")
    paper = index.papers[idx]
    is_pdf_only = bool(paper.get("_is_pdf_only"))
    page_title = normalize_title(str(paper.get("paper_title") or "")) or "Paper"
    view = request.query_params.get("view")
    template_param = request.query_params.get("template")
    embed = request.query_params.get("embed") == "1"

    pdf_path = index.pdf_path_by_hash.get(source_hash)
    pdf_url = f"/api/pdf/{source_hash}"
    source_available = source_hash in index.md_path_by_hash
    translations = index.translated_md_by_hash.get(source_hash, {})
    translation_langs = sorted(translations.keys(), key=str.lower)
    lang_param = request.query_params.get("lang")
    normalized_lang = lang_param.lower() if lang_param else None
    selected_lang = None
    if translation_langs:
        # Preference order: requested language, then "zh", then the first
        # language in case-insensitive sort order.
        if normalized_lang and normalized_lang in translations:
            selected_lang = normalized_lang
        elif "zh" in translations:
            selected_lang = "zh"
        else:
            selected_lang = translation_langs[0]

    # Determine allowed views
    allowed_views = {"summary", "source", "translated", "pdf", "pdfjs", "split"}
    if is_pdf_only:
        allowed_views = {"pdf", "pdfjs", "split"}
    # A split pane must never itself be a split view, otherwise a split page
    # would be embedded inside a split pane.
    pane_views = allowed_views - {"split"}

    def normalize_view(value: str | None, default: str, choices: set[str]) -> str:
        """Return ``value`` when it is one of ``choices``, else ``default``."""
        return value if value in choices else default

    preferred_pdf_view = "pdfjs" if pdf_path else "pdf"
    default_view = preferred_pdf_view if is_pdf_only else "summary"
    view = normalize_view(view, default_view, allowed_views)
    if view == "split":
        # The split page is the outer frame; it is never rendered embedded.
        embed = False

    # Determine split view settings
    if is_pdf_only:
        default_left = preferred_pdf_view
        default_right = preferred_pdf_view
    else:
        if pdf_path:
            default_left = preferred_pdf_view
        else:
            default_left = "source" if source_available else "summary"
        default_right = "summary"

    left_view = normalize_view(
        request.query_params.get("left"), default_left, pane_views
    )
    right_view = normalize_view(
        request.query_params.get("right"), default_right, pane_views
    )

    # Build tabs and view_hrefs
    def build_href(v: str, **extra_params: str) -> str:
        """Build the detail-page URL for view ``v``, carrying relevant state."""
        params: dict[str, str] = {"view": v}
        if v == "summary" and template_param:
            params["template"] = str(template_param)
        if v == "translated" and selected_lang:
            params["lang"] = selected_lang
        if v == "split":
            params["left"] = left_view
            params["right"] = right_view
        for k, val in extra_params.items():
            params[k] = str(val)
        return f"/paper/{source_hash}?{urlencode(params)}"

    tab_defs = [
        ("Summary", "summary"),
        ("Source", "source"),
        ("Translated", "translated"),
        ("PDF", "pdf"),
        ("PDF Viewer", "pdfjs"),
        ("Split", "split"),
    ]
    # Filtering by allowed_views also yields the reduced PDF-only tab list.
    tabs = [(label, v) for label, v in tab_defs if v in allowed_views]
    view_hrefs = {v: build_href(v) for _, v in tabs}

    # Initialize template variables
    body_html = ""
    raw_content = ""
    summary_template_name = ""
    template_warning = ""
    template_controls = ""
    source_path_str = ""
    translated_path_str = ""
    pdf_filename = ""
    pdfjs_url = ""
    left_src = ""
    right_src = ""
    split_options: list[tuple[str, str]] = []
    show_outline = False

    selected_tag, available_templates = select_template_tag(paper, template_param)

    # Summary view
    if view == "summary":
        markdown, summary_template_name, warning = render_paper_markdown(
            paper,
            request.app.state.fallback_language,
            template_tag=selected_tag,
        )
        md_renderer = create_md_renderer()
        body_html = render_markdown_with_math_placeholders(md_renderer, markdown)
        # Warning is already HTML, don't wrap again
        template_warning = warning if warning else ""
        show_outline = True
        if available_templates:
            options = "\n".join(
                f'<option value="{html.escape(tag)}"{" selected" if tag == selected_tag else ""}>{html.escape(tag)}</option>'
                for tag in available_templates
            )
            template_controls = f"""
<div class="flex items-center gap-2 text-sm text-slate-500">
<span>Template:</span>
<select id="templateSelect" class="h-9 rounded-md border border-slate-200 bg-white px-2 text-sm text-slate-900 shadow-sm">
{options}
</select>
</div>
<script>
const templateSelect = document.getElementById('templateSelect');
if (templateSelect) {{
templateSelect.addEventListener('change', () => {{
const params = new URLSearchParams(window.location.search);
params.set('view', 'summary');
params.set('template', templateSelect.value);
window.location.search = params.toString();
}});
}}
</script>
"""

    # Source view
    if view == "source":
        source_path = index.md_path_by_hash.get(source_hash)
        if not source_path:
            body_html = '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
        else:
            raw = _read_markdown_text(source_path)
            md_renderer = create_md_renderer()
            body_html = render_markdown_with_math_placeholders(md_renderer, raw)
            raw_content = raw
            source_path_str = str(source_path)
            show_outline = True

    # Translated view
    if view == "translated":
        if not translation_langs or not selected_lang:
            # The angle brackets around the placeholders are escaped so they
            # show up as text instead of being parsed as HTML elements.
            body_html = '<div class="warning">No translated markdown found. Provide <code>--md-translated-root</code> and place <code>&lt;base&gt;.&lt;lang&gt;.md</code> under that root.</div>'
        else:
            translated_path = translations.get(selected_lang)
            if not translated_path:
                body_html = '<div class="warning">Translated markdown not found for the selected language.</div>'
            else:
                raw = _read_markdown_text(translated_path)
                raw = normalize_markdown_images(raw)
                md_renderer = create_md_renderer()
                body_html = render_markdown_with_math_placeholders(md_renderer, raw)
                raw_content = raw
                translated_path_str = str(translated_path)
                show_outline = True

    # PDF view
    if view == "pdf":
        if not pdf_path:
            body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
        pdf_filename = str(pdf_path.name) if pdf_path else ""

    # PDF.js view
    if view == "pdfjs":
        if not pdf_path:
            body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
        pdfjs_url = build_pdfjs_viewer_url(pdf_url)
        pdf_filename = str(pdf_path.name) if pdf_path else ""

    # Split view
    if view == "split":
        def pane_src(pane_view: str) -> str:
            """Return the URL loaded into one pane of the split view."""
            if pane_view == "pdfjs" and pdf_path:
                return build_pdfjs_viewer_url(pdf_url)
            params: dict[str, str] = {"view": pane_view, "embed": "1"}
            if pane_view == "summary" and template_param:
                params["template"] = str(template_param)
            if pane_view == "translated" and selected_lang:
                params["lang"] = selected_lang
            return f"/paper/{source_hash}?{urlencode(params)}"

        left_src = pane_src(left_view)
        right_src = pane_src(right_view)

        pane_defs = [
            ("summary", "Summary"),
            ("source", "Source"),
            ("translated", "Translated"),
            ("pdf", "PDF"),
            ("pdfjs", "PDF Viewer"),
        ]
        # Filtering by pane_views also yields the reduced PDF-only option list.
        split_options = [(v, label) for v, label in pane_defs if v in pane_views]

    # Render template
    container_class = "wide" if view == "split" else ""
    body_class = "font-hei"
    if embed:
        body_class = f"{body_class} embed-view"
    if view == "split":
        body_class = f"{body_class} split-view"
    return HTMLResponse(
        render_template(
            "detail.html",
            title=page_title,
            embed=embed,
            header_title=page_title,
            body_class=body_class,
            container_class=container_class,
            is_pdf_only=is_pdf_only,
            current_view=view,
            tabs=tabs,
            view_hrefs=view_hrefs,
            show_outline=show_outline,
            # Content variables
            body_html=body_html,
            raw_content=raw_content,
            summary_template_name=summary_template_name,
            template_warning=template_warning,
            template_controls=template_controls,
            available_templates=available_templates,
            selected_template_tag=selected_tag,
            # Source view
            source_path=source_path_str,
            # Translated view
            translated_path=translated_path_str,
            selected_lang=selected_lang,
            translation_langs=translation_langs,
            # PDF view
            pdf_filename=pdf_filename,
            pdf_url=pdf_url,
            # PDF.js view
            pdfjs_url=pdfjs_url,
            # Split view
            left_src=left_src,
            right_src=right_src,
            left_view=left_view,
            right_view=right_view,
            split_options=split_options,
        )
    )