deepresearch-flow 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. deepresearch_flow/cli.py +2 -0
  2. deepresearch_flow/paper/config.py +15 -0
  3. deepresearch_flow/paper/db.py +9 -0
  4. deepresearch_flow/paper/llm.py +2 -0
  5. deepresearch_flow/paper/web/app.py +413 -20
  6. deepresearch_flow/paper/web/pdfjs/build/pdf.js +18146 -0
  7. deepresearch_flow/paper/web/pdfjs/build/pdf.js.map +1 -0
  8. deepresearch_flow/paper/web/pdfjs/build/pdf.sandbox.js +280 -0
  9. deepresearch_flow/paper/web/pdfjs/build/pdf.sandbox.js.map +1 -0
  10. deepresearch_flow/paper/web/pdfjs/build/pdf.worker.js +58353 -0
  11. deepresearch_flow/paper/web/pdfjs/build/pdf.worker.js.map +1 -0
  12. deepresearch_flow/recognize/cli.py +157 -3
  13. deepresearch_flow/recognize/organize.py +58 -0
  14. deepresearch_flow/translator/__init__.py +1 -0
  15. deepresearch_flow/translator/cli.py +451 -0
  16. deepresearch_flow/translator/config.py +19 -0
  17. deepresearch_flow/translator/engine.py +959 -0
  18. deepresearch_flow/translator/fixers.py +451 -0
  19. deepresearch_flow/translator/placeholder.py +62 -0
  20. deepresearch_flow/translator/prompts.py +116 -0
  21. deepresearch_flow/translator/protector.py +291 -0
  22. deepresearch_flow/translator/segment.py +180 -0
  23. deepresearch_flow-0.3.0.dist-info/METADATA +306 -0
  24. {deepresearch_flow-0.2.0.dist-info → deepresearch_flow-0.3.0.dist-info}/RECORD +28 -13
  25. deepresearch_flow-0.2.0.dist-info/METADATA +0 -424
  26. {deepresearch_flow-0.2.0.dist-info → deepresearch_flow-0.3.0.dist-info}/WHEEL +0 -0
  27. {deepresearch_flow-0.2.0.dist-info → deepresearch_flow-0.3.0.dist-info}/entry_points.txt +0 -0
  28. {deepresearch_flow-0.2.0.dist-info → deepresearch_flow-0.3.0.dist-info}/licenses/LICENSE +0 -0
  29. {deepresearch_flow-0.2.0.dist-info → deepresearch_flow-0.3.0.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,10 @@ import re
12
12
  from urllib.parse import urlencode, quote
13
13
 
14
14
  from markdown_it import MarkdownIt
15
+ try:
16
+ from mdit_py_plugins.footnote import footnote_plugin as footnote
17
+ except ImportError: # pragma: no cover - compatibility with older names
18
+ from mdit_py_plugins.footnote import footnote
15
19
  from starlette.applications import Starlette
16
20
  from starlette.requests import Request
17
21
  from starlette.responses import FileResponse, HTMLResponse, JSONResponse, RedirectResponse, Response
@@ -66,6 +70,7 @@ class PaperIndex:
66
70
  by_venue: dict[str, set[int]]
67
71
  stats: dict[str, Any]
68
72
  md_path_by_hash: dict[str, Path]
73
+ translated_md_by_hash: dict[str, dict[str, Path]]
69
74
  pdf_path_by_hash: dict[str, Path]
70
75
  template_tags: list[str]
71
76
 
@@ -248,6 +253,7 @@ def build_index(
248
253
  papers: list[dict[str, Any]],
249
254
  *,
250
255
  md_roots: list[Path] | None = None,
256
+ md_translated_roots: list[Path] | None = None,
251
257
  pdf_roots: list[Path] | None = None,
252
258
  ) -> PaperIndex:
253
259
  id_by_hash: dict[str, int] = {}
@@ -258,9 +264,11 @@ def build_index(
258
264
  by_venue: dict[str, set[int]] = {}
259
265
 
260
266
  md_path_by_hash: dict[str, Path] = {}
267
+ translated_md_by_hash: dict[str, dict[str, Path]] = {}
261
268
  pdf_path_by_hash: dict[str, Path] = {}
262
269
 
263
270
  md_file_index = _build_file_index(md_roots or [], suffixes={".md"})
271
+ translated_index = _build_translated_index(md_translated_roots or [])
264
272
  pdf_file_index = _build_file_index(pdf_roots or [], suffixes={".pdf"})
265
273
 
266
274
  year_counts: dict[str, int] = {}
@@ -360,6 +368,10 @@ def build_index(
360
368
  md_path = _resolve_source_md(paper, md_file_index)
361
369
  if md_path is not None:
362
370
  md_path_by_hash[source_hash_str] = md_path
371
+ base_key = md_path.with_suffix("").name.lower()
372
+ translations = translated_index.get(base_key, {})
373
+ if translations:
374
+ translated_md_by_hash[source_hash_str] = translations
363
375
  pdf_path = _resolve_pdf(paper, pdf_file_index)
364
376
  if pdf_path is not None:
365
377
  pdf_path_by_hash[source_hash_str] = pdf_path
@@ -398,6 +410,7 @@ def build_index(
398
410
  by_venue=by_venue,
399
411
  stats=stats,
400
412
  md_path_by_hash=md_path_by_hash,
413
+ translated_md_by_hash=translated_md_by_hash,
401
414
  pdf_path_by_hash=pdf_path_by_hash,
402
415
  template_tags=template_tags,
403
416
  )
@@ -628,6 +641,7 @@ def _load_or_merge_papers(
628
641
 
629
642
  def _md_renderer() -> MarkdownIt:
630
643
  md = MarkdownIt("commonmark", {"html": False, "linkify": True})
644
+ md.use(footnote)
631
645
  md.enable("table")
632
646
  return md
633
647
 
@@ -641,6 +655,51 @@ def _strip_paragraph_wrapped_tables(text: str) -> str:
641
655
  return "\n".join(lines)
642
656
 
643
657
 
658
+ def _normalize_markdown_images(text: str) -> str:
659
+ lines = text.splitlines()
660
+ out: list[str] = []
661
+ in_fence = False
662
+ fence_char = ""
663
+ fence_len = 0
664
+ img_re = re.compile(r"!\[[^\]]*\]\((?:[^)\\]|\\.)*\)")
665
+ list_re = re.compile(r"^\s{0,3}(-|\*|\+|\d{1,9}\.)\s+")
666
+
667
+ for line in lines:
668
+ stripped = line.lstrip()
669
+ if stripped.startswith(("```", "~~~")):
670
+ run_len = 0
671
+ while run_len < len(stripped) and stripped[run_len] == stripped[0]:
672
+ run_len += 1
673
+ if not in_fence:
674
+ in_fence = True
675
+ fence_char = stripped[0]
676
+ fence_len = run_len
677
+ elif stripped[0] == fence_char and run_len >= fence_len:
678
+ in_fence = False
679
+ out.append(line)
680
+ continue
681
+ if in_fence:
682
+ out.append(line)
683
+ continue
684
+ match = img_re.search(line)
685
+ if not match:
686
+ out.append(line)
687
+ continue
688
+ if list_re.match(line) or (line.lstrip().startswith("|") and line.count("|") >= 2):
689
+ out.append(line)
690
+ continue
691
+ prefix = line[:match.start()]
692
+ if prefix.strip():
693
+ out.append(prefix.rstrip())
694
+ out.append("")
695
+ out.append(line[match.start():].lstrip())
696
+ continue
697
+ if out and out[-1].strip():
698
+ out.append("")
699
+ out.append(line)
700
+ return "\n".join(out)
701
+
702
+
644
703
  def _normalize_merge_title(value: str | None) -> str | None:
645
704
  if not value:
646
705
  return None
@@ -897,11 +956,14 @@ def _extract_math_placeholders(text: str) -> tuple[str, dict[str, str]]:
897
956
 
898
957
  # Inline math: $...$ (single-line)
899
958
  if ch == "$" and not text.startswith("$$", idx) and (idx == 0 or text[idx - 1] != "\\"):
959
+ line_end = text.find("\n", idx + 1)
960
+ if line_end == -1:
961
+ line_end = len(text)
900
962
  search_from = idx + 1
901
- end = text.find("$", search_from)
963
+ end = text.find("$", search_from, line_end)
902
964
  while end != -1 and text[end - 1] == "\\":
903
965
  search_from = end + 1
904
- end = text.find("$", search_from)
966
+ end = text.find("$", search_from, line_end)
905
967
  if end != -1:
906
968
  out.append(next_placeholder(text[idx : end + 1]))
907
969
  idx = end + 1
@@ -1683,6 +1745,39 @@ def _resolve_source_md(paper: dict[str, Any], md_index: dict[str, list[Path]]) -
1683
1745
  return _resolve_by_title_and_meta(paper, md_index)
1684
1746
 
1685
1747
 
1748
+ def _build_translated_index(roots: list[Path]) -> dict[str, dict[str, Path]]:
1749
+ index: dict[str, dict[str, Path]] = {}
1750
+ candidates: list[Path] = []
1751
+ for root in roots:
1752
+ try:
1753
+ if not root.exists() or not root.is_dir():
1754
+ continue
1755
+ except OSError:
1756
+ continue
1757
+ try:
1758
+ candidates.extend(root.rglob("*.md"))
1759
+ except OSError:
1760
+ continue
1761
+ for path in sorted(candidates, key=lambda item: str(item)):
1762
+ try:
1763
+ if not path.is_file():
1764
+ continue
1765
+ except OSError:
1766
+ continue
1767
+ name = path.name
1768
+ match = re.match(r"^(.+)\.([^.]+)\.md$", name, flags=re.IGNORECASE)
1769
+ if not match:
1770
+ continue
1771
+ base_name = match.group(1).strip()
1772
+ lang = match.group(2).strip()
1773
+ if not base_name or not lang:
1774
+ continue
1775
+ base_key = base_name.lower()
1776
+ lang_key = lang.lower()
1777
+ index.setdefault(base_key, {}).setdefault(lang_key, path.resolve())
1778
+ return index
1779
+
1780
+
1686
1781
  def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
1687
1782
  source_path = paper.get("source_path")
1688
1783
  if not source_path:
@@ -1761,6 +1856,7 @@ def _parse_filter_query(text: str) -> dict[str, set[str]]:
1761
1856
  "pdf": set(),
1762
1857
  "source": set(),
1763
1858
  "summary": set(),
1859
+ "translated": set(),
1764
1860
  "template": set(),
1765
1861
  }
1766
1862
  for token in _tokenize_filter_query(text):
@@ -1777,7 +1873,7 @@ def _parse_filter_query(text: str) -> dict[str, set[str]]:
1777
1873
  if tag:
1778
1874
  parsed["template"].add(tag.lower())
1779
1875
  continue
1780
- if key in {"pdf", "source", "summary"}:
1876
+ if key in {"pdf", "source", "summary", "translated"}:
1781
1877
  for part in raw_value.split(","):
1782
1878
  normalized = _normalize_presence_value(part)
1783
1879
  if normalized:
@@ -1786,7 +1882,7 @@ def _parse_filter_query(text: str) -> dict[str, set[str]]:
1786
1882
  if key in {"has", "no"}:
1787
1883
  targets = [part.strip().lower() for part in raw_value.split(",") if part.strip()]
1788
1884
  for target in targets:
1789
- if target not in {"pdf", "source", "summary"}:
1885
+ if target not in {"pdf", "source", "summary", "translated"}:
1790
1886
  continue
1791
1887
  parsed[target].add("with" if key == "has" else "without")
1792
1888
  return parsed
@@ -1831,6 +1927,7 @@ def _compute_counts(index: PaperIndex, ids: set[int]) -> dict[str, Any]:
1831
1927
  pdf_count = 0
1832
1928
  source_count = 0
1833
1929
  summary_count = 0
1930
+ translated_count = 0
1834
1931
  total_count = 0
1835
1932
  tag_map = _template_tag_map(index)
1836
1933
 
@@ -1843,12 +1940,15 @@ def _compute_counts(index: PaperIndex, ids: set[int]) -> dict[str, Any]:
1843
1940
  has_source = source_hash in index.md_path_by_hash
1844
1941
  has_pdf = source_hash in index.pdf_path_by_hash
1845
1942
  has_summary = bool(paper.get("_has_summary"))
1943
+ has_translated = bool(index.translated_md_by_hash.get(source_hash))
1846
1944
  if has_source:
1847
1945
  source_count += 1
1848
1946
  if has_pdf:
1849
1947
  pdf_count += 1
1850
1948
  if has_summary:
1851
1949
  summary_count += 1
1950
+ if has_translated:
1951
+ translated_count += 1
1852
1952
  for tag_lc in paper.get("_template_tags_lc") or []:
1853
1953
  display = tag_map.get(tag_lc)
1854
1954
  if display:
@@ -1859,6 +1959,7 @@ def _compute_counts(index: PaperIndex, ids: set[int]) -> dict[str, Any]:
1859
1959
  "pdf": pdf_count,
1860
1960
  "source": source_count,
1861
1961
  "summary": summary_count,
1962
+ "translated": translated_count,
1862
1963
  "templates": template_counts,
1863
1964
  "template_order": template_order,
1864
1965
  }
@@ -1959,7 +2060,7 @@ def _page_shell(
1959
2060
  .detail-header .header-back {{ margin-right: 0; }}
1960
2061
  .detail-header .header-link {{ margin-right: 0; }}
1961
2062
  .container {{ max-width: 1100px; margin: 0 auto; padding: 16px; }}
1962
- .filters {{ display: grid; grid-template-columns: repeat(6, 1fr); gap: 8px; margin: 12px 0 16px; }}
2063
+ .filters {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 8px; margin: 12px 0 16px; }}
1963
2064
  .filters input {{ width: 100%; padding: 8px; border: 1px solid #d0d7de; border-radius: 6px; }}
1964
2065
  .filters select {{ width: 100%; border: 1px solid #d0d7de; border-radius: 6px; background: #fff; font-size: 13px; }}
1965
2066
  .filters select:not([multiple]) {{ padding: 6px 8px; }}
@@ -1981,10 +2082,20 @@ def _page_shell(
1981
2082
  .detail-toolbar {{ display: flex; flex-wrap: wrap; align-items: center; justify-content: flex-start; gap: 12px; padding: 6px 8px 10px; border-bottom: 1px solid #e5e7eb; box-sizing: border-box; }}
1982
2083
  .detail-toolbar .tabs {{ margin: 0; }}
1983
2084
  .toolbar-actions {{ display: flex; flex-wrap: wrap; align-items: center; gap: 10px; margin-left: auto; padding-right: 16px; }}
2085
+ .search-row {{ display: flex; flex-wrap: wrap; gap: 8px; margin-top: 8px; align-items: stretch; }}
2086
+ .search-row input {{ flex: 1 1 320px; min-width: 0; padding: 10px; border: 1px solid #d0d7de; border-radius: 8px; }}
2087
+ .search-row select {{ flex: 0 1 220px; min-width: 0; max-width: 100%; padding: 10px; border: 1px solid #d0d7de; border-radius: 8px; background: #fff; }}
2088
+ .filter-row {{ display: flex; flex-wrap: wrap; gap: 8px; align-items: center; margin-top: 8px; }}
2089
+ .filter-row input {{ flex: 1 1 320px; min-width: 0; padding: 10px; border: 1px solid #d0d7de; border-radius: 8px; }}
2090
+ .filter-row .help-icon {{ flex: 0 0 auto; }}
2091
+ .adv-actions {{ display: flex; gap: 8px; align-items: center; margin-top: 8px; flex-wrap: wrap; }}
1984
2092
  .split-inline {{ display: flex; flex-wrap: wrap; align-items: center; gap: 6px; }}
1985
2093
  .split-inline select {{ padding: 6px 8px; border-radius: 8px; border: 1px solid #d0d7de; background: #fff; min-width: 140px; }}
1986
2094
  .split-actions {{ display: flex; align-items: center; justify-content: center; gap: 8px; }}
1987
2095
  .split-actions button {{ padding: 6px 10px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; min-width: 36px; }}
2096
+ .lang-select {{ display: flex; align-items: center; gap: 6px; }}
2097
+ .lang-select label {{ color: #57606a; font-size: 13px; }}
2098
+ .lang-select select {{ padding: 6px 8px; border-radius: 8px; border: 1px solid #d0d7de; background: #fff; min-width: 120px; }}
1988
2099
  .fullscreen-actions {{ display: flex; align-items: center; gap: 6px; }}
1989
2100
  .fullscreen-actions button {{ padding: 6px 10px; border-radius: 8px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; }}
1990
2101
  .fullscreen-exit {{ display: none; }}
@@ -2010,10 +2121,57 @@ def _page_shell(
2010
2121
  .stats-row {{ display: flex; flex-wrap: wrap; gap: 6px; align-items: center; }}
2011
2122
  .stats-label {{ font-weight: 600; color: #0b1220; margin-right: 4px; }}
2012
2123
  .pill.stat {{ background: #f6f8fa; border-color: #c7d2e0; color: #1f2a37; }}
2124
+ .footnotes {{ border-top: 1px solid #e5e7eb; margin-top: 16px; padding-top: 12px; color: #57606a; }}
2125
+ .footnotes ol {{ padding-left: 20px; }}
2126
+ .footnotes li {{ margin-bottom: 6px; }}
2127
+ .footnote-ref {{ font-size: 0.85em; }}
2128
+ .footnote-tip {{ position: relative; display: inline-block; }}
2129
+ .footnote-tip::after {{
2130
+ content: attr(data-footnote);
2131
+ position: absolute;
2132
+ left: 50%;
2133
+ bottom: 130%;
2134
+ transform: translateX(-50%);
2135
+ width: min(320px, 70vw);
2136
+ padding: 8px 10px;
2137
+ border-radius: 8px;
2138
+ background: #0b1220;
2139
+ color: #e6edf3;
2140
+ font-size: 12px;
2141
+ line-height: 1.35;
2142
+ white-space: pre-line;
2143
+ box-shadow: 0 10px 24px rgba(0, 0, 0, 0.18);
2144
+ opacity: 0;
2145
+ pointer-events: none;
2146
+ z-index: 30;
2147
+ transition: opacity 0.12s ease-in-out;
2148
+ }}
2149
+ .footnote-tip:hover::after,
2150
+ .footnote-tip:focus::after {{
2151
+ opacity: 1;
2152
+ }}
2013
2153
  pre {{ overflow: auto; padding: 10px; background: #0b1220; color: #e6edf3; border-radius: 10px; }}
2014
2154
  code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
2015
2155
  a {{ color: #0969da; }}
2016
- @media (max-width: 768px) {{
2156
+ @media (max-width: 640px) {{
2157
+ .search-row {{
2158
+ flex-direction: column;
2159
+ }}
2160
+ .search-row input,
2161
+ .search-row select {{
2162
+ width: 100%;
2163
+ }}
2164
+ .filter-row {{
2165
+ flex-direction: column;
2166
+ align-items: stretch;
2167
+ }}
2168
+ .filter-row .help-icon {{
2169
+ align-self: flex-end;
2170
+ }}
2171
+ .adv-actions {{
2172
+ flex-direction: column;
2173
+ align-items: stretch;
2174
+ }}
2017
2175
  .detail-toolbar {{
2018
2176
  flex-wrap: nowrap;
2019
2177
  overflow-x: auto;
@@ -2235,7 +2393,7 @@ async def _index_page(request: Request) -> HTMLResponse:
2235
2393
  template_options = '<option value="" disabled>(no templates)</option>'
2236
2394
  filter_help = (
2237
2395
  "Filters syntax:\\n"
2238
- "pdf:yes|no source:yes|no summary:yes|no\\n"
2396
+ "pdf:yes|no source:yes|no translated:yes|no summary:yes|no\\n"
2239
2397
  "tmpl:<tag> or template:<tag>\\n"
2240
2398
  "has:pdf / no:source aliases\\n"
2241
2399
  "Content tags still use the search box (tag:fpga)."
@@ -2245,17 +2403,18 @@ async def _index_page(request: Request) -> HTMLResponse:
2245
2403
  <h2>Paper Database</h2>
2246
2404
  <div class="card">
2247
2405
  <div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
2248
- <div style="display:flex; gap:8px; margin-top:8px;">
2249
- <input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
2250
- <select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
2406
+ <div class="search-row">
2407
+ <input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' />
2408
+ <select id="openView">
2251
2409
  <option value="summary" selected>Open: Summary</option>
2252
2410
  <option value="source">Open: Source</option>
2411
+ <option value="translated">Open: Translated</option>
2253
2412
  <option value="pdf">Open: PDF</option>
2254
2413
  <option value="pdfjs">Open: PDF Viewer</option>
2255
2414
  <option value="split">Open: Split</option>
2256
2415
  </select>
2257
2416
  </div>
2258
- <div class="filters" style="grid-template-columns: repeat(4, 1fr); margin-top:10px;">
2417
+ <div class="filters" style="margin-top:10px;">
2259
2418
  <div class="filter-group">
2260
2419
  <label>PDF</label>
2261
2420
  <select id="filterPdf" multiple size="2">
@@ -2270,6 +2429,13 @@ async def _index_page(request: Request) -> HTMLResponse:
2270
2429
  <option value="without">Without</option>
2271
2430
  </select>
2272
2431
  </div>
2432
+ <div class="filter-group">
2433
+ <label>Translated</label>
2434
+ <select id="filterTranslated" multiple size="2">
2435
+ <option value="with">With</option>
2436
+ <option value="without">Without</option>
2437
+ </select>
2438
+ </div>
2273
2439
  <div class="filter-group">
2274
2440
  <label>Summary</label>
2275
2441
  <select id="filterSummary" multiple size="2">
@@ -2284,14 +2450,14 @@ async def _index_page(request: Request) -> HTMLResponse:
2284
2450
  </select>
2285
2451
  </div>
2286
2452
  </div>
2287
- <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
2288
- <input id="filterQuery" placeholder='Filters... e.g. pdf:yes tmpl:simple' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
2453
+ <div class="filter-row">
2454
+ <input id="filterQuery" placeholder='Filters... e.g. pdf:yes tmpl:simple' />
2289
2455
  <span class="help-icon" data-tip="__FILTER_HELP__">?</span>
2290
2456
  </div>
2291
2457
  <details style="margin-top:10px;">
2292
2458
  <summary>Advanced search</summary>
2293
2459
  <div style="margin-top:10px;" class="muted">Build a query:</div>
2294
- <div class="filters" style="grid-template-columns: repeat(3, 1fr);">
2460
+ <div class="filters">
2295
2461
  <input id="advTitle" placeholder="title contains..." />
2296
2462
  <input id="advAuthor" placeholder="author contains..." />
2297
2463
  <input id="advTag" placeholder="tag (comma separated)" />
@@ -2299,7 +2465,7 @@ async def _index_page(request: Request) -> HTMLResponse:
2299
2465
  <input id="advMonth" placeholder="month (01-12)" />
2300
2466
  <input id="advVenue" placeholder="venue contains..." />
2301
2467
  </div>
2302
- <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
2468
+ <div class="adv-actions">
2303
2469
  <button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
2304
2470
  <div class="muted">Generated: <code id="generated"></code></div>
2305
2471
  </div>
@@ -2333,6 +2499,7 @@ function currentParams(nextPage) {
2333
2499
  }
2334
2500
  addMulti("filterPdf", "pdf");
2335
2501
  addMulti("filterSource", "source");
2502
+ addMulti("filterTranslated", "translated");
2336
2503
  addMulti("filterSummary", "summary");
2337
2504
  addMulti("filterTemplate", "template");
2338
2505
  return params;
@@ -2348,7 +2515,7 @@ function viewSuffixForItem(item) {
2348
2515
  let view = document.getElementById("openView").value;
2349
2516
  const isPdfOnly = item.is_pdf_only;
2350
2517
  const pdfFallback = item.has_pdf ? "pdfjs" : "pdf";
2351
- if (isPdfOnly && (view === "summary" || view === "source")) {
2518
+ if (isPdfOnly && (view === "summary" || view === "source" || view === "translated")) {
2352
2519
  view = pdfFallback;
2353
2520
  }
2354
2521
  if (!view || view === "summary") return "";
@@ -2380,6 +2547,7 @@ function renderItem(item) {
2380
2547
  const viewSuffix = viewSuffixForItem(item);
2381
2548
  const badges = [
2382
2549
  item.has_source ? `<span class="pill">source</span>` : "",
2550
+ item.has_translation ? `<span class="pill">translated</span>` : "",
2383
2551
  item.has_pdf ? `<span class="pill">pdf</span>` : "",
2384
2552
  item.is_pdf_only ? `<span class="pill pdf-only">pdf-only</span>` : "",
2385
2553
  ].join("");
@@ -2401,6 +2569,7 @@ function renderStatsRow(targetId, label, counts) {
2401
2569
  pills.push(`<span class="pill stat">Count ${counts.total}</span>`);
2402
2570
  pills.push(`<span class="pill stat">PDF ${counts.pdf}</span>`);
2403
2571
  pills.push(`<span class="pill stat">Source ${counts.source}</span>`);
2572
+ pills.push(`<span class="pill stat">Translated ${counts.translated || 0}</span>`);
2404
2573
  pills.push(`<span class="pill stat">Summary ${counts.summary}</span>`);
2405
2574
  const order = counts.template_order || Object.keys(counts.templates || {});
2406
2575
  for (const tag of order) {
@@ -2451,6 +2620,7 @@ document.getElementById("openView").addEventListener("change", resetAndLoad);
2451
2620
  document.getElementById("filterQuery").addEventListener("change", resetAndLoad);
2452
2621
  document.getElementById("filterPdf").addEventListener("change", resetAndLoad);
2453
2622
  document.getElementById("filterSource").addEventListener("change", resetAndLoad);
2623
+ document.getElementById("filterTranslated").addEventListener("change", resetAndLoad);
2454
2624
  document.getElementById("filterSummary").addEventListener("change", resetAndLoad);
2455
2625
  document.getElementById("filterTemplate").addEventListener("change", resetAndLoad);
2456
2626
 
@@ -2511,6 +2681,7 @@ def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
2511
2681
  pdf_filters = [item for item in qp.getlist("pdf") if item]
2512
2682
  source_filters = [item for item in qp.getlist("source") if item]
2513
2683
  summary_filters = [item for item in qp.getlist("summary") if item]
2684
+ translated_filters = [item for item in qp.getlist("translated") if item]
2514
2685
  template_filters = [item for item in qp.getlist("template") if item]
2515
2686
 
2516
2687
  return {
@@ -2521,6 +2692,7 @@ def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
2521
2692
  "pdf": pdf_filters,
2522
2693
  "source": source_filters,
2523
2694
  "summary": summary_filters,
2695
+ "translated": translated_filters,
2524
2696
  "template": template_filters,
2525
2697
  }
2526
2698
 
@@ -2542,6 +2714,9 @@ async def _api_papers(request: Request) -> JSONResponse:
2542
2714
  summary_filter = _merge_filter_set(
2543
2715
  _presence_filter(filters["summary"]), _presence_filter(list(filter_terms["summary"]))
2544
2716
  )
2717
+ translated_filter = _merge_filter_set(
2718
+ _presence_filter(filters["translated"]), _presence_filter(list(filter_terms["translated"]))
2719
+ )
2545
2720
  template_selected = {item.lower() for item in filters["template"] if item}
2546
2721
  template_filter = _merge_filter_set(
2547
2722
  template_selected or None,
@@ -2556,12 +2731,15 @@ async def _api_papers(request: Request) -> JSONResponse:
2556
2731
  has_source = source_hash in index.md_path_by_hash
2557
2732
  has_pdf = source_hash in index.pdf_path_by_hash
2558
2733
  has_summary = bool(paper.get("_has_summary"))
2734
+ has_translated = bool(index.translated_md_by_hash.get(source_hash))
2559
2735
  if not _matches_presence(pdf_filter, has_pdf):
2560
2736
  continue
2561
2737
  if not _matches_presence(source_filter, has_source):
2562
2738
  continue
2563
2739
  if not _matches_presence(summary_filter, has_summary):
2564
2740
  continue
2741
+ if not _matches_presence(translated_filter, has_translated):
2742
+ continue
2565
2743
  if template_filter:
2566
2744
  tags = paper.get("_template_tags_lc") or []
2567
2745
  if not any(tag in template_filter for tag in tags):
@@ -2585,6 +2763,8 @@ async def _api_papers(request: Request) -> JSONResponse:
2585
2763
  for idx in page_ids:
2586
2764
  paper = index.papers[idx]
2587
2765
  source_hash = str(paper.get("source_hash") or stable_hash(str(paper.get("source_path") or idx)))
2766
+ translations = index.translated_md_by_hash.get(source_hash, {})
2767
+ translation_languages = sorted(translations.keys(), key=str.lower)
2588
2768
  items.append(
2589
2769
  {
2590
2770
  "source_hash": source_hash,
@@ -2596,9 +2776,11 @@ async def _api_papers(request: Request) -> JSONResponse:
2596
2776
  "tags": paper.get("_tags") or [],
2597
2777
  "template_tags": paper.get("_template_tags") or [],
2598
2778
  "has_source": source_hash in index.md_path_by_hash,
2779
+ "has_translation": bool(translation_languages),
2599
2780
  "has_pdf": source_hash in index.pdf_path_by_hash,
2600
2781
  "has_summary": bool(paper.get("_has_summary")),
2601
2782
  "is_pdf_only": bool(paper.get("_is_pdf_only")),
2783
+ "translation_languages": translation_languages,
2602
2784
  }
2603
2785
  )
2604
2786
 
@@ -2631,7 +2813,19 @@ async def _paper_detail(request: Request) -> HTMLResponse:
2631
2813
  pdf_path = index.pdf_path_by_hash.get(source_hash)
2632
2814
  pdf_url = f"/api/pdf/{source_hash}"
2633
2815
  source_available = source_hash in index.md_path_by_hash
2634
- allowed_views = {"summary", "source", "pdf", "pdfjs", "split"}
2816
+ translations = index.translated_md_by_hash.get(source_hash, {})
2817
+ translation_langs = sorted(translations.keys(), key=str.lower)
2818
+ lang_param = request.query_params.get("lang")
2819
+ normalized_lang = lang_param.lower() if lang_param else None
2820
+ selected_lang = None
2821
+ if translation_langs:
2822
+ if normalized_lang and normalized_lang in translations:
2823
+ selected_lang = normalized_lang
2824
+ elif "zh" in translations:
2825
+ selected_lang = "zh"
2826
+ else:
2827
+ selected_lang = translation_langs[0]
2828
+ allowed_views = {"summary", "source", "translated", "pdf", "pdfjs", "split"}
2635
2829
  if is_pdf_only:
2636
2830
  allowed_views = {"pdf", "pdfjs", "split"}
2637
2831
 
@@ -2651,10 +2845,11 @@ async def _paper_detail(request: Request) -> HTMLResponse:
2651
2845
  left = normalize_view(left_param, preferred_pdf_view) if left_param else preferred_pdf_view
2652
2846
  right = normalize_view(right_param, preferred_pdf_view) if right_param else preferred_pdf_view
2653
2847
  else:
2654
- default_right = "pdfjs" if pdf_path else ("source" if source_available else "summary")
2848
+ default_left = preferred_pdf_view if pdf_path else ("source" if source_available else "summary")
2849
+ default_right = "summary"
2655
2850
  left_param = request.query_params.get("left")
2656
2851
  right_param = request.query_params.get("right")
2657
- left = normalize_view(left_param, "summary") if left_param else "summary"
2852
+ left = normalize_view(left_param, default_left) if left_param else default_left
2658
2853
  right = normalize_view(right_param, default_right) if right_param else default_right
2659
2854
 
2660
2855
  def render_page(title: str, body: str, extra_head: str = "", extra_scripts: str = "") -> HTMLResponse:
@@ -2667,6 +2862,8 @@ async def _paper_detail(request: Request) -> HTMLResponse:
2667
2862
  params: dict[str, str] = {"view": v}
2668
2863
  if v == "summary" and template_param:
2669
2864
  params["template"] = str(template_param)
2865
+ if v == "translated" and selected_lang:
2866
+ params["lang"] = selected_lang
2670
2867
  if v == "split":
2671
2868
  params["left"] = left
2672
2869
  params["right"] = right
@@ -2676,6 +2873,7 @@ async def _paper_detail(request: Request) -> HTMLResponse:
2676
2873
  tab_defs = [
2677
2874
  ("Summary", "summary"),
2678
2875
  ("Source", "source"),
2876
+ ("Translated", "translated"),
2679
2877
  ("PDF", "pdf"),
2680
2878
  ("PDF Viewer", "pdfjs"),
2681
2879
  ("Split", "split"),
@@ -2758,6 +2956,8 @@ document.addEventListener('keydown', (event) => {
2758
2956
  params: dict[str, str] = {"view": pane_view, "embed": "1"}
2759
2957
  if pane_view == "summary" and template_param:
2760
2958
  params["template"] = str(template_param)
2959
+ if pane_view == "translated" and selected_lang:
2960
+ params["lang"] = selected_lang
2761
2961
  return f"/paper/{source_hash}?{urlencode(params)}"
2762
2962
 
2763
2963
  left_src = pane_src(left)
@@ -2765,6 +2965,7 @@ document.addEventListener('keydown', (event) => {
2765
2965
  options = [
2766
2966
  ("summary", "Summary"),
2767
2967
  ("source", "Source"),
2968
+ ("translated", "Translated"),
2768
2969
  ("pdf", "PDF"),
2769
2970
  ("pdfjs", "PDF Viewer"),
2770
2971
  ]
@@ -2773,6 +2974,16 @@ document.addEventListener('keydown', (event) => {
2773
2974
  ("pdf", "PDF"),
2774
2975
  ("pdfjs", "PDF Viewer"),
2775
2976
  ]
2977
+ if translation_langs:
2978
+ lang_options = "\n".join(
2979
+ f'<option value="{html.escape(lang)}"{" selected" if lang == selected_lang else ""}>'
2980
+ f'{html.escape(lang)}</option>'
2981
+ for lang in translation_langs
2982
+ )
2983
+ lang_disabled = ""
2984
+ else:
2985
+ lang_options = '<option value="" selected>(no translations)</option>'
2986
+ lang_disabled = " disabled"
2776
2987
  left_options = "\n".join(
2777
2988
  f'<option value="{value}"{" selected" if value == left else ""}>{label}</option>'
2778
2989
  for value, label in options
@@ -2796,6 +3007,10 @@ document.addEventListener('keydown', (event) => {
2796
3007
  <select id="splitRight">
2797
3008
  {right_options}
2798
3009
  </select>
3010
+ <span class="muted">Lang</span>
3011
+ <select id="splitLang"{lang_disabled}>
3012
+ {lang_options}
3013
+ </select>
2799
3014
  </div>
2800
3015
  """
2801
3016
  toolbar_html = detail_toolbar(split_controls)
@@ -2854,6 +3069,7 @@ document.addEventListener('keydown', (event) => {
2854
3069
  <script>
2855
3070
  const leftSelect = document.getElementById('splitLeft');
2856
3071
  const rightSelect = document.getElementById('splitRight');
3072
+ const langSelect = document.getElementById('splitLang');
2857
3073
  const swapButton = document.getElementById('splitSwap');
2858
3074
  const tightenButton = document.getElementById('splitTighten');
2859
3075
  const widenButton = document.getElementById('splitWiden');
@@ -2862,10 +3078,16 @@ function updateSplit() {
2862
3078
  params.set('view', 'split');
2863
3079
  params.set('left', leftSelect.value);
2864
3080
  params.set('right', rightSelect.value);
3081
+ if (langSelect && langSelect.value) {
3082
+ params.set('lang', langSelect.value);
3083
+ }
2865
3084
  window.location.search = params.toString();
2866
3085
  }
2867
3086
  leftSelect.addEventListener('change', updateSplit);
2868
3087
  rightSelect.addEventListener('change', updateSplit);
3088
+ if (langSelect) {
3089
+ langSelect.addEventListener('change', updateSplit);
3090
+ }
2869
3091
  swapButton.addEventListener('click', () => {
2870
3092
  const leftValue = leftSelect.value;
2871
3093
  leftSelect.value = rightSelect.value;
@@ -2914,6 +3136,134 @@ applySplitWidth();
2914
3136
  extra_scripts=extra_scripts + fullscreen_script,
2915
3137
  )
2916
3138
 
3139
+ if view == "translated":
3140
+ if translation_langs:
3141
+ lang_options = "\n".join(
3142
+ f'<option value="{html.escape(lang)}"{" selected" if lang == selected_lang else ""}>'
3143
+ f'{html.escape(lang)}</option>'
3144
+ for lang in translation_langs
3145
+ )
3146
+ disabled_attr = ""
3147
+ else:
3148
+ lang_options = '<option value="" selected>(no translations)</option>'
3149
+ disabled_attr = " disabled"
3150
+ lang_controls = f"""
3151
+ <div class="lang-select">
3152
+ <label for="translationLang">Language</label>
3153
+ <select id="translationLang"{disabled_attr}>
3154
+ {lang_options}
3155
+ </select>
3156
+ </div>
3157
+ """
3158
+ toolbar_html = detail_toolbar(lang_controls)
3159
+ if not translation_langs or not selected_lang:
3160
+ body = wrap_detail(
3161
+ '<div class="warning">No translated markdown found. '
3162
+ 'Provide <code>--md-translated-root</code> and place '
3163
+ '<code>&lt;base&gt;.&lt;lang&gt;.md</code> under that root.</div>',
3164
+ toolbar_html=toolbar_html,
3165
+ )
3166
+ return render_page("Translated", body, extra_scripts=fullscreen_script)
3167
+ translated_path = translations.get(selected_lang)
3168
+ if not translated_path:
3169
+ body = wrap_detail(
3170
+ '<div class="warning">Translated markdown not found for the selected language.</div>',
3171
+ toolbar_html=toolbar_html,
3172
+ )
3173
+ return render_page("Translated", body, extra_scripts=fullscreen_script)
3174
+ try:
3175
+ raw = translated_path.read_text(encoding="utf-8")
3176
+ except UnicodeDecodeError:
3177
+ raw = translated_path.read_text(encoding="latin-1")
3178
+ raw = _normalize_markdown_images(raw)
3179
+ rendered = _render_markdown_with_math_placeholders(md, raw)
3180
+ body = wrap_detail(
3181
+ f"""
3182
+ <div class="muted">Language: {html.escape(selected_lang)}</div>
3183
+ <div class="muted">{html.escape(str(translated_path))}</div>
3184
+ <div class="muted" style="margin-top:10px;">Rendered from translated markdown:</div>
3185
+ {outline_html}
3186
+ <div id="content">{rendered}</div>
3187
+ <details style="margin-top:12px;"><summary>Raw markdown</summary>
3188
+ <pre><code>{html.escape(raw)}</code></pre>
3189
+ </details>
3190
+ """,
3191
+ toolbar_html=toolbar_html,
3192
+ )
3193
+ extra_head = f"""
3194
+ <link rel="stylesheet" href="{_CDN_KATEX}" />
3195
+ {outline_css}
3196
+ <style>
3197
+ #content img {{
3198
+ max-width: 100%;
3199
+ height: auto;
3200
+ }}
3201
+ </style>
3202
+ """
3203
+ extra_scripts = f"""
3204
+ <script src="{_CDN_MERMAID}"></script>
3205
+ <script src="{_CDN_KATEX_JS}"></script>
3206
+ <script src="{_CDN_KATEX_AUTO}"></script>
3207
+ <script>
3208
+ const translationSelect = document.getElementById('translationLang');
3209
+ if (translationSelect) {{
3210
+ translationSelect.addEventListener('change', () => {{
3211
+ const params = new URLSearchParams(window.location.search);
3212
+ params.set('view', 'translated');
3213
+ params.set('lang', translationSelect.value);
3214
+ window.location.search = params.toString();
3215
+ }});
3216
+ }}
3217
+ document.querySelectorAll('code.language-mermaid').forEach((code) => {{
3218
+ const pre = code.parentElement;
3219
+ const div = document.createElement('div');
3220
+ div.className = 'mermaid';
3221
+ div.textContent = code.textContent;
3222
+ pre.replaceWith(div);
3223
+ }});
3224
+ if (window.mermaid) {{
3225
+ mermaid.initialize({{ startOnLoad: false }});
3226
+ mermaid.run();
3227
+ }}
3228
+ if (window.renderMathInElement) {{
3229
+ renderMathInElement(document.getElementById('content'), {{
3230
+ delimiters: [
3231
+ {{left: '$$', right: '$$', display: true}},
3232
+ {{left: '$', right: '$', display: false}},
3233
+ {{left: '\\\\(', right: '\\\\)', display: false}},
3234
+ {{left: '\\\\[', right: '\\\\]', display: true}}
3235
+ ],
3236
+ throwOnError: false
3237
+ }});
3238
+ }}
3239
+ if (document.querySelector('.footnotes')) {{
3240
+ const notes = {{}};
3241
+ document.querySelectorAll('.footnotes li[id]').forEach((li) => {{
3242
+ const id = li.getAttribute('id');
3243
+ if (!id) return;
3244
+ const clone = li.cloneNode(true);
3245
+ clone.querySelectorAll('a.footnote-backref').forEach((el) => el.remove());
3246
+ const text = (clone.textContent || '').replace(/\\s+/g, ' ').trim();
3247
+ if (text) notes['#' + id] = text.length > 400 ? text.slice(0, 397) + '…' : text;
3248
+ }});
3249
+ document.querySelectorAll('.footnote-ref a[href^="#fn"]').forEach((link) => {{
3250
+ const ref = link.getAttribute('href');
3251
+ const text = notes[ref];
3252
+ if (!text) return;
3253
+ link.dataset.footnote = text;
3254
+ link.classList.add('footnote-tip');
3255
+ }});
3256
+ }}
3257
+ {outline_js}
3258
+ </script>
3259
+ """
3260
+ return render_page(
3261
+ "Translated",
3262
+ body,
3263
+ extra_head=extra_head,
3264
+ extra_scripts=extra_scripts + fullscreen_script,
3265
+ )
3266
+
2917
3267
  if view == "source":
2918
3268
  source_path = index.md_path_by_hash.get(source_hash)
2919
3269
  if not source_path:
@@ -2974,6 +3324,24 @@ if (window.renderMathInElement) {{
2974
3324
  throwOnError: false
2975
3325
  }});
2976
3326
  }}
3327
+ if (document.querySelector('.footnotes')) {{
3328
+ const notes = {{}};
3329
+ document.querySelectorAll('.footnotes li[id]').forEach((li) => {{
3330
+ const id = li.getAttribute('id');
3331
+ if (!id) return;
3332
+ const clone = li.cloneNode(true);
3333
+ clone.querySelectorAll('a.footnote-backref').forEach((el) => el.remove());
3334
+ const text = (clone.textContent || '').replace(/\\s+/g, ' ').trim();
3335
+ if (text) notes['#' + id] = text.length > 400 ? text.slice(0, 397) + '…' : text;
3336
+ }});
3337
+ document.querySelectorAll('.footnote-ref a[href^="#fn"]').forEach((link) => {{
3338
+ const ref = link.getAttribute('href');
3339
+ const text = notes[ref];
3340
+ if (!text) return;
3341
+ link.dataset.footnote = text;
3342
+ link.classList.add('footnote-tip');
3343
+ }});
3344
+ }}
2977
3345
  {outline_js}
2978
3346
  </script>
2979
3347
  """
@@ -3188,6 +3556,24 @@ if (window.renderMathInElement) {{
3188
3556
  throwOnError: false
3189
3557
  }});
3190
3558
  }}
3559
+ if (document.querySelector('.footnotes')) {{
3560
+ const notes = {{}};
3561
+ document.querySelectorAll('.footnotes li[id]').forEach((li) => {{
3562
+ const id = li.getAttribute('id');
3563
+ if (!id) return;
3564
+ const clone = li.cloneNode(true);
3565
+ clone.querySelectorAll('a.footnote-backref').forEach((el) => el.remove());
3566
+ const text = (clone.textContent || '').replace(/\\s+/g, ' ').trim();
3567
+ if (text) notes['#' + id] = text.length > 400 ? text.slice(0, 397) + '…' : text;
3568
+ }});
3569
+ document.querySelectorAll('.footnote-ref a[href^="#fn"]').forEach((link) => {{
3570
+ const ref = link.getAttribute('href');
3571
+ const text = notes[ref];
3572
+ if (!text) return;
3573
+ link.dataset.footnote = text;
3574
+ link.classList.add('footnote-tip');
3575
+ }});
3576
+ }}
3191
3577
  {outline_js}
3192
3578
  </script>
3193
3579
  """
@@ -3329,6 +3715,7 @@ def create_app(
3329
3715
  fallback_language: str = "en",
3330
3716
  bibtex_path: Path | None = None,
3331
3717
  md_roots: list[Path] | None = None,
3718
+ md_translated_roots: list[Path] | None = None,
3332
3719
  pdf_roots: list[Path] | None = None,
3333
3720
  cache_dir: Path | None = None,
3334
3721
  use_cache: bool = True,
@@ -3336,8 +3723,14 @@ def create_app(
3336
3723
  papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache, pdf_roots=pdf_roots)
3337
3724
 
3338
3725
  md_roots = md_roots or []
3726
+ md_translated_roots = md_translated_roots or []
3339
3727
  pdf_roots = pdf_roots or []
3340
- index = build_index(papers, md_roots=md_roots, pdf_roots=pdf_roots)
3728
+ index = build_index(
3729
+ papers,
3730
+ md_roots=md_roots,
3731
+ md_translated_roots=md_translated_roots,
3732
+ pdf_roots=pdf_roots,
3733
+ )
3341
3734
  md = _md_renderer()
3342
3735
  routes = [
3343
3736
  Route("/", _index_page, methods=["GET"]),