PyPI - deepresearch-flow - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

deepresearch-flow 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

deepresearch_flow/paper/web/app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 import html
 import json
 import logging
+import unicodedata
 from dataclasses import dataclass
 from html.parser import HTMLParser
 from pathlib import Path
@@ -32,6 +33,12 @@ try:
 except Exception:
     PYBTEX_AVAILABLE = False
+try:
+    from pypdf import PdfReader
+    PYPDF_AVAILABLE = True
+except Exception:
+    PYPDF_AVAILABLE = False
 _CDN_ECHARTS = "https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"
 _CDN_MERMAID = "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"
@@ -60,6 +67,7 @@ class PaperIndex:
     stats: dict[str, Any]
     md_path_by_hash: dict[str, Path]
     pdf_path_by_hash: dict[str, Path]
+    template_tags: list[str]
 def _split_csv(values: list[str]) -> list[str]:
@@ -189,6 +197,41 @@ def _extract_tags(paper: dict[str, Any]) -> list[str]:
     return []
+def _extract_keywords(paper: dict[str, Any]) -> list[str]:
+    """Return the paper's ``keywords`` as a list of stripped, non-empty strings.
+
+    A list value is stringified item by item; a string value is split on
+    ``;`` or ``,``. A missing/falsy value or any other type yields ``[]``.
+    """
+    keywords = paper.get("keywords") or []
+    if isinstance(keywords, list):
+        return [str(keyword).strip() for keyword in keywords if str(keyword).strip()]
+    if isinstance(keywords, str):
+        parts = re.split(r"[;,]", keywords)
+        return [part.strip() for part in parts if part.strip()]
+    return []
+# Paper keys that _has_summary inspects; a non-blank string under any of
+# them marks the paper as having summary content.
+_SUMMARY_FIELDS = (
+    "summary",
+    "abstract",
+    "keywords",
+    "question1",
+    "question2",
+    "question3",
+    "question4",
+    "question5",
+    "question6",
+    "question7",
+    "question8",
+)
+def _has_summary(paper: dict[str, Any], template_tags: list[str]) -> bool:
+    """Report whether the paper carries any summary content.
+
+    Args:
+        paper: Paper record to inspect.
+        template_tags: Template tags already resolved for the paper; any
+            non-empty list counts as a summary on its own.
+
+    Returns:
+        True if ``template_tags`` is non-empty or any key in
+        ``_SUMMARY_FIELDS`` holds non-blank content, else False.
+    """
+    if template_tags:
+        return True
+    for key in _SUMMARY_FIELDS:
+        value = paper.get(key)
+        if isinstance(value, str) and value.strip():
+            return True
+        # "keywords" is commonly stored as a list (the first form that
+        # _extract_keywords handles); count it when any item is non-blank
+        # instead of silently ignoring every non-string value.
+        if isinstance(value, (list, tuple)) and any(str(item).strip() for item in value):
+            return True
+    return False
 def _extract_venue(paper: dict[str, Any]) -> str:
     if isinstance(paper.get("bibtex"), dict):
         bib = paper.get("bibtex") or {}
@@ -223,13 +266,16 @@ def build_index(
     year_counts: dict[str, int] = {}
     month_counts: dict[str, int] = {}
     tag_counts: dict[str, int] = {}
+    keyword_counts: dict[str, int] = {}
     author_counts: dict[str, int] = {}
     venue_counts: dict[str, int] = {}
+    template_tag_counts: dict[str, int] = {}
     def add_index(index: dict[str, set[int]], key: str, idx: int) -> None:
         index.setdefault(key, set()).add(idx)
     for idx, paper in enumerate(papers):
+        is_pdf_only = bool(paper.get("_is_pdf_only"))
         source_hash = paper.get("source_hash")
         if not source_hash and paper.get("source_path"):
             source_hash = stable_hash(str(paper.get("source_path")))
@@ -258,31 +304,54 @@ def build_index(
         paper["_month"] = month_label
         add_index(by_year, _normalize_key(year_label), idx)
         add_index(by_month, _normalize_key(month_label), idx)
-        year_counts[year_label] = year_counts.get(year_label, 0) + 1
-        month_counts[month_label] = month_counts.get(month_label, 0) + 1
+        if not is_pdf_only:
+            year_counts[year_label] = year_counts.get(year_label, 0) + 1
+            month_counts[month_label] = month_counts.get(month_label, 0) + 1
         venue = _extract_venue(paper).strip()
         paper["_venue"] = venue
         if venue:
             add_index(by_venue, _normalize_key(venue), idx)
-            venue_counts[venue] = venue_counts.get(venue, 0) + 1
+            if not is_pdf_only:
+                venue_counts[venue] = venue_counts.get(venue, 0) + 1
         else:
             add_index(by_venue, "unknown", idx)
-            venue_counts["Unknown"] = venue_counts.get("Unknown", 0) + 1
+            if not is_pdf_only:
+                venue_counts["Unknown"] = venue_counts.get("Unknown", 0) + 1
         authors = _extract_authors(paper)
         paper["_authors"] = authors
         for author in authors:
             key = _normalize_key(author)
             add_index(by_author, key, idx)
-            author_counts[author] = author_counts.get(author, 0) + 1
+            if not is_pdf_only:
+                author_counts[author] = author_counts.get(author, 0) + 1
         tags = _extract_tags(paper)
         paper["_tags"] = tags
         for tag in tags:
             key = _normalize_key(tag)
             add_index(by_tag, key, idx)
-            tag_counts[tag] = tag_counts.get(tag, 0) + 1
+            if not is_pdf_only:
+                tag_counts[tag] = tag_counts.get(tag, 0) + 1
+        keywords = _extract_keywords(paper)
+        paper["_keywords"] = keywords
+        for keyword in keywords:
+            if not is_pdf_only:
+                keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
+        template_tags = _available_templates(paper)
+        if not template_tags:
+            fallback_tag = paper.get("template_tag") or paper.get("prompt_template")
+            if fallback_tag:
+                template_tags = [str(fallback_tag)]
+        paper["_template_tags"] = template_tags
+        paper["_template_tags_lc"] = [tag.lower() for tag in template_tags]
+        paper["_has_summary"] = _has_summary(paper, template_tags)
+        if not is_pdf_only:
+            for tag in template_tags:
+                template_tag_counts[tag] = template_tag_counts.get(tag, 0) + 1
         search_parts = [title, venue, " ".join(authors), " ".join(tags)]
         paper["_search_lc"] = " ".join(part for part in search_parts if part).lower()
@@ -305,15 +374,19 @@ def build_index(
     ordered_ids = [idx for idx, _ in sorted(enumerate(papers), key=year_sort_key)]
+    stats_total = sum(1 for paper in papers if not paper.get("_is_pdf_only"))
     stats = {
-        "total": len(papers),
+        "total": stats_total,
         "years": _sorted_counts(year_counts, numeric_desc=True),
         "months": _sorted_month_counts(month_counts),
         "tags": _sorted_counts(tag_counts),
+        "keywords": _sorted_counts(keyword_counts),
         "authors": _sorted_counts(author_counts),
         "venues": _sorted_counts(venue_counts),
     }
+    template_tags = sorted(template_tag_counts.keys(), key=lambda item: item.lower())
     return PaperIndex(
         papers=papers,
         id_by_hash=id_by_hash,
@@ -326,6 +399,7 @@ def build_index(
         stats=stats,
         md_path_by_hash=md_path_by_hash,
         pdf_path_by_hash=pdf_path_by_hash,
+        template_tags=template_tags,
     )
@@ -422,7 +496,11 @@ def _infer_template_tag(papers: list[dict[str, Any]], path: Path) -> str:
     return best_tag
-def _build_cache_meta(db_paths: list[Path], bibtex_path: Path | None) -> dict[str, Any]:
+def _build_cache_meta(
+    db_paths: list[Path],
+    bibtex_path: Path | None,
+    pdf_roots_meta: list[dict[str, Any]] | None = None,
+) -> dict[str, Any]:
     def file_meta(path: Path) -> dict[str, Any]:
         try:
             stats = path.stat()
@@ -435,6 +513,8 @@ def _build_cache_meta(db_paths: list[Path], bibtex_path: Path | None) -> dict[st
         "inputs": [file_meta(path) for path in db_paths],
         "bibtex": file_meta(bibtex_path) if bibtex_path else None,
     }
+    if pdf_roots_meta is not None:
+        meta["pdf_roots"] = pdf_roots_meta
     return meta
@@ -462,16 +542,72 @@ def _write_cached_papers(cache_dir: Path, meta: dict[str, Any], papers: list[dic
     data_path.write_text(json.dumps(papers, ensure_ascii=False, indent=2), encoding="utf-8")
+def _extract_year_for_matching(paper: dict[str, Any]) -> str | None:
+    """Return the year string used when matching a paper against files.
+
+    Prefers an all-digit ``year`` from ``paper["bibtex"]["fields"]``; otherwise
+    returns the first element of ``_parse_year_month`` applied to
+    ``publication_date`` (that helper is defined outside this view).
+    """
+    if isinstance(paper.get("bibtex"), dict):
+        fields = paper.get("bibtex", {}).get("fields", {}) or {}
+        year = fields.get("year")
+        if year and str(year).isdigit():
+            return str(year)
+    parsed_year, _ = _parse_year_month(str(paper.get("publication_date") or ""))
+    return parsed_year
+def _prepare_paper_matching_fields(paper: dict[str, Any]) -> None:
+    """Fill ``_authors`` and ``_year`` on ``paper`` in place if they are absent.
+
+    Existing values are left untouched; a year that cannot be determined is
+    stored as an empty string.
+    """
+    if "_authors" not in paper:
+        paper["_authors"] = _extract_authors(paper)
+    if "_year" not in paper:
+        paper["_year"] = _extract_year_for_matching(paper) or ""
+def _build_pdf_only_entries(
+    papers: list[dict[str, Any]],
+    pdf_paths: list[Path],
+    pdf_index: dict[str, list[Path]],
+) -> list[dict[str, Any]]:
+    """Create placeholder paper records for PDFs that no paper resolves to.
+
+    Args:
+        papers: Known papers; each gets ``_authors``/``_year`` filled in place
+            before its PDF is looked up.
+        pdf_paths: Every PDF path that was scanned.
+        pdf_index: Lookup index passed to ``_resolve_pdf``.
+
+    Returns:
+        One dict per unmatched PDF, flagged with ``"_is_pdf_only": True``.
+    """
+    # NOTE(review): the visible caller builds pdf_index with
+    # _build_file_index_from_paths, which adds no "year:" / "authoryear:"
+    # keys, so the author/year fallback in _resolve_by_title_and_meta cannot
+    # hit for that index -- confirm whether this is intended.
+    matched: set[Path] = set()
+    for paper in papers:
+        _prepare_paper_matching_fields(paper)
+        pdf_path = _resolve_pdf(paper, pdf_index)
+        if pdf_path:
+            matched.add(pdf_path.resolve())
+    entries: list[dict[str, Any]] = []
+    for path in pdf_paths:
+        resolved = path.resolve()
+        if resolved in matched:
+            continue
+        # Title preference: PDF metadata, then the filename-derived title,
+        # then the bare file stem.
+        title = _read_pdf_metadata_title(resolved) or _extract_title_from_filename(resolved.name)
+        if not title:
+            title = resolved.stem
+        year_hint, author_hint = _extract_year_author_from_filename(resolved.name)
+        entry: dict[str, Any] = {
+            "paper_title": title,
+            "paper_authors": [author_hint] if author_hint else [],
+            "publication_date": year_hint or "",
+            "source_hash": stable_hash(str(resolved)),
+            "source_path": str(resolved),
+            "_is_pdf_only": True,
+        }
+        entries.append(entry)
+    return entries
 def _load_or_merge_papers(
     db_paths: list[Path],
     bibtex_path: Path | None,
     cache_dir: Path | None,
     use_cache: bool,
+    pdf_roots: list[Path] | None = None,
 ) -> list[dict[str, Any]]:
     cache_meta = None
+    pdf_roots = pdf_roots or []
+    pdf_paths: list[Path] = []
+    pdf_roots_meta: list[dict[str, Any]] | None = None
+    if pdf_roots:
+        pdf_paths, pdf_roots_meta = _scan_pdf_roots(pdf_roots)
     if cache_dir and use_cache:
         cache_dir.mkdir(parents=True, exist_ok=True)
-        cache_meta = _build_cache_meta(db_paths, bibtex_path)
+        cache_meta = _build_cache_meta(db_paths, bibtex_path, pdf_roots_meta)
         cached = _load_cached_papers(cache_dir, cache_meta)
         if cached is not None:
             return cached
@@ -481,6 +617,9 @@ def _load_or_merge_papers(
         for bundle in inputs:
             enrich_with_bibtex(bundle["papers"], bibtex_path)
     papers = _merge_paper_inputs(inputs)
+    if pdf_paths:
+        pdf_index = _build_file_index_from_paths(pdf_paths, suffixes={".pdf"})
+        papers.extend(_build_pdf_only_entries(papers, pdf_paths, pdf_index))
     if cache_dir and use_cache and cache_meta is not None:
         _write_cached_papers(cache_dir, cache_meta, papers)
@@ -488,7 +627,18 @@ def _load_or_merge_papers(
 def _md_renderer() -> MarkdownIt:
-    return MarkdownIt("commonmark", {"html": False, "linkify": True})
+    md = MarkdownIt("commonmark", {"html": False, "linkify": True})
+    md.enable("table")
+    return md
+def _strip_paragraph_wrapped_tables(text: str) -> str:
+    """Remove ``<p>`` / ``</p>`` wrappers that hug pipe-table rows.
+
+    On each line, a leading ``<p>`` directly before ``|`` and a trailing
+    ``</p>`` directly after ``|`` are dropped, leaving the bare ``|``.
+    Lines are re-joined with ``\\n``, so a trailing newline in ``text`` is
+    not preserved.
+    """
+    lines = text.splitlines()
+    for idx, line in enumerate(lines):
+        line = re.sub(r"^\s*<p>\s*\|", "|", line)
+        line = re.sub(r"\|\s*</p>\s*$", "|", line)
+        lines[idx] = line
+    return "\n".join(lines)
 def _normalize_merge_title(value: str | None) -> str | None:
@@ -648,6 +798,7 @@ def _merge_paper_inputs(inputs: list[dict[str, Any]]) -> list[dict[str, Any]]:
 def _render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
+    text = _strip_paragraph_wrapped_tables(text)
     rendered, table_placeholders = _extract_html_table_placeholders(text)
     rendered, img_placeholders = _extract_html_img_placeholders(rendered)
     rendered, placeholders = _extract_math_placeholders(rendered)
@@ -1062,6 +1213,369 @@ def _render_paper_markdown(
     return template.render(**context), str(template_name), warning
+# Tuning constants for title/file matching.
+# Number of compact-title characters used for "prefix:" index keys.
+_TITLE_PREFIX_LEN = 16
+# Minimum length / token count of the shorter title before a containment
+# match is accepted (_TITLE_MIN_TOKENS also gates prefix-key creation).
+_TITLE_MIN_CHARS = 24
+_TITLE_MIN_TOKENS = 4
+# NOTE(review): not referenced anywhere in the visible diff -- confirm usage.
+_AUTHOR_YEAR_MIN_SIMILARITY = 0.8
+# Leading all-digit tokens up to this length are dropped from title keys.
+_LEADING_NUMERIC_MAX_LEN = 2
+# Adaptive similarity search: starting threshold, decrement per step, and
+# the maximum number of steps.
+_SIMILARITY_START = 0.95
+_SIMILARITY_STEP = 0.05
+_SIMILARITY_MAX_STEPS = 10
+def _normalize_title_key(title: str) -> str:
+    """Reduce a title to a lowercase ``[a-z0-9 ]`` key for index lookups.
+
+    Steps: NFKD-normalize and drop combining marks, spell out Greek letters
+    (literal characters and LaTeX ``\\alpha``-style commands), remove braces,
+    turn underscores into spaces, separate letters from adjacent digits,
+    collapse everything else to single spaces, and finally glue each
+    single-character token onto the token that follows it.
+
+    Returns:
+        The normalized key, or ``""`` when nothing alphanumeric remains.
+    """
+    value = unicodedata.normalize("NFKD", title)
+    # NFKD splits accented letters into base letter + combining mark. Drop
+    # the marks; otherwise the cleanup below turns each mark into a space and
+    # splits the word ("naïve" -> "nai ve" instead of "naive").
+    value = "".join(char for char in value if not unicodedata.combining(char))
+    greek_map = {
+        "α": "alpha",
+        "β": "beta",
+        "γ": "gamma",
+        "δ": "delta",
+        "ε": "epsilon",
+        "ζ": "zeta",
+        "η": "eta",
+        "θ": "theta",
+        "ι": "iota",
+        "κ": "kappa",
+        "λ": "lambda",
+        "μ": "mu",
+        "ν": "nu",
+        "ξ": "xi",
+        "ο": "omicron",
+        "π": "pi",
+        "ρ": "rho",
+        "σ": "sigma",
+        "τ": "tau",
+        "υ": "upsilon",
+        "φ": "phi",
+        "χ": "chi",
+        "ψ": "psi",
+        "ω": "omega",
+    }
+    for char, name in greek_map.items():
+        value = value.replace(char, f" {name} ")
+    # "\\" here is a literal backslash: this matches LaTeX commands such as
+    # "\alpha" and keeps only the letter name.
+    value = re.sub(
+        r"\\(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)\b",
+        r" \1 ",
+        value,
+        flags=re.IGNORECASE,
+    )
+    value = value.replace("{", "").replace("}", "")
+    value = value.replace("_", " ")
+    # Put a space at every letter/digit boundary ("gpt4" -> "gpt 4").
+    value = re.sub(r"([a-z])([0-9])", r"\1 \2", value, flags=re.IGNORECASE)
+    value = re.sub(r"([0-9])([a-z])", r"\1 \2", value, flags=re.IGNORECASE)
+    value = re.sub(r"[^a-z0-9]+", " ", value.lower())
+    value = re.sub(r"\s+", " ", value).strip()
+    tokens = value.split()
+    if not tokens:
+        return ""
+    merged: list[str] = []
+    idx = 0
+    while idx < len(tokens):
+        token = tokens[idx]
+        # A one-character token is joined to its successor ("x ray" -> "xray");
+        # a trailing one-character token is kept as is.
+        if len(token) == 1 and idx + 1 < len(tokens):
+            merged.append(token + tokens[idx + 1])
+            idx += 2
+            continue
+        merged.append(token)
+        idx += 1
+    return " ".join(merged)
+def _compact_title_key(title_key: str) -> str:
+    """Return ``title_key`` with every space removed."""
+    return title_key.replace(" ", "")
+def _strip_leading_numeric_tokens(title_key: str) -> str:
+    """Drop leading all-digit tokens of at most ``_LEADING_NUMERIC_MAX_LEN`` chars.
+
+    Returns ``title_key`` unchanged when no leading token qualifies; may
+    return ``""`` when every token is stripped.
+    """
+    tokens = title_key.split()
+    idx = 0
+    while idx < len(tokens):
+        token = tokens[idx]
+        if token.isdigit() and len(token) <= _LEADING_NUMERIC_MAX_LEN:
+            idx += 1
+            continue
+        break
+    if idx == 0:
+        return title_key
+    return " ".join(tokens[idx:])
+def _strip_pdf_hash_suffix(name: str) -> str:
+    """Remove a trailing ``-<hex/dash run>`` (8+ chars) that follows ``.pdf``.
+
+    The match is case-insensitive and the ``.pdf`` extension itself is kept.
+    """
+    return re.sub(r"(?i)(\.pdf)(?:-[0-9a-f\-]{8,})$", r"\1", name)
+def _extract_title_from_filename(name: str) -> str:
+    """Derive a title candidate from a file name.
+
+    Strips a trailing ``.md``, a hash suffix after ``.pdf``, and the ``.pdf``
+    extension, then turns underscores into spaces. A leading ``YYYY - `` or
+    ``<anything> - YYYY - `` prefix is removed when present.
+    """
+    base = name
+    lower = base.lower()
+    if lower.endswith(".md"):
+        base = base[:-3]
+        lower = base.lower()
+    if ".pdf-" in lower:
+        base = _strip_pdf_hash_suffix(base)
+        lower = base.lower()
+    if lower.endswith(".pdf"):
+        base = base[:-4]
+    base = base.replace("_", " ").strip()
+    # "2021 - Title" form.
+    match = re.match(r"\s*\d{4}\s*-\s*(.+)$", base)
+    if match:
+        return match.group(1).strip()
+    # "Author - 2021 - Title" form.
+    match = re.match(r"\s*.+?\s*-\s*\d{4}\s*-\s*(.+)$", base)
+    if match:
+        return match.group(1).strip()
+    return base.strip()
+def _clean_pdf_metadata_title(value: str | None, path: Path) -> str | None:
+    """Sanitize a title read from PDF metadata.
+
+    Args:
+        value: Raw metadata title, possibly ``None``.
+        path: The PDF's path; a title equal to its stem is rejected.
+
+    Returns:
+        The cleaned title, or ``None`` when it is empty, shorter than three
+        characters, or merely repeats the file stem (case-insensitive).
+    """
+    if not value:
+        return None
+    text = str(value).replace("\x00", "").strip()
+    if not text:
+        return None
+    # Strip boilerplate prefixes. In a raw string the regex classes are
+    # written with a single backslash (r"\s", r"\b"); the doubled form
+    # matches a literal backslash and would never strip anything.
+    text = re.sub(r"(?i)^microsoft\s+word\s*-\s*", "", text)
+    text = re.sub(r"(?i)^pdf\s*-\s*", "", text)
+    text = re.sub(r"(?i)^untitled\b", "", text).strip()
+    if text.lower().endswith(".pdf"):
+        text = text[:-4].strip()
+    if len(text) < 3:
+        return None
+    stem = path.stem.strip()
+    if stem and text.lower() == stem.lower():
+        return None
+    return text
+def _read_pdf_metadata_title(path: Path) -> str | None:
+    """Return the cleaned metadata title of the PDF at ``path``, if any.
+
+    Returns ``None`` when pypdf is unavailable, when reading the file or its
+    metadata raises, or when the title does not survive
+    ``_clean_pdf_metadata_title``.
+    """
+    if not PYPDF_AVAILABLE:
+        return None
+    try:
+        reader = PdfReader(str(path))
+        meta = reader.metadata
+        title = meta.title if meta else None
+    except Exception:
+        # Best effort: any failure while reading simply means "no title".
+        return None
+    return _clean_pdf_metadata_title(title, path)
+def _is_pdf_like(path: Path) -> bool:
+    """Return True for ``.pdf`` files and for ``*.pdf-<suffix>`` names.
+
+    Names that contain ``.pdf-`` but end in ``.md`` are excluded.
+    """
+    suffix = path.suffix.lower()
+    if suffix == ".pdf":
+        return True
+    name_lower = path.name.lower()
+    return ".pdf-" in name_lower and not name_lower.endswith(".md")
+def _scan_pdf_roots(roots: list[Path]) -> tuple[list[Path], list[dict[str, Any]]]:
+    """Recursively collect PDF-like files under ``roots``.
+
+    Returns:
+        A pair ``(pdf_paths, meta)``. ``pdf_paths`` holds each resolved file
+        once, in scan order. ``meta`` has one dict per root that exists and is
+        a directory, with ``path``, ``count``, ``max_mtime`` and ``size``
+        (sum of file sizes) for the files first seen under that root.
+    """
+    pdf_paths: list[Path] = []
+    meta: list[dict[str, Any]] = []
+    # Shared across roots so a file reachable from two roots is listed once.
+    seen: set[Path] = set()
+    for root in roots:
+        try:
+            if not root.exists() or not root.is_dir():
+                continue
+        except OSError:
+            continue
+        files: list[Path] = []
+        for path in root.rglob("*"):
+            try:
+                if not path.is_file():
+                    continue
+            except OSError:
+                continue
+            if not _is_pdf_like(path):
+                continue
+            resolved = path.resolve()
+            if resolved in seen:
+                continue
+            seen.add(resolved)
+            files.append(resolved)
+        max_mtime = 0.0
+        total_size = 0
+        for path in files:
+            try:
+                stats = path.stat()
+            except OSError:
+                # Unreadable files still count in "count" but add nothing to
+                # max_mtime / size.
+                continue
+            max_mtime = max(max_mtime, stats.st_mtime)
+            total_size += stats.st_size
+        pdf_paths.extend(files)
+        meta.append(
+            {
+                "path": str(root),
+                "count": len(files),
+                "max_mtime": max_mtime,
+                "size": total_size,
+            }
+        )
+    return pdf_paths, meta
+def _extract_year_author_from_filename(name: str) -> tuple[str | None, str | None]:
+    """Parse ``(year, author)`` hints from a file name.
+
+    After removing ``.md``, a hash suffix after ``.pdf`` and the ``.pdf``
+    extension, recognizes ``<author> - YYYY - ...`` (both values returned)
+    and ``YYYY - ...`` (author is ``None``). Years must start with 19 or 20.
+    Returns ``(None, None)`` when neither form matches.
+    """
+    base = name
+    lower = base.lower()
+    if lower.endswith(".md"):
+        base = base[:-3]
+        lower = base.lower()
+    if ".pdf-" in lower:
+        base = _strip_pdf_hash_suffix(base)
+        lower = base.lower()
+    if lower.endswith(".pdf"):
+        base = base[:-4]
+    match = re.match(r"\s*(.+?)\s*-\s*((?:19|20)\d{2})\s*-\s*", base)
+    if match:
+        return match.group(2), match.group(1).strip()
+    match = re.match(r"\s*((?:19|20)\d{2})\s*-\s*", base)
+    if match:
+        return match.group(1), None
+    return None, None
+def _normalize_author_key(name: str) -> str:
+    """Return a lowercase surname-like key for an author string.
+
+    Removes a standalone "et al" / "et al.", keeps only the part before the
+    first comma, reduces the rest to ``[a-z0-9]`` tokens and returns the last
+    token. Returns ``""`` when nothing is left.
+    """
+    raw = name.lower().strip()
+    # Match "et al" only as whole words. A plain substring replace would also
+    # eat the middle of names such as "Janet Alvarez" ("janet alvarez"
+    # contains "et al").
+    raw = re.sub(r"\bet\s+al\b\.?", " ", raw)
+    if "," in raw:
+        raw = raw.split(",", 1)[0]
+    raw = re.sub(r"[^a-z0-9]+", " ", raw)
+    raw = re.sub(r"\s+", " ", raw).strip()
+    if not raw:
+        return ""
+    # raw is non-empty and stripped here, so split() yields at least one token.
+    return raw.split()[-1]
+def _title_prefix_key(title_key: str) -> str | None:
+    """Return the ``prefix:<chars>`` index key for a normalized title.
+
+    Yields ``None`` when the title has fewer than ``_TITLE_MIN_TOKENS`` tokens
+    or its space-free form is shorter than ``_TITLE_PREFIX_LEN`` characters.
+    """
+    if len(title_key.split()) < _TITLE_MIN_TOKENS:
+        return None
+    compact = _compact_title_key(title_key)
+    if len(compact) < _TITLE_PREFIX_LEN:
+        return None
+    prefix = compact[:_TITLE_PREFIX_LEN]
+    if not prefix:
+        return None
+    return f"prefix:{prefix}"
+def _title_overlap_match(a: str, b: str) -> bool:
+    """Return True when two normalized titles are equal or one contains the other.
+
+    Containment only counts when the shorter title has at least
+    ``_TITLE_MIN_CHARS`` characters or ``_TITLE_MIN_TOKENS`` tokens. Empty
+    inputs never match.
+    """
+    if not a or not b:
+        return False
+    if a == b:
+        return True
+    shorter, longer = (a, b) if len(a) <= len(b) else (b, a)
+    token_count = len(shorter.split())
+    if len(shorter) >= _TITLE_MIN_CHARS or token_count >= _TITLE_MIN_TOKENS:
+        if longer.startswith(shorter) or shorter in longer:
+            return True
+    return False
+def _adaptive_similarity_match(title_key: str, candidates: list[Path]) -> Path | None:
+    """Pick the single candidate file whose title best matches ``title_key``.
+
+    The first candidate whose filename-derived title overlaps ``title_key``
+    (see ``_title_overlap_match``) wins immediately. Otherwise candidates are
+    scored with ``_title_similarity`` (defined outside this view) and the
+    threshold is lowered from ``_SIMILARITY_START`` in ``_SIMILARITY_STEP``
+    decrements until exactly one candidate qualifies.
+
+    Returns:
+        The matching path, or ``None`` when no unambiguous match is found.
+    """
+    if not title_key:
+        return None
+    scored: list[tuple[Path, float]] = []
+    for path in candidates:
+        candidate_title = _normalize_title_key(_extract_title_from_filename(path.name))
+        if not candidate_title:
+            continue
+        if _title_overlap_match(title_key, candidate_title):
+            return path
+        scored.append((path, _title_similarity(title_key, candidate_title)))
+    if not scored:
+        return None
+    def matches_at(threshold: float) -> list[Path]:
+        return [path for path, score in scored if score >= threshold]
+    threshold = _SIMILARITY_START
+    step = _SIMILARITY_STEP
+    prev_threshold = None
+    prev_count = None
+    for _ in range(_SIMILARITY_MAX_STEPS):
+        matches = matches_at(threshold)
+        if len(matches) == 1:
+            return matches[0]
+        if len(matches) == 0:
+            prev_threshold = threshold
+            prev_count = 0
+            threshold -= step
+            continue
+        # Several candidates qualify. If the previous (higher) threshold had
+        # none, bisect between the two thresholds looking for a value that
+        # isolates exactly one candidate.
+        if prev_count == 0 and prev_threshold is not None:
+            low = threshold
+            high = prev_threshold
+            for _ in range(_SIMILARITY_MAX_STEPS):
+                mid = (low + high) / 2
+                mid_matches = matches_at(mid)
+                if len(mid_matches) == 1:
+                    return mid_matches[0]
+                if len(mid_matches) == 0:
+                    high = mid
+                else:
+                    low = mid
+            return None
+        prev_threshold = threshold
+        prev_count = len(matches)
+        threshold -= step
+    return None
+def _resolve_by_title_and_meta(
+    paper: dict[str, Any],
+    file_index: dict[str, list[Path]],
+) -> Path | None:
+    """Find a file for ``paper`` using its title, then its year and first author.
+
+    Lookup order: exact normalized title, ``compact:`` key, the same two with
+    leading numeric tokens stripped, ``prefix:`` candidates refined by
+    ``_adaptive_similarity_match``, and finally ``authoryear:`` / ``year:``
+    candidates (only when ``paper["_year"]`` is all digits).
+
+    Returns:
+        The first matching path, or ``None`` when nothing matches.
+    """
+    title = str(paper.get("paper_title") or "")
+    title_key = _normalize_title_key(title)
+    if not title_key:
+        title_key = ""
+    candidates = file_index.get(title_key, [])
+    if candidates:
+        return candidates[0]
+    if title_key:
+        compact_key = _compact_title_key(title_key)
+        compact_candidates = file_index.get(f"compact:{compact_key}", [])
+        if compact_candidates:
+            return compact_candidates[0]
+        stripped_key = _strip_leading_numeric_tokens(title_key)
+        if stripped_key and stripped_key != title_key:
+            stripped_candidates = file_index.get(stripped_key, [])
+            if stripped_candidates:
+                return stripped_candidates[0]
+            stripped_compact = _compact_title_key(stripped_key)
+            stripped_candidates = file_index.get(f"compact:{stripped_compact}", [])
+            if stripped_candidates:
+                return stripped_candidates[0]
+    # Fuzzy stage: files sharing the title's compact prefix.
+    prefix_candidates: list[Path] = []
+    prefix_key = _title_prefix_key(title_key)
+    if prefix_key:
+        prefix_candidates = file_index.get(prefix_key, [])
+    if not prefix_candidates:
+        stripped_key = _strip_leading_numeric_tokens(title_key)
+        if stripped_key and stripped_key != title_key:
+            prefix_key = _title_prefix_key(stripped_key)
+            if prefix_key:
+                prefix_candidates = file_index.get(prefix_key, [])
+    if prefix_candidates:
+        match = _adaptive_similarity_match(title_key, prefix_candidates)
+        if match is not None:
+            return match
+    # Metadata stage: narrow by year, preferring year + first author.
+    year = str(paper.get("_year") or "").strip()
+    if not year.isdigit():
+        return None
+    author_key = ""
+    authors = paper.get("_authors") or []
+    if authors:
+        author_key = _normalize_author_key(str(authors[0]))
+    candidates = []
+    if author_key:
+        candidates = file_index.get(f"authoryear:{year}:{author_key}", [])
+    if not candidates:
+        candidates = file_index.get(f"year:{year}", [])
+    if not candidates:
+        return None
+    # With no title to compare, a lone year/author candidate is accepted.
+    if len(candidates) == 1 and not title_key:
+        return candidates[0]
+    match = _adaptive_similarity_match(title_key, candidates)
+    if match is not None:
+        return match
+    return None
 def _build_file_index(roots: list[Path], *, suffixes: set[str]) -> dict[str, list[Path]]:
     index: dict[str, list[Path]] = {}
     for root in roots:
@@ -1076,19 +1590,97 @@ def _build_file_index(roots: list[Path], *, suffixes: set[str]) -> dict[str, lis
                     continue
             except OSError:
                 continue
-            if path.suffix.lower() not in suffixes:
+            suffix = path.suffix.lower()
+            if suffix not in suffixes:
+                name_lower = path.name.lower()
+                if suffixes == {".pdf"} and ".pdf-" in name_lower and suffix != ".md":
+                    pass
+                else:
+                    continue
+            resolved = path.resolve()
+            name_key = path.name.lower()
+            index.setdefault(name_key, []).append(resolved)
+            title_candidate = _extract_title_from_filename(path.name)
+            title_key = _normalize_title_key(title_candidate)
+            if title_key:
+                if title_key != name_key:
+                    index.setdefault(title_key, []).append(resolved)
+                compact_key = _compact_title_key(title_key)
+                if compact_key:
+                    index.setdefault(f"compact:{compact_key}", []).append(resolved)
+                prefix_key = _title_prefix_key(title_key)
+                if prefix_key:
+                    index.setdefault(prefix_key, []).append(resolved)
+                stripped_key = _strip_leading_numeric_tokens(title_key)
+                if stripped_key and stripped_key != title_key:
+                    index.setdefault(stripped_key, []).append(resolved)
+                    stripped_compact = _compact_title_key(stripped_key)
+                    if stripped_compact:
+                        index.setdefault(f"compact:{stripped_compact}", []).append(resolved)
+                    stripped_prefix = _title_prefix_key(stripped_key)
+                    if stripped_prefix:
+                        index.setdefault(stripped_prefix, []).append(resolved)
+            year_hint, author_hint = _extract_year_author_from_filename(path.name)
+            if year_hint:
+                index.setdefault(f"year:{year_hint}", []).append(resolved)
+                if author_hint:
+                    author_key = _normalize_author_key(author_hint)
+                    if author_key:
+                        index.setdefault(f"authoryear:{year_hint}:{author_key}", []).append(resolved)
+    return index
+def _build_file_index_from_paths(paths: list[Path], *, suffixes: set[str]) -> dict[str, list[Path]]:
+    index: dict[str, list[Path]] = {}
+    for path in paths:
+        try:
+            if not path.is_file():
+                continue
+        except OSError:
+            continue
+        suffix = path.suffix.lower()
+        if suffix not in suffixes:
+            name_lower = path.name.lower()
+            if suffixes == {".pdf"} and ".pdf-" in name_lower and suffix != ".md":
+                pass
+            else:
                 continue
-            index.setdefault(path.name.lower(), []).append(path.resolve())
+        resolved = path.resolve()
+        name_key = path.name.lower()
+        index.setdefault(name_key, []).append(resolved)
+        title_candidate = _extract_title_from_filename(path.name)
+        title_key = _normalize_title_key(title_candidate)
+        if title_key:
+            if title_key != name_key:
+                index.setdefault(title_key, []).append(resolved)
+            compact_key = _compact_title_key(title_key)
+            if compact_key:
+                index.setdefault(f"compact:{compact_key}", []).append(resolved)
+            prefix_key = _title_prefix_key(title_key)
+            if prefix_key:
+                index.setdefault(prefix_key, []).append(resolved)
+            stripped_key = _strip_leading_numeric_tokens(title_key)
+            if stripped_key and stripped_key != title_key:
+                index.setdefault(stripped_key, []).append(resolved)
+                stripped_compact = _compact_title_key(stripped_key)
+                if stripped_compact:
+                    index.setdefault(f"compact:{stripped_compact}", []).append(resolved)
+                stripped_prefix = _title_prefix_key(stripped_key)
+                if stripped_prefix:
+                    index.setdefault(stripped_prefix, []).append(resolved)
     return index
 def _resolve_source_md(paper: dict[str, Any], md_index: dict[str, list[Path]]) -> Path | None:
     source_path = paper.get("source_path")
     if not source_path:
-        return None
-    name = Path(str(source_path)).name.lower()
-    candidates = md_index.get(name, [])
-    return candidates[0] if candidates else None
+        source_path = ""
+    if source_path:
+        name = Path(str(source_path)).name.lower()
+        candidates = md_index.get(name, [])
+        if candidates:
+            return candidates[0]
+    return _resolve_by_title_and_meta(paper, md_index)
 def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
@@ -1102,6 +1694,8 @@ def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
     if ".pdf-" in name.lower():
         base = name[: name.lower().rfind(".pdf-") + 4]
         return [Path(base).name]
+    if name.lower().endswith(".pdf"):
+        return [name]
     if name.lower().endswith(".pdf.md"):
         return [name[:-3]]
     return []
@@ -1112,7 +1706,7 @@ def _resolve_pdf(paper: dict[str, Any], pdf_index: dict[str, list[Path]]) -> Pat
         candidates = pdf_index.get(filename.lower(), [])
         if candidates:
             return candidates[0]
-    return None
+    return _resolve_by_title_and_meta(paper, pdf_index)
 def _ensure_under_roots(path: Path, roots: list[Path]) -> bool:
@@ -1126,6 +1720,150 @@ def _ensure_under_roots(path: Path, roots: list[Path]) -> bool:
     return False
+# Tokens that _normalize_presence_value maps to "with" / "without".
+_BOOL_TRUE = {"1", "true", "yes", "with", "has"}
+_BOOL_FALSE = {"0", "false", "no", "without"}
+def _tokenize_filter_query(text: str) -> list[str]:
+    """Split ``text`` on whitespace, keeping double-quoted spans together.
+
+    Quote characters toggle quoting and are dropped from the output; empty
+    tokens are skipped. An unterminated quote extends to the end of ``text``.
+    """
+    out: list[str] = []
+    buf: list[str] = []
+    in_quote = False
+    for ch in text:
+        if ch == '"':
+            in_quote = not in_quote
+            continue
+        if not in_quote and ch.isspace():
+            token = "".join(buf).strip()
+            if token:
+                out.append(token)
+            buf = []
+            continue
+        buf.append(ch)
+    # Flush whatever is left after the last separator.
+    token = "".join(buf).strip()
+    if token:
+        out.append(token)
+    return out
+def _normalize_presence_value(value: str) -> str | None:
+    """Map a truthy/falsy token to ``"with"`` / ``"without"``.
+
+    Matching is case-insensitive and ignores surrounding whitespace; tokens
+    in neither ``_BOOL_TRUE`` nor ``_BOOL_FALSE`` yield ``None``.
+    """
+    token = value.strip().lower()
+    if token in _BOOL_TRUE:
+        return "with"
+    if token in _BOOL_FALSE:
+        return "without"
+    return None
+def _parse_filter_query(text: str) -> dict[str, set[str]]:
+    """Parse ``key:value`` filter tokens into per-field sets.
+
+    Recognized forms (values may be comma-separated):
+      * ``tmpl:`` / ``template:`` -- lowercased template tags.
+      * ``pdf:`` / ``source:`` / ``summary:`` -- presence values normalized to
+        ``"with"`` / ``"without"``.
+      * ``has:`` / ``no:`` -- target fields among pdf, source, summary.
+
+    Tokens without ``:``, with an empty value, or with an unknown key are
+    ignored.
+
+    Returns:
+        A dict with keys ``pdf``, ``source``, ``summary`` and ``template``.
+    """
+    parsed = {
+        "pdf": set(),
+        "source": set(),
+        "summary": set(),
+        "template": set(),
+    }
+    for token in _tokenize_filter_query(text):
+        if ":" not in token:
+            continue
+        key, raw_value = token.split(":", 1)
+        key = key.strip().lower()
+        raw_value = raw_value.strip()
+        if not raw_value:
+            continue
+        if key in {"tmpl", "template"}:
+            for part in raw_value.split(","):
+                tag = part.strip()
+                if tag:
+                    parsed["template"].add(tag.lower())
+            continue
+        if key in {"pdf", "source", "summary"}:
+            for part in raw_value.split(","):
+                normalized = _normalize_presence_value(part)
+                if normalized:
+                    parsed[key].add(normalized)
+            continue
+        if key in {"has", "no"}:
+            targets = [part.strip().lower() for part in raw_value.split(",") if part.strip()]
+            for target in targets:
+                if target not in {"pdf", "source", "summary"}:
+                    continue
+                parsed[target].add("with" if key == "has" else "without")
+    return parsed
+def _presence_filter(values: list[str]) -> set[str] | None:
+    """Normalize presence values into a filter set.
+
+    Returns ``None`` (no filtering) when nothing valid was given or when both
+    ``"with"`` and ``"without"`` are requested.
+    """
+    normalized = set()
+    for value in values:
+        token = _normalize_presence_value(value)
+        if token:
+            normalized.add(token)
+    if not normalized or normalized == {"with", "without"}:
+        return None
+    return normalized
+def _merge_filter_set(primary: set[str] | None, secondary: set[str] | None) -> set[str] | None:
+    """Combine two optional filter sets.
+
+    An empty or ``None`` side defers to the other; otherwise the
+    intersection is returned.
+    """
+    # NOTE(review): disjoint inputs (e.g. {"with"} and {"without"}) intersect
+    # to an empty set, which _matches_presence treats as "no filter" rather
+    # than "match nothing" -- confirm this is the intended outcome.
+    if not primary:
+        return secondary
+    if not secondary:
+        return primary
+    return primary & secondary
+def _matches_presence(allowed: set[str] | None, has_value: bool) -> bool:
+    """Check ``has_value`` against a presence filter.
+
+    An empty or ``None`` filter accepts everything; otherwise ``"with"``
+    accepts items that have the value and ``"without"`` those that lack it.
+    """
+    if not allowed:
+        return True
+    if has_value and "with" in allowed:
+        return True
+    if not has_value and "without" in allowed:
+        return True
+    return False
+def _template_tag_map(index: PaperIndex) -> dict[str, str]:
+    """Map each lowercased template tag in ``index`` to its original spelling."""
+    return {tag.lower(): tag for tag in index.template_tags}
+def _compute_counts(index: PaperIndex, ids: set[int]) -> dict[str, Any]:
+    """Tally availability counts for the papers selected by ``ids``.
+
+    PDF-only placeholder entries are skipped entirely.
+
+    Returns:
+        A dict with ``total``, ``pdf``, ``source`` and ``summary`` counts, a
+        ``templates`` mapping of tag -> count, and ``template_order`` (the
+        tags in the order stored on ``index``).
+    """
+    template_order = list(index.template_tags)
+    template_counts = {tag: 0 for tag in template_order}
+    pdf_count = 0
+    source_count = 0
+    summary_count = 0
+    total_count = 0
+    tag_map = _template_tag_map(index)
+    for idx in ids:
+        paper = index.papers[idx]
+        if paper.get("_is_pdf_only"):
+            continue
+        total_count += 1
+        # Use the stored hash when present; otherwise hash source_path, or
+        # the paper's index when that is missing too.
+        source_hash = str(paper.get("source_hash") or stable_hash(str(paper.get("source_path") or idx)))
+        has_source = source_hash in index.md_path_by_hash
+        has_pdf = source_hash in index.pdf_path_by_hash
+        has_summary = bool(paper.get("_has_summary"))
+        if has_source:
+            source_count += 1
+        if has_pdf:
+            pdf_count += 1
+        if has_summary:
+            summary_count += 1
+        for tag_lc in paper.get("_template_tags_lc") or []:
+            # Tags unknown to the index are not counted.
+            display = tag_map.get(tag_lc)
+            if display:
+                template_counts[display] = template_counts.get(display, 0) + 1
+    return {
+        "total": total_count,
+        "pdf": pdf_count,
+        "source": source_count,
+        "summary": summary_count,
+        "templates": template_counts,
+        "template_order": template_order,
+    }
 def _apply_query(index: PaperIndex, query: Query) -> set[int]:
     all_ids = set(index.ordered_ids)
@@ -1182,7 +1920,30 @@ def _apply_query(index: PaperIndex, query: Query) -> set[int]:
     return result
-def _page_shell(title: str, body_html: str, extra_head: str = "", extra_scripts: str = "") -> str:
+def _page_shell(
+    title: str,
+    body_html: str,
+    extra_head: str = "",
+    extra_scripts: str = "",
+    header_title: str | None = None,
+) -> str:
+    header_html = """
+    <header>
+      <a href="/">Papers</a>
+      <a href="/stats">Stats</a>
+    </header>
+"""
+    if header_title:
+        safe_title = html.escape(header_title)
+        header_html = f"""
+    <header class="detail-header">
+      <div class="header-row">
+        <a class="header-back" href="/">← Papers</a>
+        <span class="header-title" title="{safe_title}">{safe_title}</span>
+        <a class="header-link" href="/stats">Stats</a>
+      </div>
+    </header>
+"""
     return f"""<!doctype html>
 <html lang="en">
   <head>
@@ -1193,28 +1954,83 @@ def _page_shell(title: str, body_html: str, extra_head: str = "", extra_scripts:
       body {{ font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial; margin: 0; }}
       header {{ position: sticky; top: 0; background: #0b1220; color: #fff; padding: 12px 16px; z-index: 10; }}
       header a {{ color: #cfe3ff; text-decoration: none; margin-right: 12px; }}
+      .detail-header .header-row {{ display: grid; grid-template-columns: auto minmax(0, 1fr) auto; align-items: center; gap: 12px; }}
+      .detail-header .header-title {{ text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
+      .detail-header .header-back {{ margin-right: 0; }}
+      .detail-header .header-link {{ margin-right: 0; }}
       .container {{ max-width: 1100px; margin: 0 auto; padding: 16px; }}
       .filters {{ display: grid; grid-template-columns: repeat(6, 1fr); gap: 8px; margin: 12px 0 16px; }}
       .filters input {{ width: 100%; padding: 8px; border: 1px solid #d0d7de; border-radius: 6px; }}
+      .filters select {{ width: 100%; border: 1px solid #d0d7de; border-radius: 6px; background: #fff; font-size: 13px; }}
+      .filters select:not([multiple]) {{ padding: 6px 8px; }}
+      .filters select[multiple] {{ padding: 2px; line-height: 1.25; min-height: 72px; font-size: 13px; }}
+      .filters select[multiple] option {{ padding: 2px 6px; line-height: 1.25; }}
+      .filters label {{ font-size: 12px; color: #57606a; }}
+      .filter-group {{ display: flex; flex-direction: column; gap: 4px; }}
       .card {{ border: 1px solid #d0d7de; border-radius: 10px; padding: 12px; margin: 10px 0; }}
       .muted {{ color: #57606a; font-size: 13px; }}
       .pill {{ display: inline-block; padding: 2px 8px; border-radius: 999px; border: 1px solid #d0d7de; margin-right: 6px; font-size: 12px; }}
+      .pill.template {{ border-color: #8a92a5; color: #243b53; background: #f6f8fa; }}
+      .pill.pdf-only {{ border-color: #c8a951; background: #fff8dc; color: #5b4a00; }}
       .warning {{ background: #fff4ce; border: 1px solid #ffd089; padding: 10px; border-radius: 10px; margin: 12px 0; }}
       .tabs {{ display: flex; gap: 8px; flex-wrap: wrap; }}
       .tab {{ display: inline-block; padding: 6px 12px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; color: #0969da; text-decoration: none; font-size: 13px; }}
       .tab:hover {{ background: #eef1f4; }}
       .tab.active {{ background: #0969da; border-color: #0969da; color: #fff; }}
+      .detail-shell {{ display: flex; flex-direction: column; gap: 12px; min-height: calc(100vh - 120px); }}
+      .detail-toolbar {{ display: flex; flex-wrap: wrap; align-items: center; justify-content: flex-start; gap: 12px; padding: 6px 8px 10px; border-bottom: 1px solid #e5e7eb; box-sizing: border-box; }}
+      .detail-toolbar .tabs {{ margin: 0; }}
+      .toolbar-actions {{ display: flex; flex-wrap: wrap; align-items: center; gap: 10px; margin-left: auto; padding-right: 16px; }}
+      .split-inline {{ display: flex; flex-wrap: wrap; align-items: center; gap: 6px; }}
+      .split-inline select {{ padding: 6px 8px; border-radius: 8px; border: 1px solid #d0d7de; background: #fff; min-width: 140px; }}
+      .split-actions {{ display: flex; align-items: center; justify-content: center; gap: 8px; }}
+      .split-actions button {{ padding: 6px 10px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; min-width: 36px; }}
+      .fullscreen-actions {{ display: flex; align-items: center; gap: 6px; }}
+      .fullscreen-actions button {{ padding: 6px 10px; border-radius: 8px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; }}
+      .fullscreen-exit {{ display: none; }}
+      body.detail-fullscreen {{ overflow: hidden; --outline-top: 16px; }}
+      body.detail-fullscreen header {{ display: none; }}
+      body.detail-fullscreen .container {{ max-width: 100%; padding: 0; }}
+      body.detail-fullscreen .detail-shell {{
+        position: fixed;
+        inset: 0;
+        padding: 12px 16px;
+        background: #fff;
+        z-index: 40;
+        overflow: auto;
+      }}
+      body.detail-fullscreen .detail-toolbar {{ position: sticky; top: 0; background: #fff; z-index: 41; }}
+      body.detail-fullscreen .fullscreen-enter {{ display: none; }}
+      body.detail-fullscreen .fullscreen-exit {{ display: inline-flex; }}
+      .detail-body {{ display: flex; flex-direction: column; gap: 8px; flex: 1; min-height: 0; }}
+      .help-icon {{ display: inline-flex; align-items: center; justify-content: center; width: 18px; height: 18px; border-radius: 50%; border: 1px solid #d0d7de; color: #57606a; font-size: 12px; cursor: default; position: relative; }}
+      .help-icon::after {{ content: attr(data-tip); display: none; position: absolute; top: 24px; right: 0; background: #0b1220; color: #e6edf3; padding: 8px 10px; border-radius: 8px; font-size: 12px; white-space: pre-line; width: 260px; z-index: 20; }}
+      .help-icon:hover::after {{ display: block; }}
+      .stats {{ margin: 12px 0 6px; }}
+      .stats-row {{ display: flex; flex-wrap: wrap; gap: 6px; align-items: center; }}
+      .stats-label {{ font-weight: 600; color: #0b1220; margin-right: 4px; }}
+      .pill.stat {{ background: #f6f8fa; border-color: #c7d2e0; color: #1f2a37; }}
       pre {{ overflow: auto; padding: 10px; background: #0b1220; color: #e6edf3; border-radius: 10px; }}
       code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
       a {{ color: #0969da; }}
+      @media (max-width: 768px) {{
+        .detail-toolbar {{
+          flex-wrap: nowrap;
+          overflow-x: auto;
+          padding-bottom: 8px;
+        }}
+        .detail-toolbar::-webkit-scrollbar {{ height: 6px; }}
+        .detail-toolbar::-webkit-scrollbar-thumb {{ background: #c7d2e0; border-radius: 999px; }}
+        .detail-toolbar .tabs,
+        .toolbar-actions {{
+          flex: 0 0 auto;
+        }}
+      }}
     </style>
     {extra_head}
   </head>
   <body>
-    <header>
-      <a href="/">Papers</a>
-      <a href="/stats">Stats</a>
-    </header>
+    {header_html}
     <div class="container">
       {body_html}
     </div>
@@ -1253,106 +2069,362 @@ def _build_pdfjs_viewer_url(pdf_url: str) -> str:
     return f"{_PDFJS_VIEWER_PATH}?file={encoded}"
-async def _index_page(request: Request) -> HTMLResponse:
-    return HTMLResponse(
-        _page_shell(
-            "Paper DB",
-            """
-<h2>Paper Database</h2>
-<div class="card">
-  <div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
-  <div style="display:flex; gap:8px; margin-top:8px;">
-    <input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
-    <select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
-      <option value="summary" selected>Open: Summary</option>
-      <option value="source">Open: Source</option>
-      <option value="pdf">Open: PDF</option>
-      <option value="pdfjs">Open: PDF Viewer</option>
-      <option value="split">Open: Split</option>
-    </select>
-  </div>
-  <details style="margin-top:10px;">
-    <summary>Advanced search</summary>
-    <div style="margin-top:10px;" class="muted">Build a query:</div>
-    <div class="filters" style="grid-template-columns: repeat(3, 1fr);">
-      <input id="advTitle" placeholder="title contains..." />
-      <input id="advAuthor" placeholder="author contains..." />
-      <input id="advTag" placeholder="tag (comma separated)" />
-      <input id="advYear" placeholder="year (e.g. 2020..2024)" />
-      <input id="advMonth" placeholder="month (01-12)" />
-      <input id="advVenue" placeholder="venue contains..." />
-    </div>
-    <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
-      <button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
-      <div class="muted">Generated: <code id="generated"></code></div>
-    </div>
-  </details>
+def _outline_assets(outline_top: str) -> tuple[str, str, str]:
+    outline_html = """
+<button id="outlineToggle" class="outline-toggle" title="Toggle outline">☰</button>
+<div id="outlinePanel" class="outline-panel collapsed">
+  <div class="outline-title">Outline</div>
+  <div id="outlineList" class="outline-list"></div>
 </div>
-<div id="results"></div>
-<div id="loading" class="muted">Loading...</div>
-<script>
-let page = 1;
-let loading = false;
-let done = false;
-function currentParams(nextPage) {
-  const params = new URLSearchParams();
-  params.set("page", String(nextPage));
-  params.set("page_size", "30");
-  const q = document.getElementById("query").value.trim();
-  if (q) params.set("q", q);
-  return params;
-}
-function escapeHtml(text) {
-  const div = document.createElement("div");
-  div.textContent = text;
-  return div.innerHTML;
-}
-function viewSuffixForItem(item) {
-  const view = document.getElementById("openView").value;
-  if (!view || view === "summary") return "";
-  const params = new URLSearchParams();
-  params.set("view", view);
-  if (view === "split") {
-    params.set("left", "summary");
-    if (item.has_pdf) {
-      params.set("right", "pdfjs");
-    } else if (item.has_source) {
-      params.set("right", "source");
-    } else {
-      params.set("right", "summary");
-    }
-  }
-  return `?${params.toString()}`;
-}
+<button id="backToTop" class="back-to-top" title="Back to top">↑</button>
+"""
+    outline_css = f"""
+<style>
+:root {{
+  --outline-top: {outline_top};
+}}
+.outline-toggle {{
+  position: fixed;
+  top: var(--outline-top);
+  left: 16px;
+  z-index: 20;
+  padding: 6px 10px;
+  border-radius: 8px;
+  border: 1px solid #d0d7de;
+  background: #f6f8fa;
+  cursor: pointer;
+}}
+.outline-panel {{
+  position: fixed;
+  top: calc(var(--outline-top) + 42px);
+  left: 16px;
+  width: 240px;
+  max-height: 60vh;
+  overflow: auto;
+  border: 1px solid #d0d7de;
+  border-radius: 10px;
+  background: #ffffff;
+  padding: 10px;
+  z-index: 20;
+  box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
+}}
+.outline-panel.collapsed {{
+  display: none;
+}}
+.outline-title {{
+  font-size: 12px;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: #57606a;
+  margin-bottom: 8px;
+}}
+.outline-list a {{
+  display: block;
+  color: #0969da;
+  text-decoration: none;
+  padding: 4px 0;
+}}
+.outline-list a:hover {{
+  text-decoration: underline;
+}}
+.back-to-top {{
+  position: fixed;
+  left: 16px;
+  bottom: 16px;
+  padding: 6px 10px;
+  border-radius: 999px;
+  border: 1px solid #d0d7de;
+  background: #ffffff;
+  cursor: pointer;
+  opacity: 0;
+  pointer-events: none;
+  transition: opacity 0.2s ease;
+  z-index: 20;
+}}
+.back-to-top.visible {{
+  opacity: 1;
+  pointer-events: auto;
+}}
+@media (max-width: 900px) {{
+  .outline-panel {{
+    width: 200px;
+  }}
+}}
+</style>
+"""
+    outline_js = """
+const outlineToggle = document.getElementById('outlineToggle');
+const outlinePanel = document.getElementById('outlinePanel');
+const outlineList = document.getElementById('outlineList');
+const backToTop = document.getElementById('backToTop');
+function slugify(text) {
+  return text.toLowerCase().trim()
+    .replace(/[^a-z0-9\\s-]/g, '')
+    .replace(/\\s+/g, '-')
+    .replace(/-+/g, '-');
+}
+function buildOutline() {
+  if (!outlineList) return;
+  const content = document.getElementById('content');
+  if (!content) return;
+  const headings = content.querySelectorAll('h1, h2, h3, h4');
+  if (!headings.length) {
+    outlineList.innerHTML = '<div class="muted">No headings</div>';
+    return;
+  }
+  const used = new Set();
+  outlineList.innerHTML = '';
+  headings.forEach((heading) => {
+    let id = heading.id;
+    if (!id) {
+      const base = slugify(heading.textContent || 'section') || 'section';
+      id = base;
+      let i = 1;
+      while (used.has(id) || document.getElementById(id)) {
+        id = `${base}-${i++}`;
+      }
+      heading.id = id;
+    }
+    used.add(id);
+    const level = parseInt(heading.tagName.slice(1), 10) || 1;
+    const link = document.createElement('a');
+    link.href = `#${id}`;
+    link.textContent = heading.textContent || '';
+    link.style.paddingLeft = `${(level - 1) * 12}px`;
+    outlineList.appendChild(link);
+  });
+}
+function toggleBackToTop() {
+  if (!backToTop) return;
+  if (window.scrollY > 300) {
+    backToTop.classList.add('visible');
+  } else {
+    backToTop.classList.remove('visible');
+  }
+}
+if (outlineToggle && outlinePanel) {
+  outlineToggle.addEventListener('click', () => {
+    outlinePanel.classList.toggle('collapsed');
+  });
+}
+if (backToTop) {
+  backToTop.addEventListener('click', () => {
+    window.scrollTo({ top: 0, behavior: 'smooth' });
+  });
+}
+buildOutline();
+window.addEventListener('scroll', toggleBackToTop);
+toggleBackToTop();
+"""
+    return outline_html, outline_css, outline_js
+async def _index_page(request: Request) -> HTMLResponse:
+    index: PaperIndex = request.app.state.index
+    template_options = "".join(
+        f'<option value="{html.escape(tag)}">{html.escape(tag)}</option>'
+        for tag in index.template_tags
+    )
+    if not template_options:
+        template_options = '<option value="" disabled>(no templates)</option>'
+    filter_help = (
+        "Filters syntax:\\n"
+        "pdf:yes|no source:yes|no summary:yes|no\\n"
+        "tmpl:<tag> or template:<tag>\\n"
+        "has:pdf / no:source aliases\\n"
+        "Content tags still use the search box (tag:fpga)."
+    )
+    filter_help_attr = html.escape(filter_help).replace("\n", "&#10;")
+    body_html = """
+<h2>Paper Database</h2>
+<div class="card">
+  <div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
+  <div style="display:flex; gap:8px; margin-top:8px;">
+    <input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
+    <select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
+      <option value="summary" selected>Open: Summary</option>
+      <option value="source">Open: Source</option>
+      <option value="pdf">Open: PDF</option>
+      <option value="pdfjs">Open: PDF Viewer</option>
+      <option value="split">Open: Split</option>
+    </select>
+  </div>
+  <div class="filters" style="grid-template-columns: repeat(4, 1fr); margin-top:10px;">
+    <div class="filter-group">
+      <label>PDF</label>
+      <select id="filterPdf" multiple size="2">
+        <option value="with">With</option>
+        <option value="without">Without</option>
+      </select>
+    </div>
+    <div class="filter-group">
+      <label>Source</label>
+      <select id="filterSource" multiple size="2">
+        <option value="with">With</option>
+        <option value="without">Without</option>
+      </select>
+    </div>
+    <div class="filter-group">
+      <label>Summary</label>
+      <select id="filterSummary" multiple size="2">
+        <option value="with">With</option>
+        <option value="without">Without</option>
+      </select>
+    </div>
+    <div class="filter-group">
+      <label>Template</label>
+      <select id="filterTemplate" multiple size="4">
+        __TEMPLATE_OPTIONS__
+      </select>
+    </div>
+  </div>
+  <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
+    <input id="filterQuery" placeholder='Filters... e.g. pdf:yes tmpl:simple' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
+    <span class="help-icon" data-tip="__FILTER_HELP__">?</span>
+  </div>
+  <details style="margin-top:10px;">
+    <summary>Advanced search</summary>
+    <div style="margin-top:10px;" class="muted">Build a query:</div>
+    <div class="filters" style="grid-template-columns: repeat(3, 1fr);">
+      <input id="advTitle" placeholder="title contains..." />
+      <input id="advAuthor" placeholder="author contains..." />
+      <input id="advTag" placeholder="tag (comma separated)" />
+      <input id="advYear" placeholder="year (e.g. 2020..2024)" />
+      <input id="advMonth" placeholder="month (01-12)" />
+      <input id="advVenue" placeholder="venue contains..." />
+    </div>
+    <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
+      <button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
+      <div class="muted">Generated: <code id="generated"></code></div>
+    </div>
+  </details>
+</div>
+<div id="stats" class="stats">
+  <div id="statsTotal" class="stats-row"></div>
+  <div id="statsFiltered" class="stats-row" style="margin-top:6px;"></div>
+</div>
+<div id="results"></div>
+<div id="loading" class="muted">Loading...</div>
+<script>
+let page = 1;
+let loading = false;
+let done = false;
+function currentParams(nextPage) {
+  const params = new URLSearchParams();
+  params.set("page", String(nextPage));
+  params.set("page_size", "30");
+  const q = document.getElementById("query").value.trim();
+  if (q) params.set("q", q);
+  const fq = document.getElementById("filterQuery").value.trim();
+  if (fq) params.set("fq", fq);
+  function addMulti(id, key) {
+    const el = document.getElementById(id);
+    const values = Array.from(el.selectedOptions).map(opt => opt.value).filter(Boolean);
+    for (const value of values) {
+      params.append(key, value);
+    }
+  }
+  addMulti("filterPdf", "pdf");
+  addMulti("filterSource", "source");
+  addMulti("filterSummary", "summary");
+  addMulti("filterTemplate", "template");
+  return params;
+}
+function escapeHtml(text) {
+  const div = document.createElement("div");
+  div.textContent = text;
+  return div.innerHTML;
+}
+function viewSuffixForItem(item) {
+  let view = document.getElementById("openView").value;
+  const isPdfOnly = item.is_pdf_only;
+  const pdfFallback = item.has_pdf ? "pdfjs" : "pdf";
+  if (isPdfOnly && (view === "summary" || view === "source")) {
+    view = pdfFallback;
+  }
+  if (!view || view === "summary") return "";
+  const params = new URLSearchParams();
+  params.set("view", view);
+  if (view === "split") {
+    if (isPdfOnly) {
+      params.set("left", pdfFallback);
+      params.set("right", pdfFallback);
+    } else {
+      params.set("left", "summary");
+      if (item.has_pdf) {
+        params.set("right", "pdfjs");
+      } else if (item.has_source) {
+        params.set("right", "source");
+      } else {
+        params.set("right", "summary");
+      }
+    }
+  }
+  return `?${params.toString()}`;
+}
 function renderItem(item) {
   const tags = (item.tags || []).map(t => `<span class="pill">${escapeHtml(t)}</span>`).join("");
+  const templateTags = (item.template_tags || []).map(t => `<span class="pill template">tmpl:${escapeHtml(t)}</span>`).join("");
   const authors = (item.authors || []).slice(0, 6).map(a => escapeHtml(a)).join(", ");
   const meta = `${escapeHtml(item.year || "")}-${escapeHtml(item.month || "")} · ${escapeHtml(item.venue || "")}`;
   const viewSuffix = viewSuffixForItem(item);
   const badges = [
     item.has_source ? `<span class="pill">source</span>` : "",
     item.has_pdf ? `<span class="pill">pdf</span>` : "",
+    item.is_pdf_only ? `<span class="pill pdf-only">pdf-only</span>` : "",
   ].join("");
   return `
     <div class="card">
       <div><a href="/paper/${encodeURIComponent(item.source_hash)}${viewSuffix}">${escapeHtml(item.title || "")}</a></div>
       <div class="muted">${authors}</div>
       <div class="muted">${meta}</div>
-      <div style="margin-top:6px">${badges} ${tags}</div>
+      <div style="margin-top:6px">${badges} ${templateTags} ${tags}</div>
     </div>
   `;
 }
+function renderStatsRow(targetId, label, counts) {
+  const row = document.getElementById(targetId);
+  if (!row || !counts) return;
+  const pills = [];
+  pills.push(`<span class="stats-label">${escapeHtml(label)}</span>`);
+  pills.push(`<span class="pill stat">Count ${counts.total}</span>`);
+  pills.push(`<span class="pill stat">PDF ${counts.pdf}</span>`);
+  pills.push(`<span class="pill stat">Source ${counts.source}</span>`);
+  pills.push(`<span class="pill stat">Summary ${counts.summary}</span>`);
+  const order = counts.template_order || Object.keys(counts.templates || {});
+  for (const tag of order) {
+    const count = (counts.templates && counts.templates[tag]) || 0;
+    pills.push(`<span class="pill stat">tmpl:${escapeHtml(tag)} ${count}</span>`);
+  }
+  row.innerHTML = pills.join("");
+}
+function updateStats(stats) {
+  if (!stats) return;
+  renderStatsRow("statsTotal", "Total", stats.all);
+  renderStatsRow("statsFiltered", "Filtered", stats.filtered);
+}
 async function loadMore() {
   if (loading || done) return;
   loading = true;
   document.getElementById("loading").textContent = "Loading...";
   const res = await fetch(`/api/papers?${currentParams(page).toString()}`);
   const data = await res.json();
+  if (data.stats) {
+    updateStats(data.stats);
+  }
   const results = document.getElementById("results");
   for (const item of data.items) {
     results.insertAdjacentHTML("beforeend", renderItem(item));
@@ -1376,6 +2448,11 @@ function resetAndLoad() {
 document.getElementById("query").addEventListener("change", resetAndLoad);
 document.getElementById("openView").addEventListener("change", resetAndLoad);
+document.getElementById("filterQuery").addEventListener("change", resetAndLoad);
+document.getElementById("filterPdf").addEventListener("change", resetAndLoad);
+document.getElementById("filterSource").addEventListener("change", resetAndLoad);
+document.getElementById("filterSummary").addEventListener("change", resetAndLoad);
+document.getElementById("filterTemplate").addEventListener("change", resetAndLoad);
 document.getElementById("buildQuery").addEventListener("click", () => {
   function add(field, value) {
@@ -1416,9 +2493,10 @@ window.addEventListener("scroll", () => {
 loadMore();
 </script>
-""",
-        )
-    )
+"""
+    body_html = body_html.replace("__TEMPLATE_OPTIONS__", template_options)
+    body_html = body_html.replace("__FILTER_HELP__", filter_help_attr)
+    return HTMLResponse(_page_shell("Paper DB", body_html))
 def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
@@ -1429,11 +2507,21 @@ def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
     page_size = min(max(1, page_size), 200)
     q = qp.get("q", "").strip()
+    filter_query = qp.get("fq", "").strip()
+    pdf_filters = [item for item in qp.getlist("pdf") if item]
+    source_filters = [item for item in qp.getlist("source") if item]
+    summary_filters = [item for item in qp.getlist("summary") if item]
+    template_filters = [item for item in qp.getlist("template") if item]
     return {
         "page": page,
         "page_size": page_size,
         "q": q,
+        "filter_query": filter_query,
+        "pdf": pdf_filters,
+        "source": source_filters,
+        "summary": summary_filters,
+        "template": template_filters,
     }
@@ -1443,13 +2531,55 @@ async def _api_papers(request: Request) -> JSONResponse:
     page = int(filters["page"])
     page_size = int(filters["page_size"])
     q = str(filters["q"])
+    filter_query = str(filters["filter_query"])
     query = parse_query(q)
     candidate = _apply_query(index, query)
+    filter_terms = _parse_filter_query(filter_query)
+    pdf_filter = _merge_filter_set(_presence_filter(filters["pdf"]), _presence_filter(list(filter_terms["pdf"])))
+    source_filter = _merge_filter_set(
+        _presence_filter(filters["source"]), _presence_filter(list(filter_terms["source"]))
+    )
+    summary_filter = _merge_filter_set(
+        _presence_filter(filters["summary"]), _presence_filter(list(filter_terms["summary"]))
+    )
+    template_selected = {item.lower() for item in filters["template"] if item}
+    template_filter = _merge_filter_set(
+        template_selected or None,
+        filter_terms["template"] or None,
+    )
+    if candidate:
+        filtered: set[int] = set()
+        for idx in candidate:
+            paper = index.papers[idx]
+            source_hash = str(paper.get("source_hash") or stable_hash(str(paper.get("source_path") or idx)))
+            has_source = source_hash in index.md_path_by_hash
+            has_pdf = source_hash in index.pdf_path_by_hash
+            has_summary = bool(paper.get("_has_summary"))
+            if not _matches_presence(pdf_filter, has_pdf):
+                continue
+            if not _matches_presence(source_filter, has_source):
+                continue
+            if not _matches_presence(summary_filter, has_summary):
+                continue
+            if template_filter:
+                tags = paper.get("_template_tags_lc") or []
+                if not any(tag in template_filter for tag in tags):
+                    continue
+            filtered.add(idx)
+        candidate = filtered
     ordered = [idx for idx in index.ordered_ids if idx in candidate]
     total = len(ordered)
     start = (page - 1) * page_size
     end = min(start + page_size, total)
     page_ids = ordered[start:end]
+    stats_payload = None
+    if page == 1:
+        all_ids = set(index.ordered_ids)
+        stats_payload = {
+            "all": _compute_counts(index, all_ids),
+            "filtered": _compute_counts(index, candidate),
+        }
     items: list[dict[str, Any]] = []
     for idx in page_ids:
@@ -1464,8 +2594,11 @@ async def _api_papers(request: Request) -> JSONResponse:
                 "month": paper.get("_month") or "",
                 "venue": paper.get("_venue") or "",
                 "tags": paper.get("_tags") or [],
+                "template_tags": paper.get("_template_tags") or [],
                 "has_source": source_hash in index.md_path_by_hash,
                 "has_pdf": source_hash in index.pdf_path_by_hash,
+                "has_summary": bool(paper.get("_has_summary")),
+                "is_pdf_only": bool(paper.get("_is_pdf_only")),
             }
         )
@@ -1476,6 +2609,7 @@ async def _api_papers(request: Request) -> JSONResponse:
             "total": total,
             "has_more": end < total,
             "items": items,
+            "stats": stats_payload,
         }
     )
@@ -1488,28 +2622,45 @@ async def _paper_detail(request: Request) -> HTMLResponse:
     if idx is None:
         return RedirectResponse("/")
     paper = index.papers[idx]
-    view = request.query_params.get("view", "summary")
+    is_pdf_only = bool(paper.get("_is_pdf_only"))
+    page_title = str(paper.get("paper_title") or "Paper")
+    view = request.query_params.get("view")
     template_param = request.query_params.get("template")
     embed = request.query_params.get("embed") == "1"
-    if view == "split":
-        embed = False
     pdf_path = index.pdf_path_by_hash.get(source_hash)
     pdf_url = f"/api/pdf/{source_hash}"
-    shell = _embed_shell if embed else _page_shell
     source_available = source_hash in index.md_path_by_hash
-    allowed_views = {"summary", "source", "pdf", "pdfjs"}
+    allowed_views = {"summary", "source", "pdf", "pdfjs", "split"}
+    if is_pdf_only:
+        allowed_views = {"pdf", "pdfjs", "split"}
     def normalize_view(value: str | None, default: str) -> str:
         """Return ``value`` when it is one of ``allowed_views``, else ``default``."""
         return value if value in allowed_views else default
-    default_right = "pdfjs" if pdf_path else ("source" if source_available else "summary")
-    left_param = request.query_params.get("left")
-    right_param = request.query_params.get("right")
-    left = normalize_view(left_param, "summary") if left_param else "summary"
-    right = normalize_view(right_param, default_right) if right_param else default_right
+    preferred_pdf_view = "pdfjs" if pdf_path else "pdf"
+    default_view = preferred_pdf_view if is_pdf_only else "summary"
+    view = normalize_view(view, default_view)
+    if view == "split":
+        embed = False
+    if is_pdf_only:
+        left_param = request.query_params.get("left")
+        right_param = request.query_params.get("right")
+        left = normalize_view(left_param, preferred_pdf_view) if left_param else preferred_pdf_view
+        right = normalize_view(right_param, preferred_pdf_view) if right_param else preferred_pdf_view
+    else:
+        default_right = "pdfjs" if pdf_path else ("source" if source_available else "summary")
+        left_param = request.query_params.get("left")
+        right_param = request.query_params.get("right")
+        left = normalize_view(left_param, "summary") if left_param else "summary"
+        right = normalize_view(right_param, default_right) if right_param else default_right
+    def render_page(title: str, body: str, extra_head: str = "", extra_scripts: str = "") -> HTMLResponse:
+        """Wrap ``body`` in the embed shell or the full page shell and return the response.
+
+        The full page shell additionally receives ``page_title`` as its header title.
+        """
+        if embed:
+            markup = _embed_shell(title, body, extra_head, extra_scripts)
+        else:
+            markup = _page_shell(title, body, extra_head, extra_scripts, header_title=page_title)
+        return HTMLResponse(markup)
     def nav_link(label: str, v: str) -> str:
         active = " active" if view == v else ""
@@ -1522,16 +2673,83 @@ async def _paper_detail(request: Request) -> HTMLResponse:
         href = f"/paper/{source_hash}?{urlencode(params)}"
         return f'<a class="tab{active}" href="{html.escape(href)}">{html.escape(label)}</a>'
-    nav = f"""
-<div class="tabs" style="margin: 8px 0 14px;">
-  {nav_link("Summary", "summary")}
-  {nav_link("Source", "source")}
-  {nav_link("PDF", "pdf")}
-  {nav_link("PDF Viewer", "pdfjs")}
-  {nav_link("Split", "split")}
+    tab_defs = [
+        ("Summary", "summary"),
+        ("Source", "source"),
+        ("PDF", "pdf"),
+        ("PDF Viewer", "pdfjs"),
+        ("Split", "split"),
+    ]
+    if is_pdf_only:
+        tab_defs = [
+            ("PDF", "pdf"),
+            ("PDF Viewer", "pdfjs"),
+            ("Split", "split"),
+        ]
+    tabs_html = '<div class="tabs">' + "".join(nav_link(label, v) for label, v in tab_defs) + "</div>"
+    fullscreen_controls = """
+<div class="fullscreen-actions">
+  <button id="fullscreenEnter" class="fullscreen-enter" type="button" title="Enter fullscreen">Fullscreen</button>
+  <button id="fullscreenExit" class="fullscreen-exit" type="button" title="Exit fullscreen">Exit Fullscreen</button>
+</div>
+"""
+    def detail_toolbar(extra_controls: str = "") -> str:
+        if embed:
+            return ""
+        controls = extra_controls.strip()
+        toolbar_controls = f"{controls}{fullscreen_controls}" if controls else fullscreen_controls
+        return f"""
+<div class="detail-toolbar">
+  {tabs_html}
+  <div class="toolbar-actions">
+    {toolbar_controls}
+  </div>
+</div>
+"""
+    def wrap_detail(content: str, toolbar_html: str | None = None) -> str:
+        if embed:
+            return content
+        toolbar = detail_toolbar() if toolbar_html is None else toolbar_html
+        return f"""
+<div class="detail-shell">
+  {toolbar}
+  <div class="detail-body">
+    {content}
+  </div>
 </div>
 """
-    nav_html = "" if embed else nav
+    fullscreen_script = ""
+    if not embed:
+        fullscreen_script = """
+<script>
+const fullscreenEnter = document.getElementById('fullscreenEnter');
+const fullscreenExit = document.getElementById('fullscreenExit');
+function setFullscreen(enable) {
+  document.body.classList.toggle('detail-fullscreen', enable);
+}
+if (fullscreenEnter) {
+  fullscreenEnter.addEventListener('click', () => setFullscreen(true));
+}
+if (fullscreenExit) {
+  fullscreenExit.addEventListener('click', () => setFullscreen(false));
+}
+document.addEventListener('keydown', (event) => {
+  if (event.key === 'Escape' && document.body.classList.contains('detail-fullscreen')) {
+    setFullscreen(false);
+  }
+});
+</script>
+"""
+    pdf_only_warning_html = ""
+    if is_pdf_only:
+        pdf_only_warning_html = (
+            '<div class="warning">PDF-only entry: summary and source views are unavailable.</div>'
+        )
+    outline_top = "72px" if not embed else "16px"
+    outline_html, outline_css, outline_js = _outline_assets(outline_top)
     if view == "split":
         def pane_src(pane_view: str) -> str:
@@ -1550,6 +2768,11 @@ async def _paper_detail(request: Request) -> HTMLResponse:
             ("pdf", "PDF"),
             ("pdfjs", "PDF Viewer"),
         ]
+        if is_pdf_only:
+            options = [
+                ("pdf", "PDF"),
+                ("pdfjs", "PDF Viewer"),
+            ]
         left_options = "\n".join(
             f'<option value="{value}"{" selected" if value == left else ""}>{label}</option>'
             for value, label in options
@@ -1558,28 +2781,26 @@ async def _paper_detail(request: Request) -> HTMLResponse:
             f'<option value="{value}"{" selected" if value == right else ""}>{label}</option>'
             for value, label in options
         )
-        body = f"""
-<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>
-{nav}
-<div class="split-controls">
-  <div>
-    <div class="muted">Left pane</div>
-    <select id="splitLeft">
-      {left_options}
-    </select>
-  </div>
+        split_controls = f"""
+<div class="split-inline">
+  <span class="muted">Left</span>
+  <select id="splitLeft">
+    {left_options}
+  </select>
   <div class="split-actions">
     <button id="splitTighten" type="button" title="Tighten width">-</button>
     <button id="splitSwap" type="button" title="Swap panes">⇄</button>
     <button id="splitWiden" type="button" title="Widen width">+</button>
   </div>
-  <div>
-    <div class="muted">Right pane</div>
-    <select id="splitRight">
-      {right_options}
-    </select>
-  </div>
+  <span class="muted">Right</span>
+  <select id="splitRight">
+    {right_options}
+  </select>
 </div>
+"""
+        toolbar_html = detail_toolbar(split_controls)
+        split_layout = f"""
+{pdf_only_warning_html}
 <div class="split-layout">
   <div class="split-pane">
     <iframe id="leftPane" src="{html.escape(left_src)}" title="Left pane"></iframe>
@@ -1589,6 +2810,7 @@ async def _paper_detail(request: Request) -> HTMLResponse:
   </div>
 </div>
 """
+        body = wrap_detail(split_layout, toolbar_html=toolbar_html)
         extra_head = """
 <style>
 .container {
@@ -1596,43 +2818,14 @@ async def _paper_detail(request: Request) -> HTMLResponse:
   width: 100%;
   margin: 0 auto;
 }
-.split-controls {
-  display: grid;
-  grid-template-columns: 1fr auto 1fr;
-  gap: 12px;
-  align-items: end;
-  margin: 10px 0 14px;
-}
-.split-controls select {
-  padding: 6px 8px;
-  border-radius: 8px;
-  border: 1px solid #d0d7de;
-  background: #fff;
-  min-width: 160px;
-}
-.split-actions {
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  gap: 8px;
-  height: 100%;
-}
-.split-actions button {
-  padding: 6px 10px;
-  border-radius: 999px;
-  border: 1px solid #d0d7de;
-  background: #f6f8fa;
-  cursor: pointer;
-  min-width: 36px;
-}
 .split-layout {
   display: flex;
   gap: 12px;
   width: 100%;
-  max-width: min(100%, var(--split-max-width, 100%));
+  max-width: var(--split-max-width, 100%);
   margin: 0 auto;
-  height: calc(100vh - 260px);
-  min-height: 420px;
+  flex: 1;
+  min-height: 440px;
 }
 .split-pane {
   flex: 1;
@@ -1649,14 +2842,11 @@ async def _paper_detail(request: Request) -> HTMLResponse:
 @media (max-width: 900px) {
   .split-layout {
     flex-direction: column;
-    height: auto;
+    min-height: 0;
   }
   .split-pane {
     height: 70vh;
   }
-  .split-controls {
-    grid-template-columns: 1fr;
-  }
 }
 </style>
 """
@@ -1717,28 +2907,46 @@ widenButton.addEventListener('click', () => {
 applySplitWidth();
 </script>
 """
-        return HTMLResponse(_page_shell("Split View", body, extra_head=extra_head, extra_scripts=extra_scripts))
+        return render_page(
+            "Split View",
+            body,
+            extra_head=extra_head,
+            extra_scripts=extra_scripts + fullscreen_script,
+        )
     if view == "source":
         source_path = index.md_path_by_hash.get(source_hash)
         if not source_path:
-            body = nav_html + '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
-            return HTMLResponse(shell("Source", body))
+            body = wrap_detail(
+                '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
+            )
+            return render_page("Source", body, extra_scripts=fullscreen_script)
         try:
             raw = source_path.read_text(encoding="utf-8")
         except UnicodeDecodeError:
             raw = source_path.read_text(encoding="latin-1")
         rendered = _render_markdown_with_math_placeholders(md, raw)
-        body = (
-            nav_html
-            + f"<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>"
-            + f'<div class="muted">{html.escape(str(source_path))}</div>'
-            + '<div class="muted" style="margin-top:10px;">Rendered from source markdown:</div>'
-            + f'<div id="content">{rendered}</div>'
-            + "<details style='margin-top:12px;'><summary>Raw markdown</summary>"
-            + f"<pre><code>{html.escape(raw)}</code></pre></details>"
+        body = wrap_detail(
+            f"""
+<div class="muted">{html.escape(str(source_path))}</div>
+<div class="muted" style="margin-top:10px;">Rendered from source markdown:</div>
+{outline_html}
+<div id="content">{rendered}</div>
+<details style="margin-top:12px;"><summary>Raw markdown</summary>
+  <pre><code>{html.escape(raw)}</code></pre>
+</details>
+"""
         )
-        extra_head = f'<link rel="stylesheet" href="{_CDN_KATEX}" />'
+        extra_head = f"""
+<link rel="stylesheet" href="{_CDN_KATEX}" />
+{outline_css}
+<style>
+#content img {{
+  max-width: 100%;
+  height: auto;
+}}
+</style>
+"""
         extra_scripts = f"""
 <script src="{_CDN_MERMAID}"></script>
 <script src="{_CDN_KATEX_JS}"></script>
@@ -1766,16 +2974,18 @@ if (window.renderMathInElement) {{
     throwOnError: false
   }});
 }}
+{outline_js}
 </script>
 """
-        return HTMLResponse(shell("Source", body, extra_head=extra_head, extra_scripts=extra_scripts))
+        return render_page("Source", body, extra_head=extra_head, extra_scripts=extra_scripts + fullscreen_script)
     if view == "pdf":
         if not pdf_path:
-            body = nav_html + '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
-            return HTMLResponse(shell("PDF", body))
-        body = nav_html + f"""
-<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>
+            body = wrap_detail('<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>')
+            return render_page("PDF", body, extra_scripts=fullscreen_script)
+        body = wrap_detail(
+            f"""
+{pdf_only_warning_html}
 <div class="muted">{html.escape(str(pdf_path.name))}</div>
 <div style="display:flex; gap:8px; align-items:center; margin: 10px 0;">
   <button id="prev" style="padding:6px 10px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Prev</button>
@@ -1787,6 +2997,7 @@ if (window.renderMathInElement) {{
 </div>
 <canvas id="the-canvas" style="width: 100%; border: 1px solid #d0d7de; border-radius: 10px;"></canvas>
 """
+        )
         extra_scripts = f"""
 <script src="{_CDN_PDFJS}"></script>
 <script>
@@ -1875,25 +3086,21 @@ window.addEventListener('resize', () => {{
 }});
 </script>
 """
-        return HTMLResponse(shell("PDF", body, extra_scripts=extra_scripts))
+        return render_page("PDF", body, extra_scripts=extra_scripts + fullscreen_script)
     if view == "pdfjs":
         if not pdf_path:
-            body = nav_html + '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
-            return HTMLResponse(shell("PDF Viewer", body))
+            body = wrap_detail('<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>')
+            return render_page("PDF Viewer", body, extra_scripts=fullscreen_script)
         viewer_url = _build_pdfjs_viewer_url(pdf_url)
-        header_html = ""
-        if not embed:
-            header_html = (
-                f"<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>"
-                + f'<div class="muted">{html.escape(str(pdf_path.name))}</div>'
-            )
-        frame_height = "calc(100vh - 220px)" if not embed else "calc(100vh - 32px)"
-        body = f"""
-{nav_html}
-{header_html}
+        frame_height = "calc(100vh - 32px)" if embed else "100%"
+        body = wrap_detail(
+            f"""
+{pdf_only_warning_html}
+<div class="muted">{html.escape(str(pdf_path.name))}</div>
 <iframe class="pdfjs-frame" src="{html.escape(viewer_url)}" title="PDF.js Viewer"></iframe>
 """
+        )
         extra_head = f"""
 <style>
 .pdfjs-frame {{
@@ -1901,10 +3108,11 @@ window.addEventListener('resize', () => {{
   height: {frame_height};
   border: 1px solid #d0d7de;
   border-radius: 10px;
+  flex: 1;
 }}
 </style>
 """
-        return HTMLResponse(shell("PDF Viewer", body, extra_head=extra_head))
+        return render_page("PDF Viewer", body, extra_head=extra_head, extra_scripts=fullscreen_script)
     selected_tag, available_templates = _select_template_tag(paper, template_param)
     markdown, template_name, warning = _render_paper_markdown(
@@ -1915,8 +3123,6 @@ window.addEventListener('resize', () => {{
     rendered_html = _render_markdown_with_math_placeholders(md, markdown)
     warning_html = f'<div class="warning">{html.escape(warning)}</div>' if warning else ""
-    title = str(paper.get("paper_title") or "Paper")
-    outline_top = "72px" if not embed else "16px"
     template_controls = f'<div class="muted">Template: {html.escape(template_name)}</div>'
     if available_templates:
         options = "\n".join(
@@ -1942,97 +3148,17 @@ if (templateSelect) {{
 }}
 </script>
 """
-    outline_html = """
-<button id="outlineToggle" class="outline-toggle" title="Toggle outline">☰</button>
-<div id="outlinePanel" class="outline-panel collapsed">
-  <div class="outline-title">Outline</div>
-  <div id="outlineList" class="outline-list"></div>
-</div>
-<button id="backToTop" class="back-to-top" title="Back to top">↑</button>
-"""
-    body = f"""
-<h2>{html.escape(title)}</h2>
+    content_html = f"""
 {template_controls}
 {warning_html}
-{nav_html}
 {outline_html}
 <div id="content">{rendered_html}</div>
 """
+    body = wrap_detail(content_html)
     extra_head = f"""
 <link rel="stylesheet" href="{_CDN_KATEX}" />
-<style>
-:root {{
-  --outline-top: {outline_top};
-}}
-.outline-toggle {{
-  position: fixed;
-  top: var(--outline-top);
-  left: 16px;
-  z-index: 20;
-  padding: 6px 10px;
-  border-radius: 8px;
-  border: 1px solid #d0d7de;
-  background: #f6f8fa;
-  cursor: pointer;
-}}
-.outline-panel {{
-  position: fixed;
-  top: calc(var(--outline-top) + 42px);
-  left: 16px;
-  width: 240px;
-  max-height: 60vh;
-  overflow: auto;
-  border: 1px solid #d0d7de;
-  border-radius: 10px;
-  background: #ffffff;
-  padding: 10px;
-  z-index: 20;
-  box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
-}}
-.outline-panel.collapsed {{
-  display: none;
-}}
-.outline-title {{
-  font-size: 12px;
-  text-transform: uppercase;
-  letter-spacing: 0.08em;
-  color: #57606a;
-  margin-bottom: 8px;
-}}
-.outline-list a {{
-  display: block;
-  color: #0969da;
-  text-decoration: none;
-  padding: 4px 0;
-}}
-.outline-list a:hover {{
-  text-decoration: underline;
-}}
-.back-to-top {{
-  position: fixed;
-  left: 16px;
-  bottom: 16px;
-  padding: 6px 10px;
-  border-radius: 999px;
-  border: 1px solid #d0d7de;
-  background: #ffffff;
-  cursor: pointer;
-  opacity: 0;
-  pointer-events: none;
-  transition: opacity 0.2s ease;
-  z-index: 20;
-}}
-.back-to-top.visible {{
-  opacity: 1;
-  pointer-events: auto;
-}}
-@media (max-width: 900px) {{
-  .outline-panel {{
-    width: 200px;
-  }}
-}}
-</style>
+{outline_css}
 """
     extra_scripts = f"""
 <script src="{_CDN_MERMAID}"></script>
@@ -2062,77 +3188,10 @@ if (window.renderMathInElement) {{
     throwOnError: false
   }});
 }}
-const outlineToggle = document.getElementById('outlineToggle');
-const outlinePanel = document.getElementById('outlinePanel');
-const outlineList = document.getElementById('outlineList');
-const backToTop = document.getElementById('backToTop');
-function slugify(text) {{
-  return text.toLowerCase().trim()
-    .replace(/[^a-z0-9\\s-]/g, '')
-    .replace(/\\s+/g, '-')
-    .replace(/-+/g, '-');
-}}
-function buildOutline() {{
-  if (!outlineList) return;
-  const content = document.getElementById('content');
-  if (!content) return;
-  const headings = content.querySelectorAll('h1, h2, h3, h4');
-  if (!headings.length) {{
-    outlineList.innerHTML = '<div class="muted">No headings</div>';
-    return;
-  }}
-  const used = new Set();
-  outlineList.innerHTML = '';
-  headings.forEach((heading) => {{
-    let id = heading.id;
-    if (!id) {{
-      const base = slugify(heading.textContent || 'section') || 'section';
-      id = base;
-      let i = 1;
-      while (used.has(id) || document.getElementById(id)) {{
-        id = `${{base}}-${{i++}}`;
-      }}
-      heading.id = id;
-    }}
-    used.add(id);
-    const level = parseInt(heading.tagName.slice(1), 10) || 1;
-    const link = document.createElement('a');
-    link.href = `#${{id}}`;
-    link.textContent = heading.textContent || '';
-    link.style.paddingLeft = `${{(level - 1) * 12}}px`;
-    outlineList.appendChild(link);
-  }});
-}}
-function toggleBackToTop() {{
-  if (!backToTop) return;
-  if (window.scrollY > 300) {{
-    backToTop.classList.add('visible');
-  }} else {{
-    backToTop.classList.remove('visible');
-  }}
-}}
-if (outlineToggle && outlinePanel) {{
-  outlineToggle.addEventListener('click', () => {{
-    outlinePanel.classList.toggle('collapsed');
-  }});
-}}
-if (backToTop) {{
-  backToTop.addEventListener('click', () => {{
-    window.scrollTo({{ top: 0, behavior: 'smooth' }});
-  }});
-}}
-buildOutline();
-window.addEventListener('scroll', toggleBackToTop);
-toggleBackToTop();
+{outline_js}
 </script>
 """
-    return HTMLResponse(shell(title, body, extra_head=extra_head, extra_scripts=extra_scripts))
+    return render_page(page_title, body, extra_head=extra_head, extra_scripts=extra_scripts + fullscreen_script)
 async def _api_stats(request: Request) -> JSONResponse:
@@ -2159,6 +3218,7 @@ async def _stats_page(request: Request) -> HTMLResponse:
 <div id="year" style="width:100%;height:360px"></div>
 <div id="month" style="width:100%;height:360px"></div>
 <div id="tags" style="width:100%;height:420px"></div>
+<div id="keywords" style="width:100%;height:420px"></div>
 <div id="authors" style="width:100%;height:420px"></div>
 <div id="venues" style="width:100%;height:420px"></div>
 """
@@ -2185,6 +3245,7 @@ async function main() {{
   bar('year', 'Publication Year', data.years || []);
   bar('month', 'Publication Month', data.months || []);
   bar('tags', 'Top Tags', (data.tags || []).slice(0, 20));
+  bar('keywords', 'Top Keywords', (data.keywords || []).slice(0, 20));
   bar('authors', 'Top Authors', (data.authors || []).slice(0, 20));
   bar('venues', 'Top Venues', (data.venues || []).slice(0, 20));
 }}
@@ -2272,7 +3333,7 @@ def create_app(
     cache_dir: Path | None = None,
     use_cache: bool = True,
 ) -> Starlette:
-    papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache)
+    papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache, pdf_roots=pdf_roots)
     md_roots = md_roots or []
     pdf_roots = pdf_roots or []

deepresearch-flow 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

deepresearch-flow 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl