deepresearch-flow 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import html
4
4
  import json
5
5
  import logging
6
+ import unicodedata
6
7
  from dataclasses import dataclass
7
8
  from html.parser import HTMLParser
8
9
  from pathlib import Path
@@ -32,6 +33,12 @@ try:
32
33
  except Exception:
33
34
  PYBTEX_AVAILABLE = False
34
35
 
36
+ try:
37
+ from pypdf import PdfReader
38
+ PYPDF_AVAILABLE = True
39
+ except Exception:
40
+ PYPDF_AVAILABLE = False
41
+
35
42
 
36
43
  _CDN_ECHARTS = "https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"
37
44
  _CDN_MERMAID = "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"
@@ -60,6 +67,7 @@ class PaperIndex:
60
67
  stats: dict[str, Any]
61
68
  md_path_by_hash: dict[str, Path]
62
69
  pdf_path_by_hash: dict[str, Path]
70
+ template_tags: list[str]
63
71
 
64
72
 
65
73
  def _split_csv(values: list[str]) -> list[str]:
@@ -189,6 +197,41 @@ def _extract_tags(paper: dict[str, Any]) -> list[str]:
189
197
  return []
190
198
 
191
199
 
200
+ def _extract_keywords(paper: dict[str, Any]) -> list[str]:
201
+ keywords = paper.get("keywords") or []
202
+ if isinstance(keywords, list):
203
+ return [str(keyword).strip() for keyword in keywords if str(keyword).strip()]
204
+ if isinstance(keywords, str):
205
+ parts = re.split(r"[;,]", keywords)
206
+ return [part.strip() for part in parts if part.strip()]
207
+ return []
208
+
209
+
210
+ _SUMMARY_FIELDS = (
211
+ "summary",
212
+ "abstract",
213
+ "keywords",
214
+ "question1",
215
+ "question2",
216
+ "question3",
217
+ "question4",
218
+ "question5",
219
+ "question6",
220
+ "question7",
221
+ "question8",
222
+ )
223
+
224
+
225
+ def _has_summary(paper: dict[str, Any], template_tags: list[str]) -> bool:
226
+ if template_tags:
227
+ return True
228
+ for key in _SUMMARY_FIELDS:
229
+ value = paper.get(key)
230
+ if isinstance(value, str) and value.strip():
231
+ return True
232
+ return False
233
+
234
+
192
235
  def _extract_venue(paper: dict[str, Any]) -> str:
193
236
  if isinstance(paper.get("bibtex"), dict):
194
237
  bib = paper.get("bibtex") or {}
@@ -223,13 +266,16 @@ def build_index(
223
266
  year_counts: dict[str, int] = {}
224
267
  month_counts: dict[str, int] = {}
225
268
  tag_counts: dict[str, int] = {}
269
+ keyword_counts: dict[str, int] = {}
226
270
  author_counts: dict[str, int] = {}
227
271
  venue_counts: dict[str, int] = {}
272
+ template_tag_counts: dict[str, int] = {}
228
273
 
229
274
  def add_index(index: dict[str, set[int]], key: str, idx: int) -> None:
230
275
  index.setdefault(key, set()).add(idx)
231
276
 
232
277
  for idx, paper in enumerate(papers):
278
+ is_pdf_only = bool(paper.get("_is_pdf_only"))
233
279
  source_hash = paper.get("source_hash")
234
280
  if not source_hash and paper.get("source_path"):
235
281
  source_hash = stable_hash(str(paper.get("source_path")))
@@ -258,31 +304,54 @@ def build_index(
258
304
  paper["_month"] = month_label
259
305
  add_index(by_year, _normalize_key(year_label), idx)
260
306
  add_index(by_month, _normalize_key(month_label), idx)
261
- year_counts[year_label] = year_counts.get(year_label, 0) + 1
262
- month_counts[month_label] = month_counts.get(month_label, 0) + 1
307
+ if not is_pdf_only:
308
+ year_counts[year_label] = year_counts.get(year_label, 0) + 1
309
+ month_counts[month_label] = month_counts.get(month_label, 0) + 1
263
310
 
264
311
  venue = _extract_venue(paper).strip()
265
312
  paper["_venue"] = venue
266
313
  if venue:
267
314
  add_index(by_venue, _normalize_key(venue), idx)
268
- venue_counts[venue] = venue_counts.get(venue, 0) + 1
315
+ if not is_pdf_only:
316
+ venue_counts[venue] = venue_counts.get(venue, 0) + 1
269
317
  else:
270
318
  add_index(by_venue, "unknown", idx)
271
- venue_counts["Unknown"] = venue_counts.get("Unknown", 0) + 1
319
+ if not is_pdf_only:
320
+ venue_counts["Unknown"] = venue_counts.get("Unknown", 0) + 1
272
321
 
273
322
  authors = _extract_authors(paper)
274
323
  paper["_authors"] = authors
275
324
  for author in authors:
276
325
  key = _normalize_key(author)
277
326
  add_index(by_author, key, idx)
278
- author_counts[author] = author_counts.get(author, 0) + 1
327
+ if not is_pdf_only:
328
+ author_counts[author] = author_counts.get(author, 0) + 1
279
329
 
280
330
  tags = _extract_tags(paper)
281
331
  paper["_tags"] = tags
282
332
  for tag in tags:
283
333
  key = _normalize_key(tag)
284
334
  add_index(by_tag, key, idx)
285
- tag_counts[tag] = tag_counts.get(tag, 0) + 1
335
+ if not is_pdf_only:
336
+ tag_counts[tag] = tag_counts.get(tag, 0) + 1
337
+
338
+ keywords = _extract_keywords(paper)
339
+ paper["_keywords"] = keywords
340
+ for keyword in keywords:
341
+ if not is_pdf_only:
342
+ keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
343
+
344
+ template_tags = _available_templates(paper)
345
+ if not template_tags:
346
+ fallback_tag = paper.get("template_tag") or paper.get("prompt_template")
347
+ if fallback_tag:
348
+ template_tags = [str(fallback_tag)]
349
+ paper["_template_tags"] = template_tags
350
+ paper["_template_tags_lc"] = [tag.lower() for tag in template_tags]
351
+ paper["_has_summary"] = _has_summary(paper, template_tags)
352
+ if not is_pdf_only:
353
+ for tag in template_tags:
354
+ template_tag_counts[tag] = template_tag_counts.get(tag, 0) + 1
286
355
 
287
356
  search_parts = [title, venue, " ".join(authors), " ".join(tags)]
288
357
  paper["_search_lc"] = " ".join(part for part in search_parts if part).lower()
@@ -305,15 +374,19 @@ def build_index(
305
374
 
306
375
  ordered_ids = [idx for idx, _ in sorted(enumerate(papers), key=year_sort_key)]
307
376
 
377
+ stats_total = sum(1 for paper in papers if not paper.get("_is_pdf_only"))
308
378
  stats = {
309
- "total": len(papers),
379
+ "total": stats_total,
310
380
  "years": _sorted_counts(year_counts, numeric_desc=True),
311
381
  "months": _sorted_month_counts(month_counts),
312
382
  "tags": _sorted_counts(tag_counts),
383
+ "keywords": _sorted_counts(keyword_counts),
313
384
  "authors": _sorted_counts(author_counts),
314
385
  "venues": _sorted_counts(venue_counts),
315
386
  }
316
387
 
388
+ template_tags = sorted(template_tag_counts.keys(), key=lambda item: item.lower())
389
+
317
390
  return PaperIndex(
318
391
  papers=papers,
319
392
  id_by_hash=id_by_hash,
@@ -326,6 +399,7 @@ def build_index(
326
399
  stats=stats,
327
400
  md_path_by_hash=md_path_by_hash,
328
401
  pdf_path_by_hash=pdf_path_by_hash,
402
+ template_tags=template_tags,
329
403
  )
330
404
 
331
405
 
@@ -422,7 +496,11 @@ def _infer_template_tag(papers: list[dict[str, Any]], path: Path) -> str:
422
496
  return best_tag
423
497
 
424
498
 
425
- def _build_cache_meta(db_paths: list[Path], bibtex_path: Path | None) -> dict[str, Any]:
499
+ def _build_cache_meta(
500
+ db_paths: list[Path],
501
+ bibtex_path: Path | None,
502
+ pdf_roots_meta: list[dict[str, Any]] | None = None,
503
+ ) -> dict[str, Any]:
426
504
  def file_meta(path: Path) -> dict[str, Any]:
427
505
  try:
428
506
  stats = path.stat()
@@ -435,6 +513,8 @@ def _build_cache_meta(db_paths: list[Path], bibtex_path: Path | None) -> dict[st
435
513
  "inputs": [file_meta(path) for path in db_paths],
436
514
  "bibtex": file_meta(bibtex_path) if bibtex_path else None,
437
515
  }
516
+ if pdf_roots_meta is not None:
517
+ meta["pdf_roots"] = pdf_roots_meta
438
518
  return meta
439
519
 
440
520
 
@@ -462,16 +542,72 @@ def _write_cached_papers(cache_dir: Path, meta: dict[str, Any], papers: list[dic
462
542
  data_path.write_text(json.dumps(papers, ensure_ascii=False, indent=2), encoding="utf-8")
463
543
 
464
544
 
545
+ def _extract_year_for_matching(paper: dict[str, Any]) -> str | None:
546
+ if isinstance(paper.get("bibtex"), dict):
547
+ fields = paper.get("bibtex", {}).get("fields", {}) or {}
548
+ year = fields.get("year")
549
+ if year and str(year).isdigit():
550
+ return str(year)
551
+ parsed_year, _ = _parse_year_month(str(paper.get("publication_date") or ""))
552
+ return parsed_year
553
+
554
+
555
+ def _prepare_paper_matching_fields(paper: dict[str, Any]) -> None:
556
+ if "_authors" not in paper:
557
+ paper["_authors"] = _extract_authors(paper)
558
+ if "_year" not in paper:
559
+ paper["_year"] = _extract_year_for_matching(paper) or ""
560
+
561
+
562
+ def _build_pdf_only_entries(
563
+ papers: list[dict[str, Any]],
564
+ pdf_paths: list[Path],
565
+ pdf_index: dict[str, list[Path]],
566
+ ) -> list[dict[str, Any]]:
567
+ matched: set[Path] = set()
568
+ for paper in papers:
569
+ _prepare_paper_matching_fields(paper)
570
+ pdf_path = _resolve_pdf(paper, pdf_index)
571
+ if pdf_path:
572
+ matched.add(pdf_path.resolve())
573
+
574
+ entries: list[dict[str, Any]] = []
575
+ for path in pdf_paths:
576
+ resolved = path.resolve()
577
+ if resolved in matched:
578
+ continue
579
+ title = _read_pdf_metadata_title(resolved) or _extract_title_from_filename(resolved.name)
580
+ if not title:
581
+ title = resolved.stem
582
+ year_hint, author_hint = _extract_year_author_from_filename(resolved.name)
583
+ entry: dict[str, Any] = {
584
+ "paper_title": title,
585
+ "paper_authors": [author_hint] if author_hint else [],
586
+ "publication_date": year_hint or "",
587
+ "source_hash": stable_hash(str(resolved)),
588
+ "source_path": str(resolved),
589
+ "_is_pdf_only": True,
590
+ }
591
+ entries.append(entry)
592
+ return entries
593
+
594
+
465
595
  def _load_or_merge_papers(
466
596
  db_paths: list[Path],
467
597
  bibtex_path: Path | None,
468
598
  cache_dir: Path | None,
469
599
  use_cache: bool,
600
+ pdf_roots: list[Path] | None = None,
470
601
  ) -> list[dict[str, Any]]:
471
602
  cache_meta = None
603
+ pdf_roots = pdf_roots or []
604
+ pdf_paths: list[Path] = []
605
+ pdf_roots_meta: list[dict[str, Any]] | None = None
606
+ if pdf_roots:
607
+ pdf_paths, pdf_roots_meta = _scan_pdf_roots(pdf_roots)
472
608
  if cache_dir and use_cache:
473
609
  cache_dir.mkdir(parents=True, exist_ok=True)
474
- cache_meta = _build_cache_meta(db_paths, bibtex_path)
610
+ cache_meta = _build_cache_meta(db_paths, bibtex_path, pdf_roots_meta)
475
611
  cached = _load_cached_papers(cache_dir, cache_meta)
476
612
  if cached is not None:
477
613
  return cached
@@ -481,6 +617,9 @@ def _load_or_merge_papers(
481
617
  for bundle in inputs:
482
618
  enrich_with_bibtex(bundle["papers"], bibtex_path)
483
619
  papers = _merge_paper_inputs(inputs)
620
+ if pdf_paths:
621
+ pdf_index = _build_file_index_from_paths(pdf_paths, suffixes={".pdf"})
622
+ papers.extend(_build_pdf_only_entries(papers, pdf_paths, pdf_index))
484
623
 
485
624
  if cache_dir and use_cache and cache_meta is not None:
486
625
  _write_cached_papers(cache_dir, cache_meta, papers)
@@ -488,7 +627,18 @@ def _load_or_merge_papers(
488
627
 
489
628
 
490
629
  def _md_renderer() -> MarkdownIt:
491
- return MarkdownIt("commonmark", {"html": False, "linkify": True})
630
+ md = MarkdownIt("commonmark", {"html": False, "linkify": True})
631
+ md.enable("table")
632
+ return md
633
+
634
+
635
+ def _strip_paragraph_wrapped_tables(text: str) -> str:
636
+ lines = text.splitlines()
637
+ for idx, line in enumerate(lines):
638
+ line = re.sub(r"^\s*<p>\s*\|", "|", line)
639
+ line = re.sub(r"\|\s*</p>\s*$", "|", line)
640
+ lines[idx] = line
641
+ return "\n".join(lines)
492
642
 
493
643
 
494
644
  def _normalize_merge_title(value: str | None) -> str | None:
@@ -648,6 +798,7 @@ def _merge_paper_inputs(inputs: list[dict[str, Any]]) -> list[dict[str, Any]]:
648
798
 
649
799
 
650
800
  def _render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
801
+ text = _strip_paragraph_wrapped_tables(text)
651
802
  rendered, table_placeholders = _extract_html_table_placeholders(text)
652
803
  rendered, img_placeholders = _extract_html_img_placeholders(rendered)
653
804
  rendered, placeholders = _extract_math_placeholders(rendered)
@@ -1062,6 +1213,369 @@ def _render_paper_markdown(
1062
1213
  return template.render(**context), str(template_name), warning
1063
1214
 
1064
1215
 
1216
# Tunables for matching paper titles against file names.

# Number of leading compact-key characters used for "prefix:" index keys;
# titles whose compact key is shorter get no prefix key (_title_prefix_key).
_TITLE_PREFIX_LEN = 16
# _title_overlap_match only accepts containment when the shorter title has
# at least this many characters, or at least _TITLE_MIN_TOKENS tokens.
_TITLE_MIN_CHARS = 24
# Minimum token count for a prefix key and for containment matching.
_TITLE_MIN_TOKENS = 4
# NOTE(review): not referenced by the code visible here; confirm its use.
_AUTHOR_YEAR_MIN_SIMILARITY = 0.8
# Leading all-digit tokens of at most this many digits are dropped by
# _strip_leading_numeric_tokens.
_LEADING_NUMERIC_MAX_LEN = 2
# _adaptive_similarity_match starts at this threshold and lowers it by
# _SIMILARITY_STEP for up to _SIMILARITY_MAX_STEPS rounds.
_SIMILARITY_START = 0.95
_SIMILARITY_STEP = 0.05
_SIMILARITY_MAX_STEPS = 10
1224
+
1225
+
1226
+ def _normalize_title_key(title: str) -> str:
1227
+ value = unicodedata.normalize("NFKD", title)
1228
+ greek_map = {
1229
+ "α": "alpha",
1230
+ "β": "beta",
1231
+ "γ": "gamma",
1232
+ "δ": "delta",
1233
+ "ε": "epsilon",
1234
+ "ζ": "zeta",
1235
+ "η": "eta",
1236
+ "θ": "theta",
1237
+ "ι": "iota",
1238
+ "κ": "kappa",
1239
+ "λ": "lambda",
1240
+ "μ": "mu",
1241
+ "ν": "nu",
1242
+ "ξ": "xi",
1243
+ "ο": "omicron",
1244
+ "π": "pi",
1245
+ "ρ": "rho",
1246
+ "σ": "sigma",
1247
+ "τ": "tau",
1248
+ "υ": "upsilon",
1249
+ "φ": "phi",
1250
+ "χ": "chi",
1251
+ "ψ": "psi",
1252
+ "ω": "omega",
1253
+ }
1254
+ for char, name in greek_map.items():
1255
+ value = value.replace(char, f" {name} ")
1256
+ value = re.sub(
1257
+ r"\\(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)\b",
1258
+ r" \1 ",
1259
+ value,
1260
+ flags=re.IGNORECASE,
1261
+ )
1262
+ value = value.replace("{", "").replace("}", "")
1263
+ value = value.replace("_", " ")
1264
+ value = re.sub(r"([a-z])([0-9])", r"\1 \2", value, flags=re.IGNORECASE)
1265
+ value = re.sub(r"([0-9])([a-z])", r"\1 \2", value, flags=re.IGNORECASE)
1266
+ value = re.sub(r"[^a-z0-9]+", " ", value.lower())
1267
+ value = re.sub(r"\s+", " ", value).strip()
1268
+ tokens = value.split()
1269
+ if not tokens:
1270
+ return ""
1271
+ merged: list[str] = []
1272
+ idx = 0
1273
+ while idx < len(tokens):
1274
+ token = tokens[idx]
1275
+ if len(token) == 1 and idx + 1 < len(tokens):
1276
+ merged.append(token + tokens[idx + 1])
1277
+ idx += 2
1278
+ continue
1279
+ merged.append(token)
1280
+ idx += 1
1281
+ return " ".join(merged)
1282
+
1283
+
1284
+ def _compact_title_key(title_key: str) -> str:
1285
+ return title_key.replace(" ", "")
1286
+
1287
+
1288
+ def _strip_leading_numeric_tokens(title_key: str) -> str:
1289
+ tokens = title_key.split()
1290
+ idx = 0
1291
+ while idx < len(tokens):
1292
+ token = tokens[idx]
1293
+ if token.isdigit() and len(token) <= _LEADING_NUMERIC_MAX_LEN:
1294
+ idx += 1
1295
+ continue
1296
+ break
1297
+ if idx == 0:
1298
+ return title_key
1299
+ return " ".join(tokens[idx:])
1300
+
1301
+
1302
+ def _strip_pdf_hash_suffix(name: str) -> str:
1303
+ return re.sub(r"(?i)(\.pdf)(?:-[0-9a-f\-]{8,})$", r"\1", name)
1304
+
1305
+
1306
+ def _extract_title_from_filename(name: str) -> str:
1307
+ base = name
1308
+ lower = base.lower()
1309
+ if lower.endswith(".md"):
1310
+ base = base[:-3]
1311
+ lower = base.lower()
1312
+ if ".pdf-" in lower:
1313
+ base = _strip_pdf_hash_suffix(base)
1314
+ lower = base.lower()
1315
+ if lower.endswith(".pdf"):
1316
+ base = base[:-4]
1317
+ base = base.replace("_", " ").strip()
1318
+ match = re.match(r"\s*\d{4}\s*-\s*(.+)$", base)
1319
+ if match:
1320
+ return match.group(1).strip()
1321
+ match = re.match(r"\s*.+?\s*-\s*\d{4}\s*-\s*(.+)$", base)
1322
+ if match:
1323
+ return match.group(1).strip()
1324
+ return base.strip()
1325
+
1326
+
1327
+ def _clean_pdf_metadata_title(value: str | None, path: Path) -> str | None:
1328
+ if not value:
1329
+ return None
1330
+ text = str(value).replace("\x00", "").strip()
1331
+ if not text:
1332
+ return None
1333
+ text = re.sub(r"(?i)^microsoft\\s+word\\s*-\\s*", "", text)
1334
+ text = re.sub(r"(?i)^pdf\\s*-\\s*", "", text)
1335
+ text = re.sub(r"(?i)^untitled\\b", "", text).strip()
1336
+ if text.lower().endswith(".pdf"):
1337
+ text = text[:-4].strip()
1338
+ if len(text) < 3:
1339
+ return None
1340
+ stem = path.stem.strip()
1341
+ if stem and text.lower() == stem.lower():
1342
+ return None
1343
+ return text
1344
+
1345
+
1346
def _read_pdf_metadata_title(path: Path) -> str | None:
    """Read and clean the title stored in a PDF's metadata.

    Returns ``None`` when pypdf is unavailable, when opening the file or
    reading its metadata raises anything (best effort by design), or when
    ``_clean_pdf_metadata_title`` rejects the value.
    """
    if not PYPDF_AVAILABLE:
        return None
    try:
        info = PdfReader(str(path)).metadata
        raw_title = info.title if info else None
    except Exception:
        return None
    return _clean_pdf_metadata_title(raw_title, path)
1356
+
1357
+
1358
+ def _is_pdf_like(path: Path) -> bool:
1359
+ suffix = path.suffix.lower()
1360
+ if suffix == ".pdf":
1361
+ return True
1362
+ name_lower = path.name.lower()
1363
+ return ".pdf-" in name_lower and not name_lower.endswith(".md")
1364
+
1365
+
1366
+ def _scan_pdf_roots(roots: list[Path]) -> tuple[list[Path], list[dict[str, Any]]]:
1367
+ pdf_paths: list[Path] = []
1368
+ meta: list[dict[str, Any]] = []
1369
+ seen: set[Path] = set()
1370
+ for root in roots:
1371
+ try:
1372
+ if not root.exists() or not root.is_dir():
1373
+ continue
1374
+ except OSError:
1375
+ continue
1376
+ files: list[Path] = []
1377
+ for path in root.rglob("*"):
1378
+ try:
1379
+ if not path.is_file():
1380
+ continue
1381
+ except OSError:
1382
+ continue
1383
+ if not _is_pdf_like(path):
1384
+ continue
1385
+ resolved = path.resolve()
1386
+ if resolved in seen:
1387
+ continue
1388
+ seen.add(resolved)
1389
+ files.append(resolved)
1390
+ max_mtime = 0.0
1391
+ total_size = 0
1392
+ for path in files:
1393
+ try:
1394
+ stats = path.stat()
1395
+ except OSError:
1396
+ continue
1397
+ max_mtime = max(max_mtime, stats.st_mtime)
1398
+ total_size += stats.st_size
1399
+ pdf_paths.extend(files)
1400
+ meta.append(
1401
+ {
1402
+ "path": str(root),
1403
+ "count": len(files),
1404
+ "max_mtime": max_mtime,
1405
+ "size": total_size,
1406
+ }
1407
+ )
1408
+ return pdf_paths, meta
1409
+
1410
+
1411
+ def _extract_year_author_from_filename(name: str) -> tuple[str | None, str | None]:
1412
+ base = name
1413
+ lower = base.lower()
1414
+ if lower.endswith(".md"):
1415
+ base = base[:-3]
1416
+ lower = base.lower()
1417
+ if ".pdf-" in lower:
1418
+ base = _strip_pdf_hash_suffix(base)
1419
+ lower = base.lower()
1420
+ if lower.endswith(".pdf"):
1421
+ base = base[:-4]
1422
+ match = re.match(r"\s*(.+?)\s*-\s*((?:19|20)\d{2})\s*-\s*", base)
1423
+ if match:
1424
+ return match.group(2), match.group(1).strip()
1425
+ match = re.match(r"\s*((?:19|20)\d{2})\s*-\s*", base)
1426
+ if match:
1427
+ return match.group(1), None
1428
+ return None, None
1429
+
1430
+
1431
+ def _normalize_author_key(name: str) -> str:
1432
+ raw = name.lower().strip()
1433
+ raw = raw.replace("et al.", "").replace("et al", "")
1434
+ if "," in raw:
1435
+ raw = raw.split(",", 1)[0]
1436
+ raw = re.sub(r"[^a-z0-9]+", " ", raw)
1437
+ raw = re.sub(r"\s+", " ", raw).strip()
1438
+ if not raw:
1439
+ return ""
1440
+ parts = raw.split()
1441
+ return parts[-1] if parts else raw
1442
+
1443
+
1444
def _title_prefix_key(title_key: str) -> str | None:
    """Build the ``prefix:<chars>`` index key for a normalised title.

    Returns ``None`` when the title has fewer than ``_TITLE_MIN_TOKENS``
    tokens or its space-free form is shorter than ``_TITLE_PREFIX_LEN``
    characters; otherwise the key holds the first ``_TITLE_PREFIX_LEN``
    characters of the space-free form.
    """
    if len(title_key.split()) < _TITLE_MIN_TOKENS:
        return None
    squeezed = _compact_title_key(title_key)
    if len(squeezed) < _TITLE_PREFIX_LEN:
        return None
    head = squeezed[:_TITLE_PREFIX_LEN]
    return f"prefix:{head}" if head else None
1454
+
1455
+
1456
+ def _title_overlap_match(a: str, b: str) -> bool:
1457
+ if not a or not b:
1458
+ return False
1459
+ if a == b:
1460
+ return True
1461
+ shorter, longer = (a, b) if len(a) <= len(b) else (b, a)
1462
+ token_count = len(shorter.split())
1463
+ if len(shorter) >= _TITLE_MIN_CHARS or token_count >= _TITLE_MIN_TOKENS:
1464
+ if longer.startswith(shorter) or shorter in longer:
1465
+ return True
1466
+ return False
1467
+
1468
+
1469
+ def _adaptive_similarity_match(title_key: str, candidates: list[Path]) -> Path | None:
1470
+ if not title_key:
1471
+ return None
1472
+ scored: list[tuple[Path, float]] = []
1473
+ for path in candidates:
1474
+ candidate_title = _normalize_title_key(_extract_title_from_filename(path.name))
1475
+ if not candidate_title:
1476
+ continue
1477
+ if _title_overlap_match(title_key, candidate_title):
1478
+ return path
1479
+ scored.append((path, _title_similarity(title_key, candidate_title)))
1480
+ if not scored:
1481
+ return None
1482
+
1483
+ def matches_at(threshold: float) -> list[Path]:
1484
+ return [path for path, score in scored if score >= threshold]
1485
+
1486
+ threshold = _SIMILARITY_START
1487
+ step = _SIMILARITY_STEP
1488
+ prev_threshold = None
1489
+ prev_count = None
1490
+ for _ in range(_SIMILARITY_MAX_STEPS):
1491
+ matches = matches_at(threshold)
1492
+ if len(matches) == 1:
1493
+ return matches[0]
1494
+ if len(matches) == 0:
1495
+ prev_threshold = threshold
1496
+ prev_count = 0
1497
+ threshold -= step
1498
+ continue
1499
+ if prev_count == 0 and prev_threshold is not None:
1500
+ low = threshold
1501
+ high = prev_threshold
1502
+ for _ in range(_SIMILARITY_MAX_STEPS):
1503
+ mid = (low + high) / 2
1504
+ mid_matches = matches_at(mid)
1505
+ if len(mid_matches) == 1:
1506
+ return mid_matches[0]
1507
+ if len(mid_matches) == 0:
1508
+ high = mid
1509
+ else:
1510
+ low = mid
1511
+ return None
1512
+ prev_threshold = threshold
1513
+ prev_count = len(matches)
1514
+ threshold -= step
1515
+ return None
1516
+
1517
+
1518
def _resolve_by_title_and_meta(
    paper: dict[str, Any],
    file_index: dict[str, list[Path]],
) -> Path | None:
    """Find the indexed file that belongs to *paper* using title and metadata.

    Lookups are tried from strictest to loosest, first hit wins:

    1. exact index keys: normalised title, its compact form, then the same
       two with leading short numeric tokens stripped;
    2. title-prefix buckets, disambiguated by ``_adaptive_similarity_match``;
    3. ``authoryear:<year>:<author>`` and then ``year:<year>`` buckets,
       using the paper's ``_year`` and first ``_authors`` entry. A lone
       candidate is accepted when the paper has no usable title; otherwise
       similarity decides.

    Returns ``None`` when nothing matches.
    """
    title_key = _normalize_title_key(str(paper.get("paper_title") or "")) or ""

    # Leading-number-free variant of the title, if it differs from the title.
    numberless = _strip_leading_numeric_tokens(title_key) if title_key else ""
    has_numberless = bool(numberless) and numberless != title_key

    # Stage 1: exact keys.
    exact_keys = [title_key]
    if title_key:
        exact_keys.append(f"compact:{_compact_title_key(title_key)}")
        if has_numberless:
            exact_keys.append(numberless)
            exact_keys.append(f"compact:{_compact_title_key(numberless)}")
    for key in exact_keys:
        hits = file_index.get(key, [])
        if hits:
            return hits[0]

    # Stage 2: prefix buckets, full title first, numberless title second.
    prefix_hits: list[Path] = []
    prefix_key = _title_prefix_key(title_key)
    if prefix_key:
        prefix_hits = file_index.get(prefix_key, [])
    if not prefix_hits and has_numberless:
        prefix_key = _title_prefix_key(numberless)
        if prefix_key:
            prefix_hits = file_index.get(prefix_key, [])
    if prefix_hits:
        match = _adaptive_similarity_match(title_key, prefix_hits)
        if match is not None:
            return match

    # Stage 3: year / author-year buckets.
    year = str(paper.get("_year") or "").strip()
    if not year.isdigit():
        return None
    authors = paper.get("_authors") or []
    author_key = _normalize_author_key(str(authors[0])) if authors else ""
    candidates: list[Path] = []
    if author_key:
        candidates = file_index.get(f"authoryear:{year}:{author_key}", [])
    if not candidates:
        candidates = file_index.get(f"year:{year}", [])
    if not candidates:
        return None
    if len(candidates) == 1 and not title_key:
        return candidates[0]
    return _adaptive_similarity_match(title_key, candidates)
1577
+
1578
+
1065
1579
  def _build_file_index(roots: list[Path], *, suffixes: set[str]) -> dict[str, list[Path]]:
1066
1580
  index: dict[str, list[Path]] = {}
1067
1581
  for root in roots:
@@ -1076,19 +1590,97 @@ def _build_file_index(roots: list[Path], *, suffixes: set[str]) -> dict[str, lis
1076
1590
  continue
1077
1591
  except OSError:
1078
1592
  continue
1079
- if path.suffix.lower() not in suffixes:
1593
+ suffix = path.suffix.lower()
1594
+ if suffix not in suffixes:
1595
+ name_lower = path.name.lower()
1596
+ if suffixes == {".pdf"} and ".pdf-" in name_lower and suffix != ".md":
1597
+ pass
1598
+ else:
1599
+ continue
1600
+ resolved = path.resolve()
1601
+ name_key = path.name.lower()
1602
+ index.setdefault(name_key, []).append(resolved)
1603
+ title_candidate = _extract_title_from_filename(path.name)
1604
+ title_key = _normalize_title_key(title_candidate)
1605
+ if title_key:
1606
+ if title_key != name_key:
1607
+ index.setdefault(title_key, []).append(resolved)
1608
+ compact_key = _compact_title_key(title_key)
1609
+ if compact_key:
1610
+ index.setdefault(f"compact:{compact_key}", []).append(resolved)
1611
+ prefix_key = _title_prefix_key(title_key)
1612
+ if prefix_key:
1613
+ index.setdefault(prefix_key, []).append(resolved)
1614
+ stripped_key = _strip_leading_numeric_tokens(title_key)
1615
+ if stripped_key and stripped_key != title_key:
1616
+ index.setdefault(stripped_key, []).append(resolved)
1617
+ stripped_compact = _compact_title_key(stripped_key)
1618
+ if stripped_compact:
1619
+ index.setdefault(f"compact:{stripped_compact}", []).append(resolved)
1620
+ stripped_prefix = _title_prefix_key(stripped_key)
1621
+ if stripped_prefix:
1622
+ index.setdefault(stripped_prefix, []).append(resolved)
1623
+ year_hint, author_hint = _extract_year_author_from_filename(path.name)
1624
+ if year_hint:
1625
+ index.setdefault(f"year:{year_hint}", []).append(resolved)
1626
+ if author_hint:
1627
+ author_key = _normalize_author_key(author_hint)
1628
+ if author_key:
1629
+ index.setdefault(f"authoryear:{year_hint}:{author_key}", []).append(resolved)
1630
+ return index
1631
+
1632
+
1633
+ def _build_file_index_from_paths(paths: list[Path], *, suffixes: set[str]) -> dict[str, list[Path]]:
1634
+ index: dict[str, list[Path]] = {}
1635
+ for path in paths:
1636
+ try:
1637
+ if not path.is_file():
1638
+ continue
1639
+ except OSError:
1640
+ continue
1641
+ suffix = path.suffix.lower()
1642
+ if suffix not in suffixes:
1643
+ name_lower = path.name.lower()
1644
+ if suffixes == {".pdf"} and ".pdf-" in name_lower and suffix != ".md":
1645
+ pass
1646
+ else:
1080
1647
  continue
1081
- index.setdefault(path.name.lower(), []).append(path.resolve())
1648
+ resolved = path.resolve()
1649
+ name_key = path.name.lower()
1650
+ index.setdefault(name_key, []).append(resolved)
1651
+ title_candidate = _extract_title_from_filename(path.name)
1652
+ title_key = _normalize_title_key(title_candidate)
1653
+ if title_key:
1654
+ if title_key != name_key:
1655
+ index.setdefault(title_key, []).append(resolved)
1656
+ compact_key = _compact_title_key(title_key)
1657
+ if compact_key:
1658
+ index.setdefault(f"compact:{compact_key}", []).append(resolved)
1659
+ prefix_key = _title_prefix_key(title_key)
1660
+ if prefix_key:
1661
+ index.setdefault(prefix_key, []).append(resolved)
1662
+ stripped_key = _strip_leading_numeric_tokens(title_key)
1663
+ if stripped_key and stripped_key != title_key:
1664
+ index.setdefault(stripped_key, []).append(resolved)
1665
+ stripped_compact = _compact_title_key(stripped_key)
1666
+ if stripped_compact:
1667
+ index.setdefault(f"compact:{stripped_compact}", []).append(resolved)
1668
+ stripped_prefix = _title_prefix_key(stripped_key)
1669
+ if stripped_prefix:
1670
+ index.setdefault(stripped_prefix, []).append(resolved)
1082
1671
  return index
1083
1672
 
1084
1673
 
1085
1674
  def _resolve_source_md(paper: dict[str, Any], md_index: dict[str, list[Path]]) -> Path | None:
1086
1675
  source_path = paper.get("source_path")
1087
1676
  if not source_path:
1088
- return None
1089
- name = Path(str(source_path)).name.lower()
1090
- candidates = md_index.get(name, [])
1091
- return candidates[0] if candidates else None
1677
+ source_path = ""
1678
+ if source_path:
1679
+ name = Path(str(source_path)).name.lower()
1680
+ candidates = md_index.get(name, [])
1681
+ if candidates:
1682
+ return candidates[0]
1683
+ return _resolve_by_title_and_meta(paper, md_index)
1092
1684
 
1093
1685
 
1094
1686
  def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
@@ -1102,6 +1694,8 @@ def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
1102
1694
  if ".pdf-" in name.lower():
1103
1695
  base = name[: name.lower().rfind(".pdf-") + 4]
1104
1696
  return [Path(base).name]
1697
+ if name.lower().endswith(".pdf"):
1698
+ return [name]
1105
1699
  if name.lower().endswith(".pdf.md"):
1106
1700
  return [name[:-3]]
1107
1701
  return []
@@ -1112,7 +1706,7 @@ def _resolve_pdf(paper: dict[str, Any], pdf_index: dict[str, list[Path]]) -> Pat
1112
1706
  candidates = pdf_index.get(filename.lower(), [])
1113
1707
  if candidates:
1114
1708
  return candidates[0]
1115
- return None
1709
+ return _resolve_by_title_and_meta(paper, pdf_index)
1116
1710
 
1117
1711
 
1118
1712
  def _ensure_under_roots(path: Path, roots: list[Path]) -> bool:
@@ -1126,6 +1720,150 @@ def _ensure_under_roots(path: Path, roots: list[Path]) -> bool:
1126
1720
  return False
1127
1721
 
1128
1722
 
1723
# Lower-case tokens that _normalize_presence_value maps to "with".
_BOOL_TRUE = {"1", "true", "yes", "with", "has"}
# Lower-case tokens that _normalize_presence_value maps to "without".
_BOOL_FALSE = {"0", "false", "no", "without"}
1725
+
1726
+
1727
+ def _tokenize_filter_query(text: str) -> list[str]:
1728
+ out: list[str] = []
1729
+ buf: list[str] = []
1730
+ in_quote = False
1731
+
1732
+ for ch in text:
1733
+ if ch == '"':
1734
+ in_quote = not in_quote
1735
+ continue
1736
+ if not in_quote and ch.isspace():
1737
+ token = "".join(buf).strip()
1738
+ if token:
1739
+ out.append(token)
1740
+ buf = []
1741
+ continue
1742
+ buf.append(ch)
1743
+
1744
+ token = "".join(buf).strip()
1745
+ if token:
1746
+ out.append(token)
1747
+ return out
1748
+
1749
+
1750
def _normalize_presence_value(value: str) -> str | None:
    """Map a truthy/falsy word to ``"with"`` / ``"without"``.

    The value is stripped and lower-cased, then looked up in ``_BOOL_TRUE``
    and ``_BOOL_FALSE``. Unrecognised values yield ``None``.
    """
    word = value.strip().lower()
    for vocabulary, label in ((_BOOL_TRUE, "with"), (_BOOL_FALSE, "without")):
        if word in vocabulary:
            return label
    return None
1757
+
1758
+
1759
def _parse_filter_query(text: str) -> dict[str, set[str]]:
    """Parse ``key:value`` filter tokens into per-facet value sets.

    Recognised keys (case-insensitive), each taking a comma-separated value:

    * ``tmpl`` / ``template``: lower-cased template tags;
    * ``pdf`` / ``source`` / ``summary``: presence words, normalised to
      ``"with"`` / ``"without"``;
    * ``has`` / ``no``: facet names among pdf, source and summary, recorded
      as ``"with"`` / ``"without"`` respectively.

    Tokens without a colon, with an empty value, or with an unknown key are
    ignored. The result always has the keys ``pdf``, ``source``,
    ``summary`` and ``template``.
    """
    presence_facets = {"pdf", "source", "summary"}
    parsed: dict[str, set[str]] = {
        "pdf": set(),
        "source": set(),
        "summary": set(),
        "template": set(),
    }
    for token in _tokenize_filter_query(text):
        if ":" not in token:
            continue
        key, _, raw_value = token.partition(":")
        key = key.strip().lower()
        raw_value = raw_value.strip()
        if not raw_value:
            continue
        pieces = raw_value.split(",")
        if key in {"tmpl", "template"}:
            for piece in pieces:
                tag = piece.strip()
                if tag:
                    parsed["template"].add(tag.lower())
        elif key in presence_facets:
            for piece in pieces:
                normalized = _normalize_presence_value(piece)
                if normalized:
                    parsed[key].add(normalized)
        elif key in {"has", "no"}:
            label = "with" if key == "has" else "without"
            for piece in pieces:
                target = piece.strip().lower()
                if target in presence_facets:
                    parsed[target].add(label)
    return parsed
1793
+
1794
+
1795
+ def _presence_filter(values: list[str]) -> set[str] | None:
1796
+ normalized = set()
1797
+ for value in values:
1798
+ token = _normalize_presence_value(value)
1799
+ if token:
1800
+ normalized.add(token)
1801
+ if not normalized or normalized == {"with", "without"}:
1802
+ return None
1803
+ return normalized
1804
+
1805
+
1806
+ def _merge_filter_set(primary: set[str] | None, secondary: set[str] | None) -> set[str] | None:
1807
+ if not primary:
1808
+ return secondary
1809
+ if not secondary:
1810
+ return primary
1811
+ return primary & secondary
1812
+
1813
+
1814
+ def _matches_presence(allowed: set[str] | None, has_value: bool) -> bool:
1815
+ if not allowed:
1816
+ return True
1817
+ if has_value and "with" in allowed:
1818
+ return True
1819
+ if not has_value and "without" in allowed:
1820
+ return True
1821
+ return False
1822
+
1823
+
1824
+ def _template_tag_map(index: PaperIndex) -> dict[str, str]:
1825
+ return {tag.lower(): tag for tag in index.template_tags}
1826
+
1827
+
1828
def _compute_counts(index: PaperIndex, ids: set[int]) -> dict[str, Any]:
    """Tally facet counts for the papers selected by *ids*.

    PDF-only placeholder entries are skipped entirely. For every other
    paper the totals record whether its hash has a source markdown file, a
    PDF, and summary content, and each of its template tags known to the
    index is counted under the tag's display form.

    Returns a dict with ``total``, ``pdf``, ``source``, ``summary``,
    ``templates`` (display tag -> count) and ``template_order``.
    """
    template_order = list(index.template_tags)
    template_counts = dict.fromkeys(template_order, 0)
    display_by_lower = _template_tag_map(index)
    tallies = {"total": 0, "pdf": 0, "source": 0, "summary": 0}

    for idx in ids:
        paper = index.papers[idx]
        if paper.get("_is_pdf_only"):
            continue
        tallies["total"] += 1
        # Same fallback chain as elsewhere: explicit hash, else a hash of
        # the source path (or of the index position when there is no path).
        source_hash = str(
            paper.get("source_hash") or stable_hash(str(paper.get("source_path") or idx))
        )
        if source_hash in index.md_path_by_hash:
            tallies["source"] += 1
        if source_hash in index.pdf_path_by_hash:
            tallies["pdf"] += 1
        if paper.get("_has_summary"):
            tallies["summary"] += 1
        for tag_lc in paper.get("_template_tags_lc") or []:
            display = display_by_lower.get(tag_lc)
            if display:
                template_counts[display] = template_counts.get(display, 0) + 1

    return {
        **tallies,
        "templates": template_counts,
        "template_order": template_order,
    }
1865
+
1866
+
1129
1867
  def _apply_query(index: PaperIndex, query: Query) -> set[int]:
1130
1868
  all_ids = set(index.ordered_ids)
1131
1869
 
@@ -1182,7 +1920,30 @@ def _apply_query(index: PaperIndex, query: Query) -> set[int]:
1182
1920
  return result
1183
1921
 
1184
1922
 
1185
- def _page_shell(title: str, body_html: str, extra_head: str = "", extra_scripts: str = "") -> str:
1923
+ def _page_shell(
1924
+ title: str,
1925
+ body_html: str,
1926
+ extra_head: str = "",
1927
+ extra_scripts: str = "",
1928
+ header_title: str | None = None,
1929
+ ) -> str:
1930
+ header_html = """
1931
+ <header>
1932
+ <a href="/">Papers</a>
1933
+ <a href="/stats">Stats</a>
1934
+ </header>
1935
+ """
1936
+ if header_title:
1937
+ safe_title = html.escape(header_title)
1938
+ header_html = f"""
1939
+ <header class="detail-header">
1940
+ <div class="header-row">
1941
+ <a class="header-back" href="/">← Papers</a>
1942
+ <span class="header-title" title="{safe_title}">{safe_title}</span>
1943
+ <a class="header-link" href="/stats">Stats</a>
1944
+ </div>
1945
+ </header>
1946
+ """
1186
1947
  return f"""<!doctype html>
1187
1948
  <html lang="en">
1188
1949
  <head>
@@ -1193,28 +1954,83 @@ def _page_shell(title: str, body_html: str, extra_head: str = "", extra_scripts:
1193
1954
  body {{ font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial; margin: 0; }}
1194
1955
  header {{ position: sticky; top: 0; background: #0b1220; color: #fff; padding: 12px 16px; z-index: 10; }}
1195
1956
  header a {{ color: #cfe3ff; text-decoration: none; margin-right: 12px; }}
1957
+ .detail-header .header-row {{ display: grid; grid-template-columns: auto minmax(0, 1fr) auto; align-items: center; gap: 12px; }}
1958
+ .detail-header .header-title {{ text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
1959
+ .detail-header .header-back {{ margin-right: 0; }}
1960
+ .detail-header .header-link {{ margin-right: 0; }}
1196
1961
  .container {{ max-width: 1100px; margin: 0 auto; padding: 16px; }}
1197
1962
  .filters {{ display: grid; grid-template-columns: repeat(6, 1fr); gap: 8px; margin: 12px 0 16px; }}
1198
1963
  .filters input {{ width: 100%; padding: 8px; border: 1px solid #d0d7de; border-radius: 6px; }}
1964
+ .filters select {{ width: 100%; border: 1px solid #d0d7de; border-radius: 6px; background: #fff; font-size: 13px; }}
1965
+ .filters select:not([multiple]) {{ padding: 6px 8px; }}
1966
+ .filters select[multiple] {{ padding: 2px; line-height: 1.25; min-height: 72px; font-size: 13px; }}
1967
+ .filters select[multiple] option {{ padding: 2px 6px; line-height: 1.25; }}
1968
+ .filters label {{ font-size: 12px; color: #57606a; }}
1969
+ .filter-group {{ display: flex; flex-direction: column; gap: 4px; }}
1199
1970
  .card {{ border: 1px solid #d0d7de; border-radius: 10px; padding: 12px; margin: 10px 0; }}
1200
1971
  .muted {{ color: #57606a; font-size: 13px; }}
1201
1972
  .pill {{ display: inline-block; padding: 2px 8px; border-radius: 999px; border: 1px solid #d0d7de; margin-right: 6px; font-size: 12px; }}
1973
+ .pill.template {{ border-color: #8a92a5; color: #243b53; background: #f6f8fa; }}
1974
+ .pill.pdf-only {{ border-color: #c8a951; background: #fff8dc; color: #5b4a00; }}
1202
1975
  .warning {{ background: #fff4ce; border: 1px solid #ffd089; padding: 10px; border-radius: 10px; margin: 12px 0; }}
1203
1976
  .tabs {{ display: flex; gap: 8px; flex-wrap: wrap; }}
1204
1977
  .tab {{ display: inline-block; padding: 6px 12px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; color: #0969da; text-decoration: none; font-size: 13px; }}
1205
1978
  .tab:hover {{ background: #eef1f4; }}
1206
1979
  .tab.active {{ background: #0969da; border-color: #0969da; color: #fff; }}
1980
+ .detail-shell {{ display: flex; flex-direction: column; gap: 12px; min-height: calc(100vh - 120px); }}
1981
+ .detail-toolbar {{ display: flex; flex-wrap: wrap; align-items: center; justify-content: flex-start; gap: 12px; padding: 6px 8px 10px; border-bottom: 1px solid #e5e7eb; box-sizing: border-box; }}
1982
+ .detail-toolbar .tabs {{ margin: 0; }}
1983
+ .toolbar-actions {{ display: flex; flex-wrap: wrap; align-items: center; gap: 10px; margin-left: auto; padding-right: 16px; }}
1984
+ .split-inline {{ display: flex; flex-wrap: wrap; align-items: center; gap: 6px; }}
1985
+ .split-inline select {{ padding: 6px 8px; border-radius: 8px; border: 1px solid #d0d7de; background: #fff; min-width: 140px; }}
1986
+ .split-actions {{ display: flex; align-items: center; justify-content: center; gap: 8px; }}
1987
+ .split-actions button {{ padding: 6px 10px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; min-width: 36px; }}
1988
+ .fullscreen-actions {{ display: flex; align-items: center; gap: 6px; }}
1989
+ .fullscreen-actions button {{ padding: 6px 10px; border-radius: 8px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; }}
1990
+ .fullscreen-exit {{ display: none; }}
1991
+ body.detail-fullscreen {{ overflow: hidden; --outline-top: 16px; }}
1992
+ body.detail-fullscreen header {{ display: none; }}
1993
+ body.detail-fullscreen .container {{ max-width: 100%; padding: 0; }}
1994
+ body.detail-fullscreen .detail-shell {{
1995
+ position: fixed;
1996
+ inset: 0;
1997
+ padding: 12px 16px;
1998
+ background: #fff;
1999
+ z-index: 40;
2000
+ overflow: auto;
2001
+ }}
2002
+ body.detail-fullscreen .detail-toolbar {{ position: sticky; top: 0; background: #fff; z-index: 41; }}
2003
+ body.detail-fullscreen .fullscreen-enter {{ display: none; }}
2004
+ body.detail-fullscreen .fullscreen-exit {{ display: inline-flex; }}
2005
+ .detail-body {{ display: flex; flex-direction: column; gap: 8px; flex: 1; min-height: 0; }}
2006
+ .help-icon {{ display: inline-flex; align-items: center; justify-content: center; width: 18px; height: 18px; border-radius: 50%; border: 1px solid #d0d7de; color: #57606a; font-size: 12px; cursor: default; position: relative; }}
2007
+ .help-icon::after {{ content: attr(data-tip); display: none; position: absolute; top: 24px; right: 0; background: #0b1220; color: #e6edf3; padding: 8px 10px; border-radius: 8px; font-size: 12px; white-space: pre-line; width: 260px; z-index: 20; }}
2008
+ .help-icon:hover::after {{ display: block; }}
2009
+ .stats {{ margin: 12px 0 6px; }}
2010
+ .stats-row {{ display: flex; flex-wrap: wrap; gap: 6px; align-items: center; }}
2011
+ .stats-label {{ font-weight: 600; color: #0b1220; margin-right: 4px; }}
2012
+ .pill.stat {{ background: #f6f8fa; border-color: #c7d2e0; color: #1f2a37; }}
1207
2013
  pre {{ overflow: auto; padding: 10px; background: #0b1220; color: #e6edf3; border-radius: 10px; }}
1208
2014
  code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
1209
2015
  a {{ color: #0969da; }}
2016
+ @media (max-width: 768px) {{
2017
+ .detail-toolbar {{
2018
+ flex-wrap: nowrap;
2019
+ overflow-x: auto;
2020
+ padding-bottom: 8px;
2021
+ }}
2022
+ .detail-toolbar::-webkit-scrollbar {{ height: 6px; }}
2023
+ .detail-toolbar::-webkit-scrollbar-thumb {{ background: #c7d2e0; border-radius: 999px; }}
2024
+ .detail-toolbar .tabs,
2025
+ .toolbar-actions {{
2026
+ flex: 0 0 auto;
2027
+ }}
2028
+ }}
1210
2029
  </style>
1211
2030
  {extra_head}
1212
2031
  </head>
1213
2032
  <body>
1214
- <header>
1215
- <a href="/">Papers</a>
1216
- <a href="/stats">Stats</a>
1217
- </header>
2033
+ {header_html}
1218
2034
  <div class="container">
1219
2035
  {body_html}
1220
2036
  </div>
@@ -1253,106 +2069,362 @@ def _build_pdfjs_viewer_url(pdf_url: str) -> str:
1253
2069
  return f"{_PDFJS_VIEWER_PATH}?file={encoded}"
1254
2070
 
1255
2071
 
1256
- async def _index_page(request: Request) -> HTMLResponse:
1257
- return HTMLResponse(
1258
- _page_shell(
1259
- "Paper DB",
1260
- """
1261
- <h2>Paper Database</h2>
1262
- <div class="card">
1263
- <div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
1264
- <div style="display:flex; gap:8px; margin-top:8px;">
1265
- <input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
1266
- <select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
1267
- <option value="summary" selected>Open: Summary</option>
1268
- <option value="source">Open: Source</option>
1269
- <option value="pdf">Open: PDF</option>
1270
- <option value="pdfjs">Open: PDF Viewer</option>
1271
- <option value="split">Open: Split</option>
1272
- </select>
1273
- </div>
1274
- <details style="margin-top:10px;">
1275
- <summary>Advanced search</summary>
1276
- <div style="margin-top:10px;" class="muted">Build a query:</div>
1277
- <div class="filters" style="grid-template-columns: repeat(3, 1fr);">
1278
- <input id="advTitle" placeholder="title contains..." />
1279
- <input id="advAuthor" placeholder="author contains..." />
1280
- <input id="advTag" placeholder="tag (comma separated)" />
1281
- <input id="advYear" placeholder="year (e.g. 2020..2024)" />
1282
- <input id="advMonth" placeholder="month (01-12)" />
1283
- <input id="advVenue" placeholder="venue contains..." />
1284
- </div>
1285
- <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
1286
- <button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
1287
- <div class="muted">Generated: <code id="generated"></code></div>
1288
- </div>
1289
- </details>
2072
def _outline_assets(outline_top: str) -> tuple[str, str, str]:
    """Build the markup, styles and script for the floating outline widget.

    Args:
        outline_top: CSS length substituted verbatim into the
            ``--outline-top`` custom property, which positions the toggle
            button and (42px lower) the outline panel.

    Returns:
        A ``(outline_html, outline_css, outline_js)`` tuple:

        * ``outline_html`` - the toggle button, the initially collapsed
          outline panel and the back-to-top button.
        * ``outline_css`` - a ``<style>`` element for those widgets.
        * ``outline_js`` - bare JavaScript statements (no ``<script>``
          wrapper) that build the outline from the ``h1``-``h4`` headings
          inside the element with id ``content``, wire up the panel toggle,
          and show the back-to-top button once the window has scrolled more
          than 300px.
    """
    # Static markup; the ids are looked up by the script in outline_js.
    outline_html = """
<button id="outlineToggle" class="outline-toggle" title="Toggle outline">☰</button>
<div id="outlinePanel" class="outline-panel collapsed">
  <div class="outline-title">Outline</div>
  <div id="outlineList" class="outline-list"></div>
</div>
<button id="backToTop" class="back-to-top" title="Back to top">↑</button>
"""
    # f-string: literal CSS braces are doubled, only outline_top is interpolated.
    outline_css = f"""
<style>
:root {{
  --outline-top: {outline_top};
}}
.outline-toggle {{
  position: fixed;
  top: var(--outline-top);
  left: 16px;
  z-index: 20;
  padding: 6px 10px;
  border-radius: 8px;
  border: 1px solid #d0d7de;
  background: #f6f8fa;
  cursor: pointer;
}}
.outline-panel {{
  position: fixed;
  top: calc(var(--outline-top) + 42px);
  left: 16px;
  width: 240px;
  max-height: 60vh;
  overflow: auto;
  border: 1px solid #d0d7de;
  border-radius: 10px;
  background: #ffffff;
  padding: 10px;
  z-index: 20;
  box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
}}
.outline-panel.collapsed {{
  display: none;
}}
.outline-title {{
  font-size: 12px;
  text-transform: uppercase;
  letter-spacing: 0.08em;
  color: #57606a;
  margin-bottom: 8px;
}}
.outline-list a {{
  display: block;
  color: #0969da;
  text-decoration: none;
  padding: 4px 0;
}}
.outline-list a:hover {{
  text-decoration: underline;
}}
.back-to-top {{
  position: fixed;
  left: 16px;
  bottom: 16px;
  padding: 6px 10px;
  border-radius: 999px;
  border: 1px solid #d0d7de;
  background: #ffffff;
  cursor: pointer;
  opacity: 0;
  pointer-events: none;
  transition: opacity 0.2s ease;
  z-index: 20;
}}
.back-to-top.visible {{
  opacity: 1;
  pointer-events: auto;
}}
@media (max-width: 900px) {{
  .outline-panel {{
    width: 200px;
  }}
}}
</style>
"""
    # Plain (non-f) string: JS braces and ${...} template placeholders are
    # literal, and "\\s" reaches the browser as the regex class \s.
    outline_js = """
const outlineToggle = document.getElementById('outlineToggle');
const outlinePanel = document.getElementById('outlinePanel');
const outlineList = document.getElementById('outlineList');
const backToTop = document.getElementById('backToTop');

function slugify(text) {
  return text.toLowerCase().trim()
    .replace(/[^a-z0-9\\s-]/g, '')
    .replace(/\\s+/g, '-')
    .replace(/-+/g, '-');
}

function buildOutline() {
  if (!outlineList) return;
  const content = document.getElementById('content');
  if (!content) return;
  const headings = content.querySelectorAll('h1, h2, h3, h4');
  if (!headings.length) {
    outlineList.innerHTML = '<div class="muted">No headings</div>';
    return;
  }
  const used = new Set();
  outlineList.innerHTML = '';
  headings.forEach((heading) => {
    let id = heading.id;
    if (!id) {
      const base = slugify(heading.textContent || 'section') || 'section';
      id = base;
      let i = 1;
      while (used.has(id) || document.getElementById(id)) {
        id = `${base}-${i++}`;
      }
      heading.id = id;
    }
    used.add(id);
    const level = parseInt(heading.tagName.slice(1), 10) || 1;
    const link = document.createElement('a');
    link.href = `#${id}`;
    link.textContent = heading.textContent || '';
    link.style.paddingLeft = `${(level - 1) * 12}px`;
    outlineList.appendChild(link);
  });
}

function toggleBackToTop() {
  if (!backToTop) return;
  if (window.scrollY > 300) {
    backToTop.classList.add('visible');
  } else {
    backToTop.classList.remove('visible');
  }
}

if (outlineToggle && outlinePanel) {
  outlineToggle.addEventListener('click', () => {
    outlinePanel.classList.toggle('collapsed');
  });
}

if (backToTop) {
  backToTop.addEventListener('click', () => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
}

buildOutline();
window.addEventListener('scroll', toggleBackToTop);
toggleBackToTop();
"""
    return outline_html, outline_css, outline_js
2226
+
2227
+
2228
+ async def _index_page(request: Request) -> HTMLResponse:
2229
+ index: PaperIndex = request.app.state.index
2230
+ template_options = "".join(
2231
+ f'<option value="{html.escape(tag)}">{html.escape(tag)}</option>'
2232
+ for tag in index.template_tags
2233
+ )
2234
+ if not template_options:
2235
+ template_options = '<option value="" disabled>(no templates)</option>'
2236
+ filter_help = (
2237
+ "Filters syntax:\\n"
2238
+ "pdf:yes|no source:yes|no summary:yes|no\\n"
2239
+ "tmpl:<tag> or template:<tag>\\n"
2240
+ "has:pdf / no:source aliases\\n"
2241
+ "Content tags still use the search box (tag:fpga)."
2242
+ )
2243
+ filter_help_attr = html.escape(filter_help).replace("\n", "&#10;")
2244
+ body_html = """
2245
+ <h2>Paper Database</h2>
2246
+ <div class="card">
2247
+ <div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
2248
+ <div style="display:flex; gap:8px; margin-top:8px;">
2249
+ <input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
2250
+ <select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
2251
+ <option value="summary" selected>Open: Summary</option>
2252
+ <option value="source">Open: Source</option>
2253
+ <option value="pdf">Open: PDF</option>
2254
+ <option value="pdfjs">Open: PDF Viewer</option>
2255
+ <option value="split">Open: Split</option>
2256
+ </select>
2257
+ </div>
2258
+ <div class="filters" style="grid-template-columns: repeat(4, 1fr); margin-top:10px;">
2259
+ <div class="filter-group">
2260
+ <label>PDF</label>
2261
+ <select id="filterPdf" multiple size="2">
2262
+ <option value="with">With</option>
2263
+ <option value="without">Without</option>
2264
+ </select>
2265
+ </div>
2266
+ <div class="filter-group">
2267
+ <label>Source</label>
2268
+ <select id="filterSource" multiple size="2">
2269
+ <option value="with">With</option>
2270
+ <option value="without">Without</option>
2271
+ </select>
2272
+ </div>
2273
+ <div class="filter-group">
2274
+ <label>Summary</label>
2275
+ <select id="filterSummary" multiple size="2">
2276
+ <option value="with">With</option>
2277
+ <option value="without">Without</option>
2278
+ </select>
2279
+ </div>
2280
+ <div class="filter-group">
2281
+ <label>Template</label>
2282
+ <select id="filterTemplate" multiple size="4">
2283
+ __TEMPLATE_OPTIONS__
2284
+ </select>
2285
+ </div>
2286
+ </div>
2287
+ <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
2288
+ <input id="filterQuery" placeholder='Filters... e.g. pdf:yes tmpl:simple' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
2289
+ <span class="help-icon" data-tip="__FILTER_HELP__">?</span>
2290
+ </div>
2291
+ <details style="margin-top:10px;">
2292
+ <summary>Advanced search</summary>
2293
+ <div style="margin-top:10px;" class="muted">Build a query:</div>
2294
+ <div class="filters" style="grid-template-columns: repeat(3, 1fr);">
2295
+ <input id="advTitle" placeholder="title contains..." />
2296
+ <input id="advAuthor" placeholder="author contains..." />
2297
+ <input id="advTag" placeholder="tag (comma separated)" />
2298
+ <input id="advYear" placeholder="year (e.g. 2020..2024)" />
2299
+ <input id="advMonth" placeholder="month (01-12)" />
2300
+ <input id="advVenue" placeholder="venue contains..." />
2301
+ </div>
2302
+ <div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
2303
+ <button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
2304
+ <div class="muted">Generated: <code id="generated"></code></div>
2305
+ </div>
2306
+ </details>
2307
+ </div>
2308
+ <div id="stats" class="stats">
2309
+ <div id="statsTotal" class="stats-row"></div>
2310
+ <div id="statsFiltered" class="stats-row" style="margin-top:6px;"></div>
2311
+ </div>
2312
+ <div id="results"></div>
2313
+ <div id="loading" class="muted">Loading...</div>
2314
+ <script>
2315
+ let page = 1;
2316
+ let loading = false;
2317
+ let done = false;
2318
+
2319
+ function currentParams(nextPage) {
2320
+ const params = new URLSearchParams();
2321
+ params.set("page", String(nextPage));
2322
+ params.set("page_size", "30");
2323
+ const q = document.getElementById("query").value.trim();
2324
+ if (q) params.set("q", q);
2325
+ const fq = document.getElementById("filterQuery").value.trim();
2326
+ if (fq) params.set("fq", fq);
2327
+ function addMulti(id, key) {
2328
+ const el = document.getElementById(id);
2329
+ const values = Array.from(el.selectedOptions).map(opt => opt.value).filter(Boolean);
2330
+ for (const value of values) {
2331
+ params.append(key, value);
2332
+ }
2333
+ }
2334
+ addMulti("filterPdf", "pdf");
2335
+ addMulti("filterSource", "source");
2336
+ addMulti("filterSummary", "summary");
2337
+ addMulti("filterTemplate", "template");
2338
+ return params;
2339
+ }
2340
+
2341
+ function escapeHtml(text) {
2342
+ const div = document.createElement("div");
2343
+ div.textContent = text;
2344
+ return div.innerHTML;
2345
+ }
2346
+
2347
+ function viewSuffixForItem(item) {
2348
+ let view = document.getElementById("openView").value;
2349
+ const isPdfOnly = item.is_pdf_only;
2350
+ const pdfFallback = item.has_pdf ? "pdfjs" : "pdf";
2351
+ if (isPdfOnly && (view === "summary" || view === "source")) {
2352
+ view = pdfFallback;
2353
+ }
2354
+ if (!view || view === "summary") return "";
2355
+ const params = new URLSearchParams();
2356
+ params.set("view", view);
2357
+ if (view === "split") {
2358
+ if (isPdfOnly) {
2359
+ params.set("left", pdfFallback);
2360
+ params.set("right", pdfFallback);
2361
+ } else {
2362
+ params.set("left", "summary");
2363
+ if (item.has_pdf) {
2364
+ params.set("right", "pdfjs");
2365
+ } else if (item.has_source) {
2366
+ params.set("right", "source");
2367
+ } else {
2368
+ params.set("right", "summary");
2369
+ }
2370
+ }
2371
+ }
2372
+ return `?${params.toString()}`;
2373
+ }
2374
+
1331
2375
  function renderItem(item) {
1332
2376
  const tags = (item.tags || []).map(t => `<span class="pill">${escapeHtml(t)}</span>`).join("");
2377
+ const templateTags = (item.template_tags || []).map(t => `<span class="pill template">tmpl:${escapeHtml(t)}</span>`).join("");
1333
2378
  const authors = (item.authors || []).slice(0, 6).map(a => escapeHtml(a)).join(", ");
1334
2379
  const meta = `${escapeHtml(item.year || "")}-${escapeHtml(item.month || "")} · ${escapeHtml(item.venue || "")}`;
1335
2380
  const viewSuffix = viewSuffixForItem(item);
1336
2381
  const badges = [
1337
2382
  item.has_source ? `<span class="pill">source</span>` : "",
1338
2383
  item.has_pdf ? `<span class="pill">pdf</span>` : "",
2384
+ item.is_pdf_only ? `<span class="pill pdf-only">pdf-only</span>` : "",
1339
2385
  ].join("");
1340
2386
  return `
1341
2387
  <div class="card">
1342
2388
  <div><a href="/paper/${encodeURIComponent(item.source_hash)}${viewSuffix}">${escapeHtml(item.title || "")}</a></div>
1343
2389
  <div class="muted">${authors}</div>
1344
2390
  <div class="muted">${meta}</div>
1345
- <div style="margin-top:6px">${badges} ${tags}</div>
2391
+ <div style="margin-top:6px">${badges} ${templateTags} ${tags}</div>
1346
2392
  </div>
1347
2393
  `;
1348
2394
  }
1349
2395
 
2396
+ function renderStatsRow(targetId, label, counts) {
2397
+ const row = document.getElementById(targetId);
2398
+ if (!row || !counts) return;
2399
+ const pills = [];
2400
+ pills.push(`<span class="stats-label">${escapeHtml(label)}</span>`);
2401
+ pills.push(`<span class="pill stat">Count ${counts.total}</span>`);
2402
+ pills.push(`<span class="pill stat">PDF ${counts.pdf}</span>`);
2403
+ pills.push(`<span class="pill stat">Source ${counts.source}</span>`);
2404
+ pills.push(`<span class="pill stat">Summary ${counts.summary}</span>`);
2405
+ const order = counts.template_order || Object.keys(counts.templates || {});
2406
+ for (const tag of order) {
2407
+ const count = (counts.templates && counts.templates[tag]) || 0;
2408
+ pills.push(`<span class="pill stat">tmpl:${escapeHtml(tag)} ${count}</span>`);
2409
+ }
2410
+ row.innerHTML = pills.join("");
2411
+ }
2412
+
2413
+ function updateStats(stats) {
2414
+ if (!stats) return;
2415
+ renderStatsRow("statsTotal", "Total", stats.all);
2416
+ renderStatsRow("statsFiltered", "Filtered", stats.filtered);
2417
+ }
2418
+
1350
2419
  async function loadMore() {
1351
2420
  if (loading || done) return;
1352
2421
  loading = true;
1353
2422
  document.getElementById("loading").textContent = "Loading...";
1354
2423
  const res = await fetch(`/api/papers?${currentParams(page).toString()}`);
1355
2424
  const data = await res.json();
2425
+ if (data.stats) {
2426
+ updateStats(data.stats);
2427
+ }
1356
2428
  const results = document.getElementById("results");
1357
2429
  for (const item of data.items) {
1358
2430
  results.insertAdjacentHTML("beforeend", renderItem(item));
@@ -1376,6 +2448,11 @@ function resetAndLoad() {
1376
2448
 
1377
2449
  document.getElementById("query").addEventListener("change", resetAndLoad);
1378
2450
  document.getElementById("openView").addEventListener("change", resetAndLoad);
2451
+ document.getElementById("filterQuery").addEventListener("change", resetAndLoad);
2452
+ document.getElementById("filterPdf").addEventListener("change", resetAndLoad);
2453
+ document.getElementById("filterSource").addEventListener("change", resetAndLoad);
2454
+ document.getElementById("filterSummary").addEventListener("change", resetAndLoad);
2455
+ document.getElementById("filterTemplate").addEventListener("change", resetAndLoad);
1379
2456
 
1380
2457
  document.getElementById("buildQuery").addEventListener("click", () => {
1381
2458
  function add(field, value) {
@@ -1416,9 +2493,10 @@ window.addEventListener("scroll", () => {
1416
2493
 
1417
2494
  loadMore();
1418
2495
  </script>
1419
- """,
1420
- )
1421
- )
2496
+ """
2497
+ body_html = body_html.replace("__TEMPLATE_OPTIONS__", template_options)
2498
+ body_html = body_html.replace("__FILTER_HELP__", filter_help_attr)
2499
+ return HTMLResponse(_page_shell("Paper DB", body_html))
1422
2500
 
1423
2501
 
1424
2502
  def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
@@ -1429,11 +2507,21 @@ def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
1429
2507
  page_size = min(max(1, page_size), 200)
1430
2508
 
1431
2509
  q = qp.get("q", "").strip()
2510
+ filter_query = qp.get("fq", "").strip()
2511
+ pdf_filters = [item for item in qp.getlist("pdf") if item]
2512
+ source_filters = [item for item in qp.getlist("source") if item]
2513
+ summary_filters = [item for item in qp.getlist("summary") if item]
2514
+ template_filters = [item for item in qp.getlist("template") if item]
1432
2515
 
1433
2516
  return {
1434
2517
  "page": page,
1435
2518
  "page_size": page_size,
1436
2519
  "q": q,
2520
+ "filter_query": filter_query,
2521
+ "pdf": pdf_filters,
2522
+ "source": source_filters,
2523
+ "summary": summary_filters,
2524
+ "template": template_filters,
1437
2525
  }
1438
2526
 
1439
2527
 
@@ -1443,13 +2531,55 @@ async def _api_papers(request: Request) -> JSONResponse:
1443
2531
  page = int(filters["page"])
1444
2532
  page_size = int(filters["page_size"])
1445
2533
  q = str(filters["q"])
2534
+ filter_query = str(filters["filter_query"])
1446
2535
  query = parse_query(q)
1447
2536
  candidate = _apply_query(index, query)
2537
+ filter_terms = _parse_filter_query(filter_query)
2538
+ pdf_filter = _merge_filter_set(_presence_filter(filters["pdf"]), _presence_filter(list(filter_terms["pdf"])))
2539
+ source_filter = _merge_filter_set(
2540
+ _presence_filter(filters["source"]), _presence_filter(list(filter_terms["source"]))
2541
+ )
2542
+ summary_filter = _merge_filter_set(
2543
+ _presence_filter(filters["summary"]), _presence_filter(list(filter_terms["summary"]))
2544
+ )
2545
+ template_selected = {item.lower() for item in filters["template"] if item}
2546
+ template_filter = _merge_filter_set(
2547
+ template_selected or None,
2548
+ filter_terms["template"] or None,
2549
+ )
2550
+
2551
+ if candidate:
2552
+ filtered: set[int] = set()
2553
+ for idx in candidate:
2554
+ paper = index.papers[idx]
2555
+ source_hash = str(paper.get("source_hash") or stable_hash(str(paper.get("source_path") or idx)))
2556
+ has_source = source_hash in index.md_path_by_hash
2557
+ has_pdf = source_hash in index.pdf_path_by_hash
2558
+ has_summary = bool(paper.get("_has_summary"))
2559
+ if not _matches_presence(pdf_filter, has_pdf):
2560
+ continue
2561
+ if not _matches_presence(source_filter, has_source):
2562
+ continue
2563
+ if not _matches_presence(summary_filter, has_summary):
2564
+ continue
2565
+ if template_filter:
2566
+ tags = paper.get("_template_tags_lc") or []
2567
+ if not any(tag in template_filter for tag in tags):
2568
+ continue
2569
+ filtered.add(idx)
2570
+ candidate = filtered
1448
2571
  ordered = [idx for idx in index.ordered_ids if idx in candidate]
1449
2572
  total = len(ordered)
1450
2573
  start = (page - 1) * page_size
1451
2574
  end = min(start + page_size, total)
1452
2575
  page_ids = ordered[start:end]
2576
+ stats_payload = None
2577
+ if page == 1:
2578
+ all_ids = set(index.ordered_ids)
2579
+ stats_payload = {
2580
+ "all": _compute_counts(index, all_ids),
2581
+ "filtered": _compute_counts(index, candidate),
2582
+ }
1453
2583
 
1454
2584
  items: list[dict[str, Any]] = []
1455
2585
  for idx in page_ids:
@@ -1464,8 +2594,11 @@ async def _api_papers(request: Request) -> JSONResponse:
1464
2594
  "month": paper.get("_month") or "",
1465
2595
  "venue": paper.get("_venue") or "",
1466
2596
  "tags": paper.get("_tags") or [],
2597
+ "template_tags": paper.get("_template_tags") or [],
1467
2598
  "has_source": source_hash in index.md_path_by_hash,
1468
2599
  "has_pdf": source_hash in index.pdf_path_by_hash,
2600
+ "has_summary": bool(paper.get("_has_summary")),
2601
+ "is_pdf_only": bool(paper.get("_is_pdf_only")),
1469
2602
  }
1470
2603
  )
1471
2604
 
@@ -1476,6 +2609,7 @@ async def _api_papers(request: Request) -> JSONResponse:
1476
2609
  "total": total,
1477
2610
  "has_more": end < total,
1478
2611
  "items": items,
2612
+ "stats": stats_payload,
1479
2613
  }
1480
2614
  )
1481
2615
 
@@ -1488,28 +2622,45 @@ async def _paper_detail(request: Request) -> HTMLResponse:
1488
2622
  if idx is None:
1489
2623
  return RedirectResponse("/")
1490
2624
  paper = index.papers[idx]
1491
- view = request.query_params.get("view", "summary")
2625
+ is_pdf_only = bool(paper.get("_is_pdf_only"))
2626
+ page_title = str(paper.get("paper_title") or "Paper")
2627
+ view = request.query_params.get("view")
1492
2628
  template_param = request.query_params.get("template")
1493
2629
  embed = request.query_params.get("embed") == "1"
1494
- if view == "split":
1495
- embed = False
1496
2630
 
1497
2631
  pdf_path = index.pdf_path_by_hash.get(source_hash)
1498
2632
  pdf_url = f"/api/pdf/{source_hash}"
1499
- shell = _embed_shell if embed else _page_shell
1500
2633
  source_available = source_hash in index.md_path_by_hash
1501
- allowed_views = {"summary", "source", "pdf", "pdfjs"}
2634
+ allowed_views = {"summary", "source", "pdf", "pdfjs", "split"}
2635
+ if is_pdf_only:
2636
+ allowed_views = {"pdf", "pdfjs", "split"}
1502
2637
 
1503
2638
  def normalize_view(value: str | None, default: str) -> str:
1504
2639
  if value in allowed_views:
1505
2640
  return value
1506
2641
  return default
1507
2642
 
1508
- default_right = "pdfjs" if pdf_path else ("source" if source_available else "summary")
1509
- left_param = request.query_params.get("left")
1510
- right_param = request.query_params.get("right")
1511
- left = normalize_view(left_param, "summary") if left_param else "summary"
1512
- right = normalize_view(right_param, default_right) if right_param else default_right
2643
+ preferred_pdf_view = "pdfjs" if pdf_path else "pdf"
2644
+ default_view = preferred_pdf_view if is_pdf_only else "summary"
2645
+ view = normalize_view(view, default_view)
2646
+ if view == "split":
2647
+ embed = False
2648
+ if is_pdf_only:
2649
+ left_param = request.query_params.get("left")
2650
+ right_param = request.query_params.get("right")
2651
+ left = normalize_view(left_param, preferred_pdf_view) if left_param else preferred_pdf_view
2652
+ right = normalize_view(right_param, preferred_pdf_view) if right_param else preferred_pdf_view
2653
+ else:
2654
+ default_right = "pdfjs" if pdf_path else ("source" if source_available else "summary")
2655
+ left_param = request.query_params.get("left")
2656
+ right_param = request.query_params.get("right")
2657
+ left = normalize_view(left_param, "summary") if left_param else "summary"
2658
+ right = normalize_view(right_param, default_right) if right_param else default_right
2659
+
2660
+ def render_page(title: str, body: str, extra_head: str = "", extra_scripts: str = "") -> HTMLResponse:
2661
+ if embed:
2662
+ return HTMLResponse(_embed_shell(title, body, extra_head, extra_scripts))
2663
+ return HTMLResponse(_page_shell(title, body, extra_head, extra_scripts, header_title=page_title))
1513
2664
 
1514
2665
  def nav_link(label: str, v: str) -> str:
1515
2666
  active = " active" if view == v else ""
@@ -1522,16 +2673,83 @@ async def _paper_detail(request: Request) -> HTMLResponse:
1522
2673
  href = f"/paper/{source_hash}?{urlencode(params)}"
1523
2674
  return f'<a class="tab{active}" href="{html.escape(href)}">{html.escape(label)}</a>'
1524
2675
 
1525
- nav = f"""
1526
- <div class="tabs" style="margin: 8px 0 14px;">
1527
- {nav_link("Summary", "summary")}
1528
- {nav_link("Source", "source")}
1529
- {nav_link("PDF", "pdf")}
1530
- {nav_link("PDF Viewer", "pdfjs")}
1531
- {nav_link("Split", "split")}
2676
+ tab_defs = [
2677
+ ("Summary", "summary"),
2678
+ ("Source", "source"),
2679
+ ("PDF", "pdf"),
2680
+ ("PDF Viewer", "pdfjs"),
2681
+ ("Split", "split"),
2682
+ ]
2683
+ if is_pdf_only:
2684
+ tab_defs = [
2685
+ ("PDF", "pdf"),
2686
+ ("PDF Viewer", "pdfjs"),
2687
+ ("Split", "split"),
2688
+ ]
2689
+ tabs_html = '<div class="tabs">' + "".join(nav_link(label, v) for label, v in tab_defs) + "</div>"
2690
+ fullscreen_controls = """
2691
+ <div class="fullscreen-actions">
2692
+ <button id="fullscreenEnter" class="fullscreen-enter" type="button" title="Enter fullscreen">Fullscreen</button>
2693
+ <button id="fullscreenExit" class="fullscreen-exit" type="button" title="Exit fullscreen">Exit Fullscreen</button>
2694
+ </div>
2695
+ """
2696
+
2697
+ def detail_toolbar(extra_controls: str = "") -> str:
2698
+ if embed:
2699
+ return ""
2700
+ controls = extra_controls.strip()
2701
+ toolbar_controls = f"{controls}{fullscreen_controls}" if controls else fullscreen_controls
2702
+ return f"""
2703
+ <div class="detail-toolbar">
2704
+ {tabs_html}
2705
+ <div class="toolbar-actions">
2706
+ {toolbar_controls}
2707
+ </div>
2708
+ </div>
2709
+ """
2710
+
2711
+ def wrap_detail(content: str, toolbar_html: str | None = None) -> str:
2712
+ if embed:
2713
+ return content
2714
+ toolbar = detail_toolbar() if toolbar_html is None else toolbar_html
2715
+ return f"""
2716
+ <div class="detail-shell">
2717
+ {toolbar}
2718
+ <div class="detail-body">
2719
+ {content}
2720
+ </div>
1532
2721
  </div>
1533
2722
  """
1534
- nav_html = "" if embed else nav
2723
+
2724
+ fullscreen_script = ""
2725
+ if not embed:
2726
+ fullscreen_script = """
2727
+ <script>
2728
+ const fullscreenEnter = document.getElementById('fullscreenEnter');
2729
+ const fullscreenExit = document.getElementById('fullscreenExit');
2730
+ function setFullscreen(enable) {
2731
+ document.body.classList.toggle('detail-fullscreen', enable);
2732
+ }
2733
+ if (fullscreenEnter) {
2734
+ fullscreenEnter.addEventListener('click', () => setFullscreen(true));
2735
+ }
2736
+ if (fullscreenExit) {
2737
+ fullscreenExit.addEventListener('click', () => setFullscreen(false));
2738
+ }
2739
+ document.addEventListener('keydown', (event) => {
2740
+ if (event.key === 'Escape' && document.body.classList.contains('detail-fullscreen')) {
2741
+ setFullscreen(false);
2742
+ }
2743
+ });
2744
+ </script>
2745
+ """
2746
+ pdf_only_warning_html = ""
2747
+ if is_pdf_only:
2748
+ pdf_only_warning_html = (
2749
+ '<div class="warning">PDF-only entry: summary and source views are unavailable.</div>'
2750
+ )
2751
+ outline_top = "72px" if not embed else "16px"
2752
+ outline_html, outline_css, outline_js = _outline_assets(outline_top)
1535
2753
 
1536
2754
  if view == "split":
1537
2755
  def pane_src(pane_view: str) -> str:
@@ -1550,6 +2768,11 @@ async def _paper_detail(request: Request) -> HTMLResponse:
1550
2768
  ("pdf", "PDF"),
1551
2769
  ("pdfjs", "PDF Viewer"),
1552
2770
  ]
2771
+ if is_pdf_only:
2772
+ options = [
2773
+ ("pdf", "PDF"),
2774
+ ("pdfjs", "PDF Viewer"),
2775
+ ]
1553
2776
  left_options = "\n".join(
1554
2777
  f'<option value="{value}"{" selected" if value == left else ""}>{label}</option>'
1555
2778
  for value, label in options
@@ -1558,28 +2781,26 @@ async def _paper_detail(request: Request) -> HTMLResponse:
1558
2781
  f'<option value="{value}"{" selected" if value == right else ""}>{label}</option>'
1559
2782
  for value, label in options
1560
2783
  )
1561
- body = f"""
1562
- <h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>
1563
- {nav}
1564
- <div class="split-controls">
1565
- <div>
1566
- <div class="muted">Left pane</div>
1567
- <select id="splitLeft">
1568
- {left_options}
1569
- </select>
1570
- </div>
2784
+ split_controls = f"""
2785
+ <div class="split-inline">
2786
+ <span class="muted">Left</span>
2787
+ <select id="splitLeft">
2788
+ {left_options}
2789
+ </select>
1571
2790
  <div class="split-actions">
1572
2791
  <button id="splitTighten" type="button" title="Tighten width">-</button>
1573
2792
  <button id="splitSwap" type="button" title="Swap panes">⇄</button>
1574
2793
  <button id="splitWiden" type="button" title="Widen width">+</button>
1575
2794
  </div>
1576
- <div>
1577
- <div class="muted">Right pane</div>
1578
- <select id="splitRight">
1579
- {right_options}
1580
- </select>
1581
- </div>
2795
+ <span class="muted">Right</span>
2796
+ <select id="splitRight">
2797
+ {right_options}
2798
+ </select>
1582
2799
  </div>
2800
+ """
2801
+ toolbar_html = detail_toolbar(split_controls)
2802
+ split_layout = f"""
2803
+ {pdf_only_warning_html}
1583
2804
  <div class="split-layout">
1584
2805
  <div class="split-pane">
1585
2806
  <iframe id="leftPane" src="{html.escape(left_src)}" title="Left pane"></iframe>
@@ -1589,6 +2810,7 @@ async def _paper_detail(request: Request) -> HTMLResponse:
1589
2810
  </div>
1590
2811
  </div>
1591
2812
  """
2813
+ body = wrap_detail(split_layout, toolbar_html=toolbar_html)
1592
2814
  extra_head = """
1593
2815
  <style>
1594
2816
  .container {
@@ -1596,43 +2818,14 @@ async def _paper_detail(request: Request) -> HTMLResponse:
1596
2818
  width: 100%;
1597
2819
  margin: 0 auto;
1598
2820
  }
1599
- .split-controls {
1600
- display: grid;
1601
- grid-template-columns: 1fr auto 1fr;
1602
- gap: 12px;
1603
- align-items: end;
1604
- margin: 10px 0 14px;
1605
- }
1606
- .split-controls select {
1607
- padding: 6px 8px;
1608
- border-radius: 8px;
1609
- border: 1px solid #d0d7de;
1610
- background: #fff;
1611
- min-width: 160px;
1612
- }
1613
- .split-actions {
1614
- display: flex;
1615
- align-items: center;
1616
- justify-content: center;
1617
- gap: 8px;
1618
- height: 100%;
1619
- }
1620
- .split-actions button {
1621
- padding: 6px 10px;
1622
- border-radius: 999px;
1623
- border: 1px solid #d0d7de;
1624
- background: #f6f8fa;
1625
- cursor: pointer;
1626
- min-width: 36px;
1627
- }
1628
2821
  .split-layout {
1629
2822
  display: flex;
1630
2823
  gap: 12px;
1631
2824
  width: 100%;
1632
- max-width: min(100%, var(--split-max-width, 100%));
2825
+ max-width: var(--split-max-width, 100%);
1633
2826
  margin: 0 auto;
1634
- height: calc(100vh - 260px);
1635
- min-height: 420px;
2827
+ flex: 1;
2828
+ min-height: 440px;
1636
2829
  }
1637
2830
  .split-pane {
1638
2831
  flex: 1;
@@ -1649,14 +2842,11 @@ async def _paper_detail(request: Request) -> HTMLResponse:
1649
2842
  @media (max-width: 900px) {
1650
2843
  .split-layout {
1651
2844
  flex-direction: column;
1652
- height: auto;
2845
+ min-height: 0;
1653
2846
  }
1654
2847
  .split-pane {
1655
2848
  height: 70vh;
1656
2849
  }
1657
- .split-controls {
1658
- grid-template-columns: 1fr;
1659
- }
1660
2850
  }
1661
2851
  </style>
1662
2852
  """
@@ -1717,28 +2907,46 @@ widenButton.addEventListener('click', () => {
1717
2907
  applySplitWidth();
1718
2908
  </script>
1719
2909
  """
1720
- return HTMLResponse(_page_shell("Split View", body, extra_head=extra_head, extra_scripts=extra_scripts))
2910
+ return render_page(
2911
+ "Split View",
2912
+ body,
2913
+ extra_head=extra_head,
2914
+ extra_scripts=extra_scripts + fullscreen_script,
2915
+ )
1721
2916
 
1722
2917
  if view == "source":
1723
2918
  source_path = index.md_path_by_hash.get(source_hash)
1724
2919
  if not source_path:
1725
- body = nav_html + '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
1726
- return HTMLResponse(shell("Source", body))
2920
+ body = wrap_detail(
2921
+ '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
2922
+ )
2923
+ return render_page("Source", body, extra_scripts=fullscreen_script)
1727
2924
  try:
1728
2925
  raw = source_path.read_text(encoding="utf-8")
1729
2926
  except UnicodeDecodeError:
1730
2927
  raw = source_path.read_text(encoding="latin-1")
1731
2928
  rendered = _render_markdown_with_math_placeholders(md, raw)
1732
- body = (
1733
- nav_html
1734
- + f"<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>"
1735
- + f'<div class="muted">{html.escape(str(source_path))}</div>'
1736
- + '<div class="muted" style="margin-top:10px;">Rendered from source markdown:</div>'
1737
- + f'<div id="content">{rendered}</div>'
1738
- + "<details style='margin-top:12px;'><summary>Raw markdown</summary>"
1739
- + f"<pre><code>{html.escape(raw)}</code></pre></details>"
2929
+ body = wrap_detail(
2930
+ f"""
2931
+ <div class="muted">{html.escape(str(source_path))}</div>
2932
+ <div class="muted" style="margin-top:10px;">Rendered from source markdown:</div>
2933
+ {outline_html}
2934
+ <div id="content">{rendered}</div>
2935
+ <details style="margin-top:12px;"><summary>Raw markdown</summary>
2936
+ <pre><code>{html.escape(raw)}</code></pre>
2937
+ </details>
2938
+ """
1740
2939
  )
1741
- extra_head = f'<link rel="stylesheet" href="{_CDN_KATEX}" />'
2940
+ extra_head = f"""
2941
+ <link rel="stylesheet" href="{_CDN_KATEX}" />
2942
+ {outline_css}
2943
+ <style>
2944
+ #content img {{
2945
+ max-width: 100%;
2946
+ height: auto;
2947
+ }}
2948
+ </style>
2949
+ """
1742
2950
  extra_scripts = f"""
1743
2951
  <script src="{_CDN_MERMAID}"></script>
1744
2952
  <script src="{_CDN_KATEX_JS}"></script>
@@ -1766,16 +2974,18 @@ if (window.renderMathInElement) {{
1766
2974
  throwOnError: false
1767
2975
  }});
1768
2976
  }}
2977
+ {outline_js}
1769
2978
  </script>
1770
2979
  """
1771
- return HTMLResponse(shell("Source", body, extra_head=extra_head, extra_scripts=extra_scripts))
2980
+ return render_page("Source", body, extra_head=extra_head, extra_scripts=extra_scripts + fullscreen_script)
1772
2981
 
1773
2982
  if view == "pdf":
1774
2983
  if not pdf_path:
1775
- body = nav_html + '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
1776
- return HTMLResponse(shell("PDF", body))
1777
- body = nav_html + f"""
1778
- <h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>
2984
+ body = wrap_detail('<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>')
2985
+ return render_page("PDF", body, extra_scripts=fullscreen_script)
2986
+ body = wrap_detail(
2987
+ f"""
2988
+ {pdf_only_warning_html}
1779
2989
  <div class="muted">{html.escape(str(pdf_path.name))}</div>
1780
2990
  <div style="display:flex; gap:8px; align-items:center; margin: 10px 0;">
1781
2991
  <button id="prev" style="padding:6px 10px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Prev</button>
@@ -1787,6 +2997,7 @@ if (window.renderMathInElement) {{
1787
2997
  </div>
1788
2998
  <canvas id="the-canvas" style="width: 100%; border: 1px solid #d0d7de; border-radius: 10px;"></canvas>
1789
2999
  """
3000
+ )
1790
3001
  extra_scripts = f"""
1791
3002
  <script src="{_CDN_PDFJS}"></script>
1792
3003
  <script>
@@ -1875,25 +3086,21 @@ window.addEventListener('resize', () => {{
1875
3086
  }});
1876
3087
  </script>
1877
3088
  """
1878
- return HTMLResponse(shell("PDF", body, extra_scripts=extra_scripts))
3089
+ return render_page("PDF", body, extra_scripts=extra_scripts + fullscreen_script)
1879
3090
 
1880
3091
  if view == "pdfjs":
1881
3092
  if not pdf_path:
1882
- body = nav_html + '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
1883
- return HTMLResponse(shell("PDF Viewer", body))
3093
+ body = wrap_detail('<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>')
3094
+ return render_page("PDF Viewer", body, extra_scripts=fullscreen_script)
1884
3095
  viewer_url = _build_pdfjs_viewer_url(pdf_url)
1885
- header_html = ""
1886
- if not embed:
1887
- header_html = (
1888
- f"<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>"
1889
- + f'<div class="muted">{html.escape(str(pdf_path.name))}</div>'
1890
- )
1891
- frame_height = "calc(100vh - 220px)" if not embed else "calc(100vh - 32px)"
1892
- body = f"""
1893
- {nav_html}
1894
- {header_html}
3096
+ frame_height = "calc(100vh - 32px)" if embed else "100%"
3097
+ body = wrap_detail(
3098
+ f"""
3099
+ {pdf_only_warning_html}
3100
+ <div class="muted">{html.escape(str(pdf_path.name))}</div>
1895
3101
  <iframe class="pdfjs-frame" src="{html.escape(viewer_url)}" title="PDF.js Viewer"></iframe>
1896
3102
  """
3103
+ )
1897
3104
  extra_head = f"""
1898
3105
  <style>
1899
3106
  .pdfjs-frame {{
@@ -1901,10 +3108,11 @@ window.addEventListener('resize', () => {{
1901
3108
  height: {frame_height};
1902
3109
  border: 1px solid #d0d7de;
1903
3110
  border-radius: 10px;
3111
+ flex: 1;
1904
3112
  }}
1905
3113
  </style>
1906
3114
  """
1907
- return HTMLResponse(shell("PDF Viewer", body, extra_head=extra_head))
3115
+ return render_page("PDF Viewer", body, extra_head=extra_head, extra_scripts=fullscreen_script)
1908
3116
 
1909
3117
  selected_tag, available_templates = _select_template_tag(paper, template_param)
1910
3118
  markdown, template_name, warning = _render_paper_markdown(
@@ -1915,8 +3123,6 @@ window.addEventListener('resize', () => {{
1915
3123
  rendered_html = _render_markdown_with_math_placeholders(md, markdown)
1916
3124
 
1917
3125
  warning_html = f'<div class="warning">{html.escape(warning)}</div>' if warning else ""
1918
- title = str(paper.get("paper_title") or "Paper")
1919
- outline_top = "72px" if not embed else "16px"
1920
3126
  template_controls = f'<div class="muted">Template: {html.escape(template_name)}</div>'
1921
3127
  if available_templates:
1922
3128
  options = "\n".join(
@@ -1942,97 +3148,17 @@ if (templateSelect) {{
1942
3148
  }}
1943
3149
  </script>
1944
3150
  """
1945
- outline_html = """
1946
- <button id="outlineToggle" class="outline-toggle" title="Toggle outline">☰</button>
1947
- <div id="outlinePanel" class="outline-panel collapsed">
1948
- <div class="outline-title">Outline</div>
1949
- <div id="outlineList" class="outline-list"></div>
1950
- </div>
1951
- <button id="backToTop" class="back-to-top" title="Back to top">↑</button>
1952
- """
1953
- body = f"""
1954
- <h2>{html.escape(title)}</h2>
3151
+ content_html = f"""
1955
3152
  {template_controls}
1956
3153
  {warning_html}
1957
- {nav_html}
1958
3154
  {outline_html}
1959
3155
  <div id="content">{rendered_html}</div>
1960
3156
  """
3157
+ body = wrap_detail(content_html)
1961
3158
 
1962
3159
  extra_head = f"""
1963
3160
  <link rel="stylesheet" href="{_CDN_KATEX}" />
1964
- <style>
1965
- :root {{
1966
- --outline-top: {outline_top};
1967
- }}
1968
- .outline-toggle {{
1969
- position: fixed;
1970
- top: var(--outline-top);
1971
- left: 16px;
1972
- z-index: 20;
1973
- padding: 6px 10px;
1974
- border-radius: 8px;
1975
- border: 1px solid #d0d7de;
1976
- background: #f6f8fa;
1977
- cursor: pointer;
1978
- }}
1979
- .outline-panel {{
1980
- position: fixed;
1981
- top: calc(var(--outline-top) + 42px);
1982
- left: 16px;
1983
- width: 240px;
1984
- max-height: 60vh;
1985
- overflow: auto;
1986
- border: 1px solid #d0d7de;
1987
- border-radius: 10px;
1988
- background: #ffffff;
1989
- padding: 10px;
1990
- z-index: 20;
1991
- box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
1992
- }}
1993
- .outline-panel.collapsed {{
1994
- display: none;
1995
- }}
1996
- .outline-title {{
1997
- font-size: 12px;
1998
- text-transform: uppercase;
1999
- letter-spacing: 0.08em;
2000
- color: #57606a;
2001
- margin-bottom: 8px;
2002
- }}
2003
- .outline-list a {{
2004
- display: block;
2005
- color: #0969da;
2006
- text-decoration: none;
2007
- padding: 4px 0;
2008
- }}
2009
- .outline-list a:hover {{
2010
- text-decoration: underline;
2011
- }}
2012
- .back-to-top {{
2013
- position: fixed;
2014
- left: 16px;
2015
- bottom: 16px;
2016
- padding: 6px 10px;
2017
- border-radius: 999px;
2018
- border: 1px solid #d0d7de;
2019
- background: #ffffff;
2020
- cursor: pointer;
2021
- opacity: 0;
2022
- pointer-events: none;
2023
- transition: opacity 0.2s ease;
2024
- z-index: 20;
2025
- }}
2026
- .back-to-top.visible {{
2027
- opacity: 1;
2028
- pointer-events: auto;
2029
- }}
2030
- @media (max-width: 900px) {{
2031
- .outline-panel {{
2032
- width: 200px;
2033
- }}
2034
- }}
2035
- </style>
3161
+ {outline_css}
2036
3162
  """
2037
3163
  extra_scripts = f"""
2038
3164
  <script src="{_CDN_MERMAID}"></script>
@@ -2062,77 +3188,10 @@ if (window.renderMathInElement) {{
2062
3188
  throwOnError: false
2063
3189
  }});
2064
3190
  }}
2065
- const outlineToggle = document.getElementById('outlineToggle');
2066
- const outlinePanel = document.getElementById('outlinePanel');
2067
- const outlineList = document.getElementById('outlineList');
2068
- const backToTop = document.getElementById('backToTop');
2069
-
2070
- function slugify(text) {{
2071
- return text.toLowerCase().trim()
2072
- .replace(/[^a-z0-9\\s-]/g, '')
2073
- .replace(/\\s+/g, '-')
2074
- .replace(/-+/g, '-');
2075
- }}
2076
-
2077
- function buildOutline() {{
2078
- if (!outlineList) return;
2079
- const content = document.getElementById('content');
2080
- if (!content) return;
2081
- const headings = content.querySelectorAll('h1, h2, h3, h4');
2082
- if (!headings.length) {{
2083
- outlineList.innerHTML = '<div class="muted">No headings</div>';
2084
- return;
2085
- }}
2086
- const used = new Set();
2087
- outlineList.innerHTML = '';
2088
- headings.forEach((heading) => {{
2089
- let id = heading.id;
2090
- if (!id) {{
2091
- const base = slugify(heading.textContent || 'section') || 'section';
2092
- id = base;
2093
- let i = 1;
2094
- while (used.has(id) || document.getElementById(id)) {{
2095
- id = `${{base}}-${{i++}}`;
2096
- }}
2097
- heading.id = id;
2098
- }}
2099
- used.add(id);
2100
- const level = parseInt(heading.tagName.slice(1), 10) || 1;
2101
- const link = document.createElement('a');
2102
- link.href = `#${{id}}`;
2103
- link.textContent = heading.textContent || '';
2104
- link.style.paddingLeft = `${{(level - 1) * 12}}px`;
2105
- outlineList.appendChild(link);
2106
- }});
2107
- }}
2108
-
2109
- function toggleBackToTop() {{
2110
- if (!backToTop) return;
2111
- if (window.scrollY > 300) {{
2112
- backToTop.classList.add('visible');
2113
- }} else {{
2114
- backToTop.classList.remove('visible');
2115
- }}
2116
- }}
2117
-
2118
- if (outlineToggle && outlinePanel) {{
2119
- outlineToggle.addEventListener('click', () => {{
2120
- outlinePanel.classList.toggle('collapsed');
2121
- }});
2122
- }}
2123
-
2124
- if (backToTop) {{
2125
- backToTop.addEventListener('click', () => {{
2126
- window.scrollTo({{ top: 0, behavior: 'smooth' }});
2127
- }});
2128
- }}
2129
-
2130
- buildOutline();
2131
- window.addEventListener('scroll', toggleBackToTop);
2132
- toggleBackToTop();
3191
+ {outline_js}
2133
3192
  </script>
2134
3193
  """
2135
- return HTMLResponse(shell(title, body, extra_head=extra_head, extra_scripts=extra_scripts))
3194
+ return render_page(page_title, body, extra_head=extra_head, extra_scripts=extra_scripts + fullscreen_script)
2136
3195
 
2137
3196
 
2138
3197
  async def _api_stats(request: Request) -> JSONResponse:
@@ -2159,6 +3218,7 @@ async def _stats_page(request: Request) -> HTMLResponse:
2159
3218
  <div id="year" style="width:100%;height:360px"></div>
2160
3219
  <div id="month" style="width:100%;height:360px"></div>
2161
3220
  <div id="tags" style="width:100%;height:420px"></div>
3221
+ <div id="keywords" style="width:100%;height:420px"></div>
2162
3222
  <div id="authors" style="width:100%;height:420px"></div>
2163
3223
  <div id="venues" style="width:100%;height:420px"></div>
2164
3224
  """
@@ -2185,6 +3245,7 @@ async function main() {{
2185
3245
  bar('year', 'Publication Year', data.years || []);
2186
3246
  bar('month', 'Publication Month', data.months || []);
2187
3247
  bar('tags', 'Top Tags', (data.tags || []).slice(0, 20));
3248
+ bar('keywords', 'Top Keywords', (data.keywords || []).slice(0, 20));
2188
3249
  bar('authors', 'Top Authors', (data.authors || []).slice(0, 20));
2189
3250
  bar('venues', 'Top Venues', (data.venues || []).slice(0, 20));
2190
3251
  }}
@@ -2272,7 +3333,7 @@ def create_app(
2272
3333
  cache_dir: Path | None = None,
2273
3334
  use_cache: bool = True,
2274
3335
  ) -> Starlette:
2275
- papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache)
3336
+ papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache, pdf_roots=pdf_roots)
2276
3337
 
2277
3338
  md_roots = md_roots or []
2278
3339
  pdf_roots = pdf_roots or []