deepresearch-flow 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepresearch_flow/__main__.py +7 -0
- deepresearch_flow/paper/db.py +34 -1
- deepresearch_flow/paper/extract.py +10 -3
- deepresearch_flow/paper/web/app.py +1417 -356
- {deepresearch_flow-0.1.1.dist-info → deepresearch_flow-0.2.0.dist-info}/METADATA +14 -6
- {deepresearch_flow-0.1.1.dist-info → deepresearch_flow-0.2.0.dist-info}/RECORD +10 -9
- {deepresearch_flow-0.1.1.dist-info → deepresearch_flow-0.2.0.dist-info}/WHEEL +0 -0
- {deepresearch_flow-0.1.1.dist-info → deepresearch_flow-0.2.0.dist-info}/entry_points.txt +0 -0
- {deepresearch_flow-0.1.1.dist-info → deepresearch_flow-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {deepresearch_flow-0.1.1.dist-info → deepresearch_flow-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import html
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
|
+
import unicodedata
|
|
6
7
|
from dataclasses import dataclass
|
|
7
8
|
from html.parser import HTMLParser
|
|
8
9
|
from pathlib import Path
|
|
@@ -32,6 +33,12 @@ try:
|
|
|
32
33
|
except Exception:
|
|
33
34
|
PYBTEX_AVAILABLE = False
|
|
34
35
|
|
|
36
|
+
try:
|
|
37
|
+
from pypdf import PdfReader
|
|
38
|
+
PYPDF_AVAILABLE = True
|
|
39
|
+
except Exception:
|
|
40
|
+
PYPDF_AVAILABLE = False
|
|
41
|
+
|
|
35
42
|
|
|
36
43
|
_CDN_ECHARTS = "https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"
|
|
37
44
|
_CDN_MERMAID = "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"
|
|
@@ -60,6 +67,7 @@ class PaperIndex:
|
|
|
60
67
|
stats: dict[str, Any]
|
|
61
68
|
md_path_by_hash: dict[str, Path]
|
|
62
69
|
pdf_path_by_hash: dict[str, Path]
|
|
70
|
+
template_tags: list[str]
|
|
63
71
|
|
|
64
72
|
|
|
65
73
|
def _split_csv(values: list[str]) -> list[str]:
|
|
@@ -189,6 +197,41 @@ def _extract_tags(paper: dict[str, Any]) -> list[str]:
|
|
|
189
197
|
return []
|
|
190
198
|
|
|
191
199
|
|
|
200
|
+
def _extract_keywords(paper: dict[str, Any]) -> list[str]:
|
|
201
|
+
keywords = paper.get("keywords") or []
|
|
202
|
+
if isinstance(keywords, list):
|
|
203
|
+
return [str(keyword).strip() for keyword in keywords if str(keyword).strip()]
|
|
204
|
+
if isinstance(keywords, str):
|
|
205
|
+
parts = re.split(r"[;,]", keywords)
|
|
206
|
+
return [part.strip() for part in parts if part.strip()]
|
|
207
|
+
return []
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
_SUMMARY_FIELDS = (
|
|
211
|
+
"summary",
|
|
212
|
+
"abstract",
|
|
213
|
+
"keywords",
|
|
214
|
+
"question1",
|
|
215
|
+
"question2",
|
|
216
|
+
"question3",
|
|
217
|
+
"question4",
|
|
218
|
+
"question5",
|
|
219
|
+
"question6",
|
|
220
|
+
"question7",
|
|
221
|
+
"question8",
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _has_summary(paper: dict[str, Any], template_tags: list[str]) -> bool:
|
|
226
|
+
if template_tags:
|
|
227
|
+
return True
|
|
228
|
+
for key in _SUMMARY_FIELDS:
|
|
229
|
+
value = paper.get(key)
|
|
230
|
+
if isinstance(value, str) and value.strip():
|
|
231
|
+
return True
|
|
232
|
+
return False
|
|
233
|
+
|
|
234
|
+
|
|
192
235
|
def _extract_venue(paper: dict[str, Any]) -> str:
|
|
193
236
|
if isinstance(paper.get("bibtex"), dict):
|
|
194
237
|
bib = paper.get("bibtex") or {}
|
|
@@ -223,13 +266,16 @@ def build_index(
|
|
|
223
266
|
year_counts: dict[str, int] = {}
|
|
224
267
|
month_counts: dict[str, int] = {}
|
|
225
268
|
tag_counts: dict[str, int] = {}
|
|
269
|
+
keyword_counts: dict[str, int] = {}
|
|
226
270
|
author_counts: dict[str, int] = {}
|
|
227
271
|
venue_counts: dict[str, int] = {}
|
|
272
|
+
template_tag_counts: dict[str, int] = {}
|
|
228
273
|
|
|
229
274
|
def add_index(index: dict[str, set[int]], key: str, idx: int) -> None:
|
|
230
275
|
index.setdefault(key, set()).add(idx)
|
|
231
276
|
|
|
232
277
|
for idx, paper in enumerate(papers):
|
|
278
|
+
is_pdf_only = bool(paper.get("_is_pdf_only"))
|
|
233
279
|
source_hash = paper.get("source_hash")
|
|
234
280
|
if not source_hash and paper.get("source_path"):
|
|
235
281
|
source_hash = stable_hash(str(paper.get("source_path")))
|
|
@@ -258,31 +304,54 @@ def build_index(
|
|
|
258
304
|
paper["_month"] = month_label
|
|
259
305
|
add_index(by_year, _normalize_key(year_label), idx)
|
|
260
306
|
add_index(by_month, _normalize_key(month_label), idx)
|
|
261
|
-
|
|
262
|
-
|
|
307
|
+
if not is_pdf_only:
|
|
308
|
+
year_counts[year_label] = year_counts.get(year_label, 0) + 1
|
|
309
|
+
month_counts[month_label] = month_counts.get(month_label, 0) + 1
|
|
263
310
|
|
|
264
311
|
venue = _extract_venue(paper).strip()
|
|
265
312
|
paper["_venue"] = venue
|
|
266
313
|
if venue:
|
|
267
314
|
add_index(by_venue, _normalize_key(venue), idx)
|
|
268
|
-
|
|
315
|
+
if not is_pdf_only:
|
|
316
|
+
venue_counts[venue] = venue_counts.get(venue, 0) + 1
|
|
269
317
|
else:
|
|
270
318
|
add_index(by_venue, "unknown", idx)
|
|
271
|
-
|
|
319
|
+
if not is_pdf_only:
|
|
320
|
+
venue_counts["Unknown"] = venue_counts.get("Unknown", 0) + 1
|
|
272
321
|
|
|
273
322
|
authors = _extract_authors(paper)
|
|
274
323
|
paper["_authors"] = authors
|
|
275
324
|
for author in authors:
|
|
276
325
|
key = _normalize_key(author)
|
|
277
326
|
add_index(by_author, key, idx)
|
|
278
|
-
|
|
327
|
+
if not is_pdf_only:
|
|
328
|
+
author_counts[author] = author_counts.get(author, 0) + 1
|
|
279
329
|
|
|
280
330
|
tags = _extract_tags(paper)
|
|
281
331
|
paper["_tags"] = tags
|
|
282
332
|
for tag in tags:
|
|
283
333
|
key = _normalize_key(tag)
|
|
284
334
|
add_index(by_tag, key, idx)
|
|
285
|
-
|
|
335
|
+
if not is_pdf_only:
|
|
336
|
+
tag_counts[tag] = tag_counts.get(tag, 0) + 1
|
|
337
|
+
|
|
338
|
+
keywords = _extract_keywords(paper)
|
|
339
|
+
paper["_keywords"] = keywords
|
|
340
|
+
for keyword in keywords:
|
|
341
|
+
if not is_pdf_only:
|
|
342
|
+
keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
|
|
343
|
+
|
|
344
|
+
template_tags = _available_templates(paper)
|
|
345
|
+
if not template_tags:
|
|
346
|
+
fallback_tag = paper.get("template_tag") or paper.get("prompt_template")
|
|
347
|
+
if fallback_tag:
|
|
348
|
+
template_tags = [str(fallback_tag)]
|
|
349
|
+
paper["_template_tags"] = template_tags
|
|
350
|
+
paper["_template_tags_lc"] = [tag.lower() for tag in template_tags]
|
|
351
|
+
paper["_has_summary"] = _has_summary(paper, template_tags)
|
|
352
|
+
if not is_pdf_only:
|
|
353
|
+
for tag in template_tags:
|
|
354
|
+
template_tag_counts[tag] = template_tag_counts.get(tag, 0) + 1
|
|
286
355
|
|
|
287
356
|
search_parts = [title, venue, " ".join(authors), " ".join(tags)]
|
|
288
357
|
paper["_search_lc"] = " ".join(part for part in search_parts if part).lower()
|
|
@@ -305,15 +374,19 @@ def build_index(
|
|
|
305
374
|
|
|
306
375
|
ordered_ids = [idx for idx, _ in sorted(enumerate(papers), key=year_sort_key)]
|
|
307
376
|
|
|
377
|
+
stats_total = sum(1 for paper in papers if not paper.get("_is_pdf_only"))
|
|
308
378
|
stats = {
|
|
309
|
-
"total":
|
|
379
|
+
"total": stats_total,
|
|
310
380
|
"years": _sorted_counts(year_counts, numeric_desc=True),
|
|
311
381
|
"months": _sorted_month_counts(month_counts),
|
|
312
382
|
"tags": _sorted_counts(tag_counts),
|
|
383
|
+
"keywords": _sorted_counts(keyword_counts),
|
|
313
384
|
"authors": _sorted_counts(author_counts),
|
|
314
385
|
"venues": _sorted_counts(venue_counts),
|
|
315
386
|
}
|
|
316
387
|
|
|
388
|
+
template_tags = sorted(template_tag_counts.keys(), key=lambda item: item.lower())
|
|
389
|
+
|
|
317
390
|
return PaperIndex(
|
|
318
391
|
papers=papers,
|
|
319
392
|
id_by_hash=id_by_hash,
|
|
@@ -326,6 +399,7 @@ def build_index(
|
|
|
326
399
|
stats=stats,
|
|
327
400
|
md_path_by_hash=md_path_by_hash,
|
|
328
401
|
pdf_path_by_hash=pdf_path_by_hash,
|
|
402
|
+
template_tags=template_tags,
|
|
329
403
|
)
|
|
330
404
|
|
|
331
405
|
|
|
@@ -422,7 +496,11 @@ def _infer_template_tag(papers: list[dict[str, Any]], path: Path) -> str:
|
|
|
422
496
|
return best_tag
|
|
423
497
|
|
|
424
498
|
|
|
425
|
-
def _build_cache_meta(
|
|
499
|
+
def _build_cache_meta(
|
|
500
|
+
db_paths: list[Path],
|
|
501
|
+
bibtex_path: Path | None,
|
|
502
|
+
pdf_roots_meta: list[dict[str, Any]] | None = None,
|
|
503
|
+
) -> dict[str, Any]:
|
|
426
504
|
def file_meta(path: Path) -> dict[str, Any]:
|
|
427
505
|
try:
|
|
428
506
|
stats = path.stat()
|
|
@@ -435,6 +513,8 @@ def _build_cache_meta(db_paths: list[Path], bibtex_path: Path | None) -> dict[st
|
|
|
435
513
|
"inputs": [file_meta(path) for path in db_paths],
|
|
436
514
|
"bibtex": file_meta(bibtex_path) if bibtex_path else None,
|
|
437
515
|
}
|
|
516
|
+
if pdf_roots_meta is not None:
|
|
517
|
+
meta["pdf_roots"] = pdf_roots_meta
|
|
438
518
|
return meta
|
|
439
519
|
|
|
440
520
|
|
|
@@ -462,16 +542,72 @@ def _write_cached_papers(cache_dir: Path, meta: dict[str, Any], papers: list[dic
|
|
|
462
542
|
data_path.write_text(json.dumps(papers, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
463
543
|
|
|
464
544
|
|
|
545
|
+
def _extract_year_for_matching(paper: dict[str, Any]) -> str | None:
|
|
546
|
+
if isinstance(paper.get("bibtex"), dict):
|
|
547
|
+
fields = paper.get("bibtex", {}).get("fields", {}) or {}
|
|
548
|
+
year = fields.get("year")
|
|
549
|
+
if year and str(year).isdigit():
|
|
550
|
+
return str(year)
|
|
551
|
+
parsed_year, _ = _parse_year_month(str(paper.get("publication_date") or ""))
|
|
552
|
+
return parsed_year
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _prepare_paper_matching_fields(paper: dict[str, Any]) -> None:
|
|
556
|
+
if "_authors" not in paper:
|
|
557
|
+
paper["_authors"] = _extract_authors(paper)
|
|
558
|
+
if "_year" not in paper:
|
|
559
|
+
paper["_year"] = _extract_year_for_matching(paper) or ""
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def _build_pdf_only_entries(
|
|
563
|
+
papers: list[dict[str, Any]],
|
|
564
|
+
pdf_paths: list[Path],
|
|
565
|
+
pdf_index: dict[str, list[Path]],
|
|
566
|
+
) -> list[dict[str, Any]]:
|
|
567
|
+
matched: set[Path] = set()
|
|
568
|
+
for paper in papers:
|
|
569
|
+
_prepare_paper_matching_fields(paper)
|
|
570
|
+
pdf_path = _resolve_pdf(paper, pdf_index)
|
|
571
|
+
if pdf_path:
|
|
572
|
+
matched.add(pdf_path.resolve())
|
|
573
|
+
|
|
574
|
+
entries: list[dict[str, Any]] = []
|
|
575
|
+
for path in pdf_paths:
|
|
576
|
+
resolved = path.resolve()
|
|
577
|
+
if resolved in matched:
|
|
578
|
+
continue
|
|
579
|
+
title = _read_pdf_metadata_title(resolved) or _extract_title_from_filename(resolved.name)
|
|
580
|
+
if not title:
|
|
581
|
+
title = resolved.stem
|
|
582
|
+
year_hint, author_hint = _extract_year_author_from_filename(resolved.name)
|
|
583
|
+
entry: dict[str, Any] = {
|
|
584
|
+
"paper_title": title,
|
|
585
|
+
"paper_authors": [author_hint] if author_hint else [],
|
|
586
|
+
"publication_date": year_hint or "",
|
|
587
|
+
"source_hash": stable_hash(str(resolved)),
|
|
588
|
+
"source_path": str(resolved),
|
|
589
|
+
"_is_pdf_only": True,
|
|
590
|
+
}
|
|
591
|
+
entries.append(entry)
|
|
592
|
+
return entries
|
|
593
|
+
|
|
594
|
+
|
|
465
595
|
def _load_or_merge_papers(
|
|
466
596
|
db_paths: list[Path],
|
|
467
597
|
bibtex_path: Path | None,
|
|
468
598
|
cache_dir: Path | None,
|
|
469
599
|
use_cache: bool,
|
|
600
|
+
pdf_roots: list[Path] | None = None,
|
|
470
601
|
) -> list[dict[str, Any]]:
|
|
471
602
|
cache_meta = None
|
|
603
|
+
pdf_roots = pdf_roots or []
|
|
604
|
+
pdf_paths: list[Path] = []
|
|
605
|
+
pdf_roots_meta: list[dict[str, Any]] | None = None
|
|
606
|
+
if pdf_roots:
|
|
607
|
+
pdf_paths, pdf_roots_meta = _scan_pdf_roots(pdf_roots)
|
|
472
608
|
if cache_dir and use_cache:
|
|
473
609
|
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
474
|
-
cache_meta = _build_cache_meta(db_paths, bibtex_path)
|
|
610
|
+
cache_meta = _build_cache_meta(db_paths, bibtex_path, pdf_roots_meta)
|
|
475
611
|
cached = _load_cached_papers(cache_dir, cache_meta)
|
|
476
612
|
if cached is not None:
|
|
477
613
|
return cached
|
|
@@ -481,6 +617,9 @@ def _load_or_merge_papers(
|
|
|
481
617
|
for bundle in inputs:
|
|
482
618
|
enrich_with_bibtex(bundle["papers"], bibtex_path)
|
|
483
619
|
papers = _merge_paper_inputs(inputs)
|
|
620
|
+
if pdf_paths:
|
|
621
|
+
pdf_index = _build_file_index_from_paths(pdf_paths, suffixes={".pdf"})
|
|
622
|
+
papers.extend(_build_pdf_only_entries(papers, pdf_paths, pdf_index))
|
|
484
623
|
|
|
485
624
|
if cache_dir and use_cache and cache_meta is not None:
|
|
486
625
|
_write_cached_papers(cache_dir, cache_meta, papers)
|
|
@@ -488,7 +627,18 @@ def _load_or_merge_papers(
|
|
|
488
627
|
|
|
489
628
|
|
|
490
629
|
def _md_renderer() -> MarkdownIt:
    """Build the Markdown renderer: CommonMark preset, raw HTML off, linkify on, tables enabled."""
    options = {"html": False, "linkify": True}
    renderer = MarkdownIt("commonmark", options)
    renderer.enable("table")
    return renderer
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _strip_paragraph_wrapped_tables(text: str) -> str:
|
|
636
|
+
lines = text.splitlines()
|
|
637
|
+
for idx, line in enumerate(lines):
|
|
638
|
+
line = re.sub(r"^\s*<p>\s*\|", "|", line)
|
|
639
|
+
line = re.sub(r"\|\s*</p>\s*$", "|", line)
|
|
640
|
+
lines[idx] = line
|
|
641
|
+
return "\n".join(lines)
|
|
492
642
|
|
|
493
643
|
|
|
494
644
|
def _normalize_merge_title(value: str | None) -> str | None:
|
|
@@ -648,6 +798,7 @@ def _merge_paper_inputs(inputs: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
|
648
798
|
|
|
649
799
|
|
|
650
800
|
def _render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
|
|
801
|
+
text = _strip_paragraph_wrapped_tables(text)
|
|
651
802
|
rendered, table_placeholders = _extract_html_table_placeholders(text)
|
|
652
803
|
rendered, img_placeholders = _extract_html_img_placeholders(rendered)
|
|
653
804
|
rendered, placeholders = _extract_math_placeholders(rendered)
|
|
@@ -1062,6 +1213,369 @@ def _render_paper_markdown(
|
|
|
1062
1213
|
return template.render(**context), str(template_name), warning
|
|
1063
1214
|
|
|
1064
1215
|
|
|
1216
|
+
# Tunables for matching paper titles against file names.
# Length of the compact-title prefix used to build "prefix:" index keys.
_TITLE_PREFIX_LEN = 16
# A shorter title must have at least this many characters, or at least
# _TITLE_MIN_TOKENS tokens, before containment in a longer title counts as a match.
_TITLE_MIN_CHARS = 24
# Minimum token count for a title to get a prefix key (also see above).
_TITLE_MIN_TOKENS = 4
# NOTE(review): not referenced in the visible code — confirm where this is used.
_AUTHOR_YEAR_MIN_SIMILARITY = 0.8
# Leading all-digit tokens up to this length are stripped from title keys.
_LEADING_NUMERIC_MAX_LEN = 2
# Adaptive similarity search: starting threshold, decrement per step, and step cap.
_SIMILARITY_START = 0.95
_SIMILARITY_STEP = 0.05
_SIMILARITY_MAX_STEPS = 10
|
|
1224
|
+
|
|
1225
|
+
|
|
1226
|
+
def _normalize_title_key(title: str) -> str:
|
|
1227
|
+
value = unicodedata.normalize("NFKD", title)
|
|
1228
|
+
greek_map = {
|
|
1229
|
+
"α": "alpha",
|
|
1230
|
+
"β": "beta",
|
|
1231
|
+
"γ": "gamma",
|
|
1232
|
+
"δ": "delta",
|
|
1233
|
+
"ε": "epsilon",
|
|
1234
|
+
"ζ": "zeta",
|
|
1235
|
+
"η": "eta",
|
|
1236
|
+
"θ": "theta",
|
|
1237
|
+
"ι": "iota",
|
|
1238
|
+
"κ": "kappa",
|
|
1239
|
+
"λ": "lambda",
|
|
1240
|
+
"μ": "mu",
|
|
1241
|
+
"ν": "nu",
|
|
1242
|
+
"ξ": "xi",
|
|
1243
|
+
"ο": "omicron",
|
|
1244
|
+
"π": "pi",
|
|
1245
|
+
"ρ": "rho",
|
|
1246
|
+
"σ": "sigma",
|
|
1247
|
+
"τ": "tau",
|
|
1248
|
+
"υ": "upsilon",
|
|
1249
|
+
"φ": "phi",
|
|
1250
|
+
"χ": "chi",
|
|
1251
|
+
"ψ": "psi",
|
|
1252
|
+
"ω": "omega",
|
|
1253
|
+
}
|
|
1254
|
+
for char, name in greek_map.items():
|
|
1255
|
+
value = value.replace(char, f" {name} ")
|
|
1256
|
+
value = re.sub(
|
|
1257
|
+
r"\\(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)\b",
|
|
1258
|
+
r" \1 ",
|
|
1259
|
+
value,
|
|
1260
|
+
flags=re.IGNORECASE,
|
|
1261
|
+
)
|
|
1262
|
+
value = value.replace("{", "").replace("}", "")
|
|
1263
|
+
value = value.replace("_", " ")
|
|
1264
|
+
value = re.sub(r"([a-z])([0-9])", r"\1 \2", value, flags=re.IGNORECASE)
|
|
1265
|
+
value = re.sub(r"([0-9])([a-z])", r"\1 \2", value, flags=re.IGNORECASE)
|
|
1266
|
+
value = re.sub(r"[^a-z0-9]+", " ", value.lower())
|
|
1267
|
+
value = re.sub(r"\s+", " ", value).strip()
|
|
1268
|
+
tokens = value.split()
|
|
1269
|
+
if not tokens:
|
|
1270
|
+
return ""
|
|
1271
|
+
merged: list[str] = []
|
|
1272
|
+
idx = 0
|
|
1273
|
+
while idx < len(tokens):
|
|
1274
|
+
token = tokens[idx]
|
|
1275
|
+
if len(token) == 1 and idx + 1 < len(tokens):
|
|
1276
|
+
merged.append(token + tokens[idx + 1])
|
|
1277
|
+
idx += 2
|
|
1278
|
+
continue
|
|
1279
|
+
merged.append(token)
|
|
1280
|
+
idx += 1
|
|
1281
|
+
return " ".join(merged)
|
|
1282
|
+
|
|
1283
|
+
|
|
1284
|
+
def _compact_title_key(title_key: str) -> str:
|
|
1285
|
+
return title_key.replace(" ", "")
|
|
1286
|
+
|
|
1287
|
+
|
|
1288
|
+
def _strip_leading_numeric_tokens(title_key: str) -> str:
|
|
1289
|
+
tokens = title_key.split()
|
|
1290
|
+
idx = 0
|
|
1291
|
+
while idx < len(tokens):
|
|
1292
|
+
token = tokens[idx]
|
|
1293
|
+
if token.isdigit() and len(token) <= _LEADING_NUMERIC_MAX_LEN:
|
|
1294
|
+
idx += 1
|
|
1295
|
+
continue
|
|
1296
|
+
break
|
|
1297
|
+
if idx == 0:
|
|
1298
|
+
return title_key
|
|
1299
|
+
return " ".join(tokens[idx:])
|
|
1300
|
+
|
|
1301
|
+
|
|
1302
|
+
def _strip_pdf_hash_suffix(name: str) -> str:
|
|
1303
|
+
return re.sub(r"(?i)(\.pdf)(?:-[0-9a-f\-]{8,})$", r"\1", name)
|
|
1304
|
+
|
|
1305
|
+
|
|
1306
|
+
def _extract_title_from_filename(name: str) -> str:
|
|
1307
|
+
base = name
|
|
1308
|
+
lower = base.lower()
|
|
1309
|
+
if lower.endswith(".md"):
|
|
1310
|
+
base = base[:-3]
|
|
1311
|
+
lower = base.lower()
|
|
1312
|
+
if ".pdf-" in lower:
|
|
1313
|
+
base = _strip_pdf_hash_suffix(base)
|
|
1314
|
+
lower = base.lower()
|
|
1315
|
+
if lower.endswith(".pdf"):
|
|
1316
|
+
base = base[:-4]
|
|
1317
|
+
base = base.replace("_", " ").strip()
|
|
1318
|
+
match = re.match(r"\s*\d{4}\s*-\s*(.+)$", base)
|
|
1319
|
+
if match:
|
|
1320
|
+
return match.group(1).strip()
|
|
1321
|
+
match = re.match(r"\s*.+?\s*-\s*\d{4}\s*-\s*(.+)$", base)
|
|
1322
|
+
if match:
|
|
1323
|
+
return match.group(1).strip()
|
|
1324
|
+
return base.strip()
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
def _clean_pdf_metadata_title(value: str | None, path: Path) -> str | None:
|
|
1328
|
+
if not value:
|
|
1329
|
+
return None
|
|
1330
|
+
text = str(value).replace("\x00", "").strip()
|
|
1331
|
+
if not text:
|
|
1332
|
+
return None
|
|
1333
|
+
text = re.sub(r"(?i)^microsoft\\s+word\\s*-\\s*", "", text)
|
|
1334
|
+
text = re.sub(r"(?i)^pdf\\s*-\\s*", "", text)
|
|
1335
|
+
text = re.sub(r"(?i)^untitled\\b", "", text).strip()
|
|
1336
|
+
if text.lower().endswith(".pdf"):
|
|
1337
|
+
text = text[:-4].strip()
|
|
1338
|
+
if len(text) < 3:
|
|
1339
|
+
return None
|
|
1340
|
+
stem = path.stem.strip()
|
|
1341
|
+
if stem and text.lower() == stem.lower():
|
|
1342
|
+
return None
|
|
1343
|
+
return text
|
|
1344
|
+
|
|
1345
|
+
|
|
1346
|
+
def _read_pdf_metadata_title(path: Path) -> str | None:
    """Read and clean the title from a PDF's document metadata.

    Returns ``None`` when pypdf is not available or when reading the file
    raises; reading is best-effort, so any exception is treated as "no title".
    """
    if PYPDF_AVAILABLE:
        try:
            info = PdfReader(str(path)).metadata
            raw_title = info.title if info else None
        except Exception:
            return None
        return _clean_pdf_metadata_title(raw_title, path)
    return None
|
|
1356
|
+
|
|
1357
|
+
|
|
1358
|
+
def _is_pdf_like(path: Path) -> bool:
|
|
1359
|
+
suffix = path.suffix.lower()
|
|
1360
|
+
if suffix == ".pdf":
|
|
1361
|
+
return True
|
|
1362
|
+
name_lower = path.name.lower()
|
|
1363
|
+
return ".pdf-" in name_lower and not name_lower.endswith(".md")
|
|
1364
|
+
|
|
1365
|
+
|
|
1366
|
+
def _scan_pdf_roots(roots: list[Path]) -> tuple[list[Path], list[dict[str, Any]]]:
|
|
1367
|
+
pdf_paths: list[Path] = []
|
|
1368
|
+
meta: list[dict[str, Any]] = []
|
|
1369
|
+
seen: set[Path] = set()
|
|
1370
|
+
for root in roots:
|
|
1371
|
+
try:
|
|
1372
|
+
if not root.exists() or not root.is_dir():
|
|
1373
|
+
continue
|
|
1374
|
+
except OSError:
|
|
1375
|
+
continue
|
|
1376
|
+
files: list[Path] = []
|
|
1377
|
+
for path in root.rglob("*"):
|
|
1378
|
+
try:
|
|
1379
|
+
if not path.is_file():
|
|
1380
|
+
continue
|
|
1381
|
+
except OSError:
|
|
1382
|
+
continue
|
|
1383
|
+
if not _is_pdf_like(path):
|
|
1384
|
+
continue
|
|
1385
|
+
resolved = path.resolve()
|
|
1386
|
+
if resolved in seen:
|
|
1387
|
+
continue
|
|
1388
|
+
seen.add(resolved)
|
|
1389
|
+
files.append(resolved)
|
|
1390
|
+
max_mtime = 0.0
|
|
1391
|
+
total_size = 0
|
|
1392
|
+
for path in files:
|
|
1393
|
+
try:
|
|
1394
|
+
stats = path.stat()
|
|
1395
|
+
except OSError:
|
|
1396
|
+
continue
|
|
1397
|
+
max_mtime = max(max_mtime, stats.st_mtime)
|
|
1398
|
+
total_size += stats.st_size
|
|
1399
|
+
pdf_paths.extend(files)
|
|
1400
|
+
meta.append(
|
|
1401
|
+
{
|
|
1402
|
+
"path": str(root),
|
|
1403
|
+
"count": len(files),
|
|
1404
|
+
"max_mtime": max_mtime,
|
|
1405
|
+
"size": total_size,
|
|
1406
|
+
}
|
|
1407
|
+
)
|
|
1408
|
+
return pdf_paths, meta
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
def _extract_year_author_from_filename(name: str) -> tuple[str | None, str | None]:
|
|
1412
|
+
base = name
|
|
1413
|
+
lower = base.lower()
|
|
1414
|
+
if lower.endswith(".md"):
|
|
1415
|
+
base = base[:-3]
|
|
1416
|
+
lower = base.lower()
|
|
1417
|
+
if ".pdf-" in lower:
|
|
1418
|
+
base = _strip_pdf_hash_suffix(base)
|
|
1419
|
+
lower = base.lower()
|
|
1420
|
+
if lower.endswith(".pdf"):
|
|
1421
|
+
base = base[:-4]
|
|
1422
|
+
match = re.match(r"\s*(.+?)\s*-\s*((?:19|20)\d{2})\s*-\s*", base)
|
|
1423
|
+
if match:
|
|
1424
|
+
return match.group(2), match.group(1).strip()
|
|
1425
|
+
match = re.match(r"\s*((?:19|20)\d{2})\s*-\s*", base)
|
|
1426
|
+
if match:
|
|
1427
|
+
return match.group(1), None
|
|
1428
|
+
return None, None
|
|
1429
|
+
|
|
1430
|
+
|
|
1431
|
+
def _normalize_author_key(name: str) -> str:
|
|
1432
|
+
raw = name.lower().strip()
|
|
1433
|
+
raw = raw.replace("et al.", "").replace("et al", "")
|
|
1434
|
+
if "," in raw:
|
|
1435
|
+
raw = raw.split(",", 1)[0]
|
|
1436
|
+
raw = re.sub(r"[^a-z0-9]+", " ", raw)
|
|
1437
|
+
raw = re.sub(r"\s+", " ", raw).strip()
|
|
1438
|
+
if not raw:
|
|
1439
|
+
return ""
|
|
1440
|
+
parts = raw.split()
|
|
1441
|
+
return parts[-1] if parts else raw
|
|
1442
|
+
|
|
1443
|
+
|
|
1444
|
+
def _title_prefix_key(title_key: str) -> str | None:
    """Build the ``prefix:<...>`` index key for a normalized title.

    Returns ``None`` when the title has fewer than ``_TITLE_MIN_TOKENS``
    tokens or its compact form is shorter than ``_TITLE_PREFIX_LEN``.
    """
    if len(title_key.split()) < _TITLE_MIN_TOKENS:
        return None
    squashed = _compact_title_key(title_key)
    head = squashed[:_TITLE_PREFIX_LEN] if len(squashed) >= _TITLE_PREFIX_LEN else ""
    return f"prefix:{head}" if head else None
|
|
1454
|
+
|
|
1455
|
+
|
|
1456
|
+
def _title_overlap_match(a: str, b: str) -> bool:
|
|
1457
|
+
if not a or not b:
|
|
1458
|
+
return False
|
|
1459
|
+
if a == b:
|
|
1460
|
+
return True
|
|
1461
|
+
shorter, longer = (a, b) if len(a) <= len(b) else (b, a)
|
|
1462
|
+
token_count = len(shorter.split())
|
|
1463
|
+
if len(shorter) >= _TITLE_MIN_CHARS or token_count >= _TITLE_MIN_TOKENS:
|
|
1464
|
+
if longer.startswith(shorter) or shorter in longer:
|
|
1465
|
+
return True
|
|
1466
|
+
return False
|
|
1467
|
+
|
|
1468
|
+
|
|
1469
|
+
def _adaptive_similarity_match(title_key: str, candidates: list[Path]) -> Path | None:
    """Pick the candidate whose file-name title best matches ``title_key``.

    Each candidate's title is derived from its file name and normalized. A
    candidate that passes ``_title_overlap_match`` is returned immediately.
    Otherwise candidates are scored with ``_title_similarity`` and the
    threshold is lowered from ``_SIMILARITY_START`` in ``_SIMILARITY_STEP``
    decrements (at most ``_SIMILARITY_MAX_STEPS`` times) until exactly one
    candidate clears it.

    Returns:
        The single matching path, or ``None`` when ``title_key`` is empty, no
        candidate yields a title, or no threshold isolates exactly one
        candidate.
    """
    if not title_key:
        return None
    scored: list[tuple[Path, float]] = []
    for path in candidates:
        candidate_title = _normalize_title_key(_extract_title_from_filename(path.name))
        if not candidate_title:
            continue
        # Equality or containment short-circuits the scoring entirely.
        if _title_overlap_match(title_key, candidate_title):
            return path
        scored.append((path, _title_similarity(title_key, candidate_title)))
    if not scored:
        return None

    def matches_at(threshold: float) -> list[Path]:
        # Candidates whose score is at or above the given threshold.
        return [path for path, score in scored if score >= threshold]

    threshold = _SIMILARITY_START
    step = _SIMILARITY_STEP
    prev_threshold = None
    prev_count = None
    for _ in range(_SIMILARITY_MAX_STEPS):
        matches = matches_at(threshold)
        if len(matches) == 1:
            return matches[0]
        if len(matches) == 0:
            # Nothing qualifies yet: remember this threshold and relax it.
            prev_threshold = threshold
            prev_count = 0
            threshold -= step
            continue
        # More than one match from here on.
        if prev_count == 0 and prev_threshold is not None:
            # The count jumped from zero to several within one step; bisect
            # between the two thresholds looking for a value that isolates
            # exactly one candidate.
            low = threshold
            high = prev_threshold
            for _ in range(_SIMILARITY_MAX_STEPS):
                mid = (low + high) / 2
                mid_matches = matches_at(mid)
                if len(mid_matches) == 1:
                    return mid_matches[0]
                if len(mid_matches) == 0:
                    high = mid
                else:
                    low = mid
            return None
        prev_threshold = threshold
        prev_count = len(matches)
        threshold -= step
    return None
|
|
1516
|
+
|
|
1517
|
+
|
|
1518
|
+
def _resolve_by_title_and_meta(
    paper: dict[str, Any],
    file_index: dict[str, list[Path]],
) -> Path | None:
    """Resolve a paper to a file using its title, then its year and first author.

    Lookups are tried in order against ``file_index``: the normalized title,
    its ``compact:`` form, the same two with leading numeric tokens stripped,
    the ``prefix:`` bucket (refined by ``_adaptive_similarity_match``), and
    finally the ``authoryear:`` / ``year:`` buckets built from the paper's
    ``_year`` and ``_authors`` fields.

    Returns:
        The first matching path, or ``None`` when every lookup fails.
    """
    title = str(paper.get("paper_title") or "")
    title_key = _normalize_title_key(title)
    if not title_key:
        title_key = ""
    # 1) Exact normalized-title key.
    candidates = file_index.get(title_key, [])
    if candidates:
        return candidates[0]
    if title_key:
        # 2) Same title with all spaces removed.
        compact_key = _compact_title_key(title_key)
        compact_candidates = file_index.get(f"compact:{compact_key}", [])
        if compact_candidates:
            return compact_candidates[0]
        # 3) Title without leading short numeric tokens, plain and compact.
        stripped_key = _strip_leading_numeric_tokens(title_key)
        if stripped_key and stripped_key != title_key:
            stripped_candidates = file_index.get(stripped_key, [])
            if stripped_candidates:
                return stripped_candidates[0]
            stripped_compact = _compact_title_key(stripped_key)
            stripped_candidates = file_index.get(f"compact:{stripped_compact}", [])
            if stripped_candidates:
                return stripped_candidates[0]
    # 4) Files sharing the title prefix, falling back to the prefix of the
    #    numeric-stripped title when the first bucket is empty.
    prefix_candidates: list[Path] = []
    prefix_key = _title_prefix_key(title_key)
    if prefix_key:
        prefix_candidates = file_index.get(prefix_key, [])
    if not prefix_candidates:
        stripped_key = _strip_leading_numeric_tokens(title_key)
        if stripped_key and stripped_key != title_key:
            prefix_key = _title_prefix_key(stripped_key)
            if prefix_key:
                prefix_candidates = file_index.get(prefix_key, [])
    if prefix_candidates:
        match = _adaptive_similarity_match(title_key, prefix_candidates)
        if match is not None:
            return match
    # 5) Year-based buckets; these require an all-digit ``_year``.
    year = str(paper.get("_year") or "").strip()
    if not year.isdigit():
        return None
    author_key = ""
    authors = paper.get("_authors") or []
    if authors:
        author_key = _normalize_author_key(str(authors[0]))
    candidates = []
    if author_key:
        candidates = file_index.get(f"authoryear:{year}:{author_key}", [])
    if not candidates:
        candidates = file_index.get(f"year:{year}", [])
    if not candidates:
        return None
    # With no title to compare, a sole candidate in the bucket is accepted.
    if len(candidates) == 1 and not title_key:
        return candidates[0]
    match = _adaptive_similarity_match(title_key, candidates)
    if match is not None:
        return match
    return None
|
|
1577
|
+
|
|
1578
|
+
|
|
1065
1579
|
def _build_file_index(roots: list[Path], *, suffixes: set[str]) -> dict[str, list[Path]]:
|
|
1066
1580
|
index: dict[str, list[Path]] = {}
|
|
1067
1581
|
for root in roots:
|
|
@@ -1076,19 +1590,97 @@ def _build_file_index(roots: list[Path], *, suffixes: set[str]) -> dict[str, lis
|
|
|
1076
1590
|
continue
|
|
1077
1591
|
except OSError:
|
|
1078
1592
|
continue
|
|
1079
|
-
|
|
1593
|
+
suffix = path.suffix.lower()
|
|
1594
|
+
if suffix not in suffixes:
|
|
1595
|
+
name_lower = path.name.lower()
|
|
1596
|
+
if suffixes == {".pdf"} and ".pdf-" in name_lower and suffix != ".md":
|
|
1597
|
+
pass
|
|
1598
|
+
else:
|
|
1599
|
+
continue
|
|
1600
|
+
resolved = path.resolve()
|
|
1601
|
+
name_key = path.name.lower()
|
|
1602
|
+
index.setdefault(name_key, []).append(resolved)
|
|
1603
|
+
title_candidate = _extract_title_from_filename(path.name)
|
|
1604
|
+
title_key = _normalize_title_key(title_candidate)
|
|
1605
|
+
if title_key:
|
|
1606
|
+
if title_key != name_key:
|
|
1607
|
+
index.setdefault(title_key, []).append(resolved)
|
|
1608
|
+
compact_key = _compact_title_key(title_key)
|
|
1609
|
+
if compact_key:
|
|
1610
|
+
index.setdefault(f"compact:{compact_key}", []).append(resolved)
|
|
1611
|
+
prefix_key = _title_prefix_key(title_key)
|
|
1612
|
+
if prefix_key:
|
|
1613
|
+
index.setdefault(prefix_key, []).append(resolved)
|
|
1614
|
+
stripped_key = _strip_leading_numeric_tokens(title_key)
|
|
1615
|
+
if stripped_key and stripped_key != title_key:
|
|
1616
|
+
index.setdefault(stripped_key, []).append(resolved)
|
|
1617
|
+
stripped_compact = _compact_title_key(stripped_key)
|
|
1618
|
+
if stripped_compact:
|
|
1619
|
+
index.setdefault(f"compact:{stripped_compact}", []).append(resolved)
|
|
1620
|
+
stripped_prefix = _title_prefix_key(stripped_key)
|
|
1621
|
+
if stripped_prefix:
|
|
1622
|
+
index.setdefault(stripped_prefix, []).append(resolved)
|
|
1623
|
+
year_hint, author_hint = _extract_year_author_from_filename(path.name)
|
|
1624
|
+
if year_hint:
|
|
1625
|
+
index.setdefault(f"year:{year_hint}", []).append(resolved)
|
|
1626
|
+
if author_hint:
|
|
1627
|
+
author_key = _normalize_author_key(author_hint)
|
|
1628
|
+
if author_key:
|
|
1629
|
+
index.setdefault(f"authoryear:{year_hint}:{author_key}", []).append(resolved)
|
|
1630
|
+
return index
|
|
1631
|
+
|
|
1632
|
+
|
|
1633
|
+
def _build_file_index_from_paths(paths: list[Path], *, suffixes: set[str]) -> dict[str, list[Path]]:
|
|
1634
|
+
index: dict[str, list[Path]] = {}
|
|
1635
|
+
for path in paths:
|
|
1636
|
+
try:
|
|
1637
|
+
if not path.is_file():
|
|
1638
|
+
continue
|
|
1639
|
+
except OSError:
|
|
1640
|
+
continue
|
|
1641
|
+
suffix = path.suffix.lower()
|
|
1642
|
+
if suffix not in suffixes:
|
|
1643
|
+
name_lower = path.name.lower()
|
|
1644
|
+
if suffixes == {".pdf"} and ".pdf-" in name_lower and suffix != ".md":
|
|
1645
|
+
pass
|
|
1646
|
+
else:
|
|
1080
1647
|
continue
|
|
1081
|
-
|
|
1648
|
+
resolved = path.resolve()
|
|
1649
|
+
name_key = path.name.lower()
|
|
1650
|
+
index.setdefault(name_key, []).append(resolved)
|
|
1651
|
+
title_candidate = _extract_title_from_filename(path.name)
|
|
1652
|
+
title_key = _normalize_title_key(title_candidate)
|
|
1653
|
+
if title_key:
|
|
1654
|
+
if title_key != name_key:
|
|
1655
|
+
index.setdefault(title_key, []).append(resolved)
|
|
1656
|
+
compact_key = _compact_title_key(title_key)
|
|
1657
|
+
if compact_key:
|
|
1658
|
+
index.setdefault(f"compact:{compact_key}", []).append(resolved)
|
|
1659
|
+
prefix_key = _title_prefix_key(title_key)
|
|
1660
|
+
if prefix_key:
|
|
1661
|
+
index.setdefault(prefix_key, []).append(resolved)
|
|
1662
|
+
stripped_key = _strip_leading_numeric_tokens(title_key)
|
|
1663
|
+
if stripped_key and stripped_key != title_key:
|
|
1664
|
+
index.setdefault(stripped_key, []).append(resolved)
|
|
1665
|
+
stripped_compact = _compact_title_key(stripped_key)
|
|
1666
|
+
if stripped_compact:
|
|
1667
|
+
index.setdefault(f"compact:{stripped_compact}", []).append(resolved)
|
|
1668
|
+
stripped_prefix = _title_prefix_key(stripped_key)
|
|
1669
|
+
if stripped_prefix:
|
|
1670
|
+
index.setdefault(stripped_prefix, []).append(resolved)
|
|
1082
1671
|
return index
|
|
1083
1672
|
|
|
1084
1673
|
|
|
1085
1674
|
def _resolve_source_md(paper: dict[str, Any], md_index: dict[str, list[Path]]) -> Path | None:
|
|
1086
1675
|
source_path = paper.get("source_path")
|
|
1087
1676
|
if not source_path:
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1677
|
+
source_path = ""
|
|
1678
|
+
if source_path:
|
|
1679
|
+
name = Path(str(source_path)).name.lower()
|
|
1680
|
+
candidates = md_index.get(name, [])
|
|
1681
|
+
if candidates:
|
|
1682
|
+
return candidates[0]
|
|
1683
|
+
return _resolve_by_title_and_meta(paper, md_index)
|
|
1092
1684
|
|
|
1093
1685
|
|
|
1094
1686
|
def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
|
|
@@ -1102,6 +1694,8 @@ def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
|
|
|
1102
1694
|
if ".pdf-" in name.lower():
|
|
1103
1695
|
base = name[: name.lower().rfind(".pdf-") + 4]
|
|
1104
1696
|
return [Path(base).name]
|
|
1697
|
+
if name.lower().endswith(".pdf"):
|
|
1698
|
+
return [name]
|
|
1105
1699
|
if name.lower().endswith(".pdf.md"):
|
|
1106
1700
|
return [name[:-3]]
|
|
1107
1701
|
return []
|
|
@@ -1112,7 +1706,7 @@ def _resolve_pdf(paper: dict[str, Any], pdf_index: dict[str, list[Path]]) -> Pat
|
|
|
1112
1706
|
candidates = pdf_index.get(filename.lower(), [])
|
|
1113
1707
|
if candidates:
|
|
1114
1708
|
return candidates[0]
|
|
1115
|
-
return
|
|
1709
|
+
return _resolve_by_title_and_meta(paper, pdf_index)
|
|
1116
1710
|
|
|
1117
1711
|
|
|
1118
1712
|
def _ensure_under_roots(path: Path, roots: list[Path]) -> bool:
|
|
@@ -1126,6 +1720,150 @@ def _ensure_under_roots(path: Path, roots: list[Path]) -> bool:
|
|
|
1126
1720
|
return False
|
|
1127
1721
|
|
|
1128
1722
|
|
|
1723
|
+
# Tokens that _normalize_presence_value (after strip + lowercase) maps to "with".
_BOOL_TRUE = {"1", "true", "yes", "with", "has"}
# Tokens that _normalize_presence_value (after strip + lowercase) maps to "without".
_BOOL_FALSE = {"0", "false", "no", "without"}
|
|
1725
|
+
|
|
1726
|
+
|
|
1727
|
+
def _tokenize_filter_query(text: str) -> list[str]:
|
|
1728
|
+
out: list[str] = []
|
|
1729
|
+
buf: list[str] = []
|
|
1730
|
+
in_quote = False
|
|
1731
|
+
|
|
1732
|
+
for ch in text:
|
|
1733
|
+
if ch == '"':
|
|
1734
|
+
in_quote = not in_quote
|
|
1735
|
+
continue
|
|
1736
|
+
if not in_quote and ch.isspace():
|
|
1737
|
+
token = "".join(buf).strip()
|
|
1738
|
+
if token:
|
|
1739
|
+
out.append(token)
|
|
1740
|
+
buf = []
|
|
1741
|
+
continue
|
|
1742
|
+
buf.append(ch)
|
|
1743
|
+
|
|
1744
|
+
token = "".join(buf).strip()
|
|
1745
|
+
if token:
|
|
1746
|
+
out.append(token)
|
|
1747
|
+
return out
|
|
1748
|
+
|
|
1749
|
+
|
|
1750
|
+
def _normalize_presence_value(value: str) -> str | None:
    """Map a presence token to ``"with"`` or ``"without"``.

    The value is stripped and lowercased, then matched against ``_BOOL_TRUE``
    (giving ``"with"``) and ``_BOOL_FALSE`` (giving ``"without"``). Anything
    else yields ``None``.
    """
    cleaned = value.strip().lower()
    for vocabulary, canonical in ((_BOOL_TRUE, "with"), (_BOOL_FALSE, "without")):
        if cleaned in vocabulary:
            return canonical
    return None
|
|
1757
|
+
|
|
1758
|
+
|
|
1759
|
+
def _parse_filter_query(text: str) -> dict[str, set[str]]:
    """Parse ``key:value`` filter tokens into per-field value sets.

    The result always has the keys ``"pdf"``, ``"source"``, ``"summary"`` and
    ``"template"``. Tokens come from ``_tokenize_filter_query``; a token
    without ``":"`` or with an empty value is ignored, as is any unknown key.
    Values are comma-separated:

    * ``tmpl:`` / ``template:`` add lowercased tags to ``"template"``.
    * ``pdf:`` / ``source:`` / ``summary:`` add whatever
      ``_normalize_presence_value`` returns for each part, when it is truthy.
    * ``has:<field>`` / ``no:<field>`` add ``"with"`` / ``"without"`` to the
      named presence field; other field names are skipped.
    """
    presence_fields = ("pdf", "source", "summary")
    parsed: dict[str, set[str]] = {field: set() for field in (*presence_fields, "template")}

    for token in _tokenize_filter_query(text):
        key, separator, raw_value = token.partition(":")
        if not separator:
            continue
        key = key.strip().lower()
        raw_value = raw_value.strip()
        if not raw_value:
            continue

        pieces = [piece.strip() for piece in raw_value.split(",")]
        pieces = [piece for piece in pieces if piece]

        if key in ("tmpl", "template"):
            parsed["template"].update(piece.lower() for piece in pieces)
        elif key in presence_fields:
            for piece in pieces:
                normalized = _normalize_presence_value(piece)
                if normalized:
                    parsed[key].add(normalized)
        elif key in ("has", "no"):
            marker = "with" if key == "has" else "without"
            for piece in pieces:
                target = piece.lower()
                if target in presence_fields:
                    parsed[target].add(marker)
    return parsed
|
|
1793
|
+
|
|
1794
|
+
|
|
1795
|
+
def _presence_filter(values: list[str]) -> set[str] | None:
    """Collapse raw presence values into a filter set, or ``None`` for no filter.

    Each value goes through ``_normalize_presence_value``; falsy results are
    discarded. ``None`` is returned when nothing is left or when both
    ``"with"`` and ``"without"`` were selected, since neither case narrows
    the result.
    """
    selected = {token for token in map(_normalize_presence_value, values) if token}
    if not selected:
        return None
    if selected == {"with", "without"}:
        return None
    return selected
|
|
1804
|
+
|
|
1805
|
+
|
|
1806
|
+
def _merge_filter_set(primary: set[str] | None, secondary: set[str] | None) -> set[str] | None:
|
|
1807
|
+
if not primary:
|
|
1808
|
+
return secondary
|
|
1809
|
+
if not secondary:
|
|
1810
|
+
return primary
|
|
1811
|
+
return primary & secondary
|
|
1812
|
+
|
|
1813
|
+
|
|
1814
|
+
def _matches_presence(allowed: set[str] | None, has_value: bool) -> bool:
|
|
1815
|
+
if not allowed:
|
|
1816
|
+
return True
|
|
1817
|
+
if has_value and "with" in allowed:
|
|
1818
|
+
return True
|
|
1819
|
+
if not has_value and "without" in allowed:
|
|
1820
|
+
return True
|
|
1821
|
+
return False
|
|
1822
|
+
|
|
1823
|
+
|
|
1824
|
+
def _template_tag_map(index: PaperIndex) -> dict[str, str]:
|
|
1825
|
+
return {tag.lower(): tag for tag in index.template_tags}
|
|
1826
|
+
|
|
1827
|
+
|
|
1828
|
+
def _compute_counts(index: PaperIndex, ids: set[int]) -> dict[str, Any]:
    """Tally availability and template counts for the papers selected by ``ids``.

    Papers flagged ``_is_pdf_only`` are skipped entirely. For the rest, the
    source hash is the paper's ``source_hash`` or, when that is falsy,
    ``stable_hash`` of its ``source_path`` (falling back to the paper id).
    Membership of that hash in ``index.md_path_by_hash`` /
    ``index.pdf_path_by_hash`` decides the source and PDF counts, and
    ``_has_summary`` decides the summary count.

    Returns:
        A dict with ``total``, ``pdf``, ``source``, ``summary``, a
        ``templates`` mapping of display tag to count (every tag in
        ``index.template_tags`` is present, starting at zero) and
        ``template_order`` listing the tags in index order.
    """
    template_order = list(index.template_tags)
    per_template = dict.fromkeys(template_order, 0)
    display_by_lower = _template_tag_map(index)
    totals = {"total": 0, "pdf": 0, "source": 0, "summary": 0}

    for paper_id in ids:
        paper = index.papers[paper_id]
        if paper.get("_is_pdf_only"):
            continue
        totals["total"] += 1

        known_hash = paper.get("source_hash")
        if not known_hash:
            known_hash = stable_hash(str(paper.get("source_path") or paper_id))
        source_hash = str(known_hash)

        if source_hash in index.md_path_by_hash:
            totals["source"] += 1
        if source_hash in index.pdf_path_by_hash:
            totals["pdf"] += 1
        if paper.get("_has_summary"):
            totals["summary"] += 1

        # Paper tags are stored lowercased; count them under their display spelling.
        for tag_lc in paper.get("_template_tags_lc") or []:
            display = display_by_lower.get(tag_lc)
            if display:
                per_template[display] = per_template.get(display, 0) + 1

    return {
        **totals,
        "templates": per_template,
        "template_order": template_order,
    }
|
|
1865
|
+
|
|
1866
|
+
|
|
1129
1867
|
def _apply_query(index: PaperIndex, query: Query) -> set[int]:
|
|
1130
1868
|
all_ids = set(index.ordered_ids)
|
|
1131
1869
|
|
|
@@ -1182,7 +1920,30 @@ def _apply_query(index: PaperIndex, query: Query) -> set[int]:
|
|
|
1182
1920
|
return result
|
|
1183
1921
|
|
|
1184
1922
|
|
|
1185
|
-
def _page_shell(
|
|
1923
|
+
def _page_shell(
|
|
1924
|
+
title: str,
|
|
1925
|
+
body_html: str,
|
|
1926
|
+
extra_head: str = "",
|
|
1927
|
+
extra_scripts: str = "",
|
|
1928
|
+
header_title: str | None = None,
|
|
1929
|
+
) -> str:
|
|
1930
|
+
header_html = """
|
|
1931
|
+
<header>
|
|
1932
|
+
<a href="/">Papers</a>
|
|
1933
|
+
<a href="/stats">Stats</a>
|
|
1934
|
+
</header>
|
|
1935
|
+
"""
|
|
1936
|
+
if header_title:
|
|
1937
|
+
safe_title = html.escape(header_title)
|
|
1938
|
+
header_html = f"""
|
|
1939
|
+
<header class="detail-header">
|
|
1940
|
+
<div class="header-row">
|
|
1941
|
+
<a class="header-back" href="/">← Papers</a>
|
|
1942
|
+
<span class="header-title" title="{safe_title}">{safe_title}</span>
|
|
1943
|
+
<a class="header-link" href="/stats">Stats</a>
|
|
1944
|
+
</div>
|
|
1945
|
+
</header>
|
|
1946
|
+
"""
|
|
1186
1947
|
return f"""<!doctype html>
|
|
1187
1948
|
<html lang="en">
|
|
1188
1949
|
<head>
|
|
@@ -1193,28 +1954,83 @@ def _page_shell(title: str, body_html: str, extra_head: str = "", extra_scripts:
|
|
|
1193
1954
|
body {{ font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial; margin: 0; }}
|
|
1194
1955
|
header {{ position: sticky; top: 0; background: #0b1220; color: #fff; padding: 12px 16px; z-index: 10; }}
|
|
1195
1956
|
header a {{ color: #cfe3ff; text-decoration: none; margin-right: 12px; }}
|
|
1957
|
+
.detail-header .header-row {{ display: grid; grid-template-columns: auto minmax(0, 1fr) auto; align-items: center; gap: 12px; }}
|
|
1958
|
+
.detail-header .header-title {{ text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
|
|
1959
|
+
.detail-header .header-back {{ margin-right: 0; }}
|
|
1960
|
+
.detail-header .header-link {{ margin-right: 0; }}
|
|
1196
1961
|
.container {{ max-width: 1100px; margin: 0 auto; padding: 16px; }}
|
|
1197
1962
|
.filters {{ display: grid; grid-template-columns: repeat(6, 1fr); gap: 8px; margin: 12px 0 16px; }}
|
|
1198
1963
|
.filters input {{ width: 100%; padding: 8px; border: 1px solid #d0d7de; border-radius: 6px; }}
|
|
1964
|
+
.filters select {{ width: 100%; border: 1px solid #d0d7de; border-radius: 6px; background: #fff; font-size: 13px; }}
|
|
1965
|
+
.filters select:not([multiple]) {{ padding: 6px 8px; }}
|
|
1966
|
+
.filters select[multiple] {{ padding: 2px; line-height: 1.25; min-height: 72px; font-size: 13px; }}
|
|
1967
|
+
.filters select[multiple] option {{ padding: 2px 6px; line-height: 1.25; }}
|
|
1968
|
+
.filters label {{ font-size: 12px; color: #57606a; }}
|
|
1969
|
+
.filter-group {{ display: flex; flex-direction: column; gap: 4px; }}
|
|
1199
1970
|
.card {{ border: 1px solid #d0d7de; border-radius: 10px; padding: 12px; margin: 10px 0; }}
|
|
1200
1971
|
.muted {{ color: #57606a; font-size: 13px; }}
|
|
1201
1972
|
.pill {{ display: inline-block; padding: 2px 8px; border-radius: 999px; border: 1px solid #d0d7de; margin-right: 6px; font-size: 12px; }}
|
|
1973
|
+
.pill.template {{ border-color: #8a92a5; color: #243b53; background: #f6f8fa; }}
|
|
1974
|
+
.pill.pdf-only {{ border-color: #c8a951; background: #fff8dc; color: #5b4a00; }}
|
|
1202
1975
|
.warning {{ background: #fff4ce; border: 1px solid #ffd089; padding: 10px; border-radius: 10px; margin: 12px 0; }}
|
|
1203
1976
|
.tabs {{ display: flex; gap: 8px; flex-wrap: wrap; }}
|
|
1204
1977
|
.tab {{ display: inline-block; padding: 6px 12px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; color: #0969da; text-decoration: none; font-size: 13px; }}
|
|
1205
1978
|
.tab:hover {{ background: #eef1f4; }}
|
|
1206
1979
|
.tab.active {{ background: #0969da; border-color: #0969da; color: #fff; }}
|
|
1980
|
+
.detail-shell {{ display: flex; flex-direction: column; gap: 12px; min-height: calc(100vh - 120px); }}
|
|
1981
|
+
.detail-toolbar {{ display: flex; flex-wrap: wrap; align-items: center; justify-content: flex-start; gap: 12px; padding: 6px 8px 10px; border-bottom: 1px solid #e5e7eb; box-sizing: border-box; }}
|
|
1982
|
+
.detail-toolbar .tabs {{ margin: 0; }}
|
|
1983
|
+
.toolbar-actions {{ display: flex; flex-wrap: wrap; align-items: center; gap: 10px; margin-left: auto; padding-right: 16px; }}
|
|
1984
|
+
.split-inline {{ display: flex; flex-wrap: wrap; align-items: center; gap: 6px; }}
|
|
1985
|
+
.split-inline select {{ padding: 6px 8px; border-radius: 8px; border: 1px solid #d0d7de; background: #fff; min-width: 140px; }}
|
|
1986
|
+
.split-actions {{ display: flex; align-items: center; justify-content: center; gap: 8px; }}
|
|
1987
|
+
.split-actions button {{ padding: 6px 10px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; min-width: 36px; }}
|
|
1988
|
+
.fullscreen-actions {{ display: flex; align-items: center; gap: 6px; }}
|
|
1989
|
+
.fullscreen-actions button {{ padding: 6px 10px; border-radius: 8px; border: 1px solid #d0d7de; background: #f6f8fa; cursor: pointer; }}
|
|
1990
|
+
.fullscreen-exit {{ display: none; }}
|
|
1991
|
+
body.detail-fullscreen {{ overflow: hidden; --outline-top: 16px; }}
|
|
1992
|
+
body.detail-fullscreen header {{ display: none; }}
|
|
1993
|
+
body.detail-fullscreen .container {{ max-width: 100%; padding: 0; }}
|
|
1994
|
+
body.detail-fullscreen .detail-shell {{
|
|
1995
|
+
position: fixed;
|
|
1996
|
+
inset: 0;
|
|
1997
|
+
padding: 12px 16px;
|
|
1998
|
+
background: #fff;
|
|
1999
|
+
z-index: 40;
|
|
2000
|
+
overflow: auto;
|
|
2001
|
+
}}
|
|
2002
|
+
body.detail-fullscreen .detail-toolbar {{ position: sticky; top: 0; background: #fff; z-index: 41; }}
|
|
2003
|
+
body.detail-fullscreen .fullscreen-enter {{ display: none; }}
|
|
2004
|
+
body.detail-fullscreen .fullscreen-exit {{ display: inline-flex; }}
|
|
2005
|
+
.detail-body {{ display: flex; flex-direction: column; gap: 8px; flex: 1; min-height: 0; }}
|
|
2006
|
+
.help-icon {{ display: inline-flex; align-items: center; justify-content: center; width: 18px; height: 18px; border-radius: 50%; border: 1px solid #d0d7de; color: #57606a; font-size: 12px; cursor: default; position: relative; }}
|
|
2007
|
+
.help-icon::after {{ content: attr(data-tip); display: none; position: absolute; top: 24px; right: 0; background: #0b1220; color: #e6edf3; padding: 8px 10px; border-radius: 8px; font-size: 12px; white-space: pre-line; width: 260px; z-index: 20; }}
|
|
2008
|
+
.help-icon:hover::after {{ display: block; }}
|
|
2009
|
+
.stats {{ margin: 12px 0 6px; }}
|
|
2010
|
+
.stats-row {{ display: flex; flex-wrap: wrap; gap: 6px; align-items: center; }}
|
|
2011
|
+
.stats-label {{ font-weight: 600; color: #0b1220; margin-right: 4px; }}
|
|
2012
|
+
.pill.stat {{ background: #f6f8fa; border-color: #c7d2e0; color: #1f2a37; }}
|
|
1207
2013
|
pre {{ overflow: auto; padding: 10px; background: #0b1220; color: #e6edf3; border-radius: 10px; }}
|
|
1208
2014
|
code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
|
|
1209
2015
|
a {{ color: #0969da; }}
|
|
2016
|
+
@media (max-width: 768px) {{
|
|
2017
|
+
.detail-toolbar {{
|
|
2018
|
+
flex-wrap: nowrap;
|
|
2019
|
+
overflow-x: auto;
|
|
2020
|
+
padding-bottom: 8px;
|
|
2021
|
+
}}
|
|
2022
|
+
.detail-toolbar::-webkit-scrollbar {{ height: 6px; }}
|
|
2023
|
+
.detail-toolbar::-webkit-scrollbar-thumb {{ background: #c7d2e0; border-radius: 999px; }}
|
|
2024
|
+
.detail-toolbar .tabs,
|
|
2025
|
+
.toolbar-actions {{
|
|
2026
|
+
flex: 0 0 auto;
|
|
2027
|
+
}}
|
|
2028
|
+
}}
|
|
1210
2029
|
</style>
|
|
1211
2030
|
{extra_head}
|
|
1212
2031
|
</head>
|
|
1213
2032
|
<body>
|
|
1214
|
-
|
|
1215
|
-
<a href="/">Papers</a>
|
|
1216
|
-
<a href="/stats">Stats</a>
|
|
1217
|
-
</header>
|
|
2033
|
+
{header_html}
|
|
1218
2034
|
<div class="container">
|
|
1219
2035
|
{body_html}
|
|
1220
2036
|
</div>
|
|
@@ -1253,106 +2069,362 @@ def _build_pdfjs_viewer_url(pdf_url: str) -> str:
|
|
|
1253
2069
|
return f"{_PDFJS_VIEWER_PATH}?file={encoded}"
|
|
1254
2070
|
|
|
1255
2071
|
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
<
|
|
1262
|
-
<div class="card">
|
|
1263
|
-
<div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
|
|
1264
|
-
<div style="display:flex; gap:8px; margin-top:8px;">
|
|
1265
|
-
<input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
|
|
1266
|
-
<select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
|
|
1267
|
-
<option value="summary" selected>Open: Summary</option>
|
|
1268
|
-
<option value="source">Open: Source</option>
|
|
1269
|
-
<option value="pdf">Open: PDF</option>
|
|
1270
|
-
<option value="pdfjs">Open: PDF Viewer</option>
|
|
1271
|
-
<option value="split">Open: Split</option>
|
|
1272
|
-
</select>
|
|
1273
|
-
</div>
|
|
1274
|
-
<details style="margin-top:10px;">
|
|
1275
|
-
<summary>Advanced search</summary>
|
|
1276
|
-
<div style="margin-top:10px;" class="muted">Build a query:</div>
|
|
1277
|
-
<div class="filters" style="grid-template-columns: repeat(3, 1fr);">
|
|
1278
|
-
<input id="advTitle" placeholder="title contains..." />
|
|
1279
|
-
<input id="advAuthor" placeholder="author contains..." />
|
|
1280
|
-
<input id="advTag" placeholder="tag (comma separated)" />
|
|
1281
|
-
<input id="advYear" placeholder="year (e.g. 2020..2024)" />
|
|
1282
|
-
<input id="advMonth" placeholder="month (01-12)" />
|
|
1283
|
-
<input id="advVenue" placeholder="venue contains..." />
|
|
1284
|
-
</div>
|
|
1285
|
-
<div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
|
|
1286
|
-
<button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
|
|
1287
|
-
<div class="muted">Generated: <code id="generated"></code></div>
|
|
1288
|
-
</div>
|
|
1289
|
-
</details>
|
|
2072
|
+
def _outline_assets(outline_top: str) -> tuple[str, str, str]:
    """Return the HTML, CSS and JavaScript for the floating outline widget.

    Args:
        outline_top: Text substituted verbatim as the value of the
            ``--outline-top`` CSS custom property, which positions the toggle
            button and (42px lower) the outline panel.

    Returns:
        ``(outline_html, outline_css, outline_js)``: the markup for the toggle
        button, the initially collapsed panel and the back-to-top button; a
        ``<style>`` element for them; and a script body (no ``<script>`` tag)
        that fills the panel from the ``h1``-``h4`` headings inside the
        element with id ``content`` and wires up the two buttons.
    """
    outline_html = """
<button id="outlineToggle" class="outline-toggle" title="Toggle outline">☰</button>
<div id="outlinePanel" class="outline-panel collapsed">
<div class="outline-title">Outline</div>
<div id="outlineList" class="outline-list"></div>
</div>
<button id="backToTop" class="back-to-top" title="Back to top">↑</button>
"""
    # f-string: literal CSS braces are doubled; only {outline_top} is substituted.
    outline_css = f"""
<style>
:root {{
--outline-top: {outline_top};
}}
.outline-toggle {{
position: fixed;
top: var(--outline-top);
left: 16px;
z-index: 20;
padding: 6px 10px;
border-radius: 8px;
border: 1px solid #d0d7de;
background: #f6f8fa;
cursor: pointer;
}}
.outline-panel {{
position: fixed;
top: calc(var(--outline-top) + 42px);
left: 16px;
width: 240px;
max-height: 60vh;
overflow: auto;
border: 1px solid #d0d7de;
border-radius: 10px;
background: #ffffff;
padding: 10px;
z-index: 20;
box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
}}
.outline-panel.collapsed {{
display: none;
}}
.outline-title {{
font-size: 12px;
text-transform: uppercase;
letter-spacing: 0.08em;
color: #57606a;
margin-bottom: 8px;
}}
.outline-list a {{
display: block;
color: #0969da;
text-decoration: none;
padding: 4px 0;
}}
.outline-list a:hover {{
text-decoration: underline;
}}
.back-to-top {{
position: fixed;
left: 16px;
bottom: 16px;
padding: 6px 10px;
border-radius: 999px;
border: 1px solid #d0d7de;
background: #ffffff;
cursor: pointer;
opacity: 0;
pointer-events: none;
transition: opacity 0.2s ease;
z-index: 20;
}}
.back-to-top.visible {{
opacity: 1;
pointer-events: auto;
}}
@media (max-width: 900px) {{
.outline-panel {{
width: 200px;
}}
}}
</style>
"""
    # Plain (non-f) string: JS braces stay single and "\\s" reaches the browser as "\s".
    # Headings without an id get a slug-based one, made unique with a numeric suffix;
    # the back-to-top button becomes visible once window.scrollY exceeds 300.
    outline_js = """
const outlineToggle = document.getElementById('outlineToggle');
const outlinePanel = document.getElementById('outlinePanel');
const outlineList = document.getElementById('outlineList');
const backToTop = document.getElementById('backToTop');

function slugify(text) {
return text.toLowerCase().trim()
.replace(/[^a-z0-9\\s-]/g, '')
.replace(/\\s+/g, '-')
.replace(/-+/g, '-');
}

function buildOutline() {
if (!outlineList) return;
const content = document.getElementById('content');
if (!content) return;
const headings = content.querySelectorAll('h1, h2, h3, h4');
if (!headings.length) {
outlineList.innerHTML = '<div class="muted">No headings</div>';
return;
}
const used = new Set();
outlineList.innerHTML = '';
headings.forEach((heading) => {
let id = heading.id;
if (!id) {
const base = slugify(heading.textContent || 'section') || 'section';
id = base;
let i = 1;
while (used.has(id) || document.getElementById(id)) {
id = `${base}-${i++}`;
}
heading.id = id;
}
used.add(id);
const level = parseInt(heading.tagName.slice(1), 10) || 1;
const link = document.createElement('a');
link.href = `#${id}`;
link.textContent = heading.textContent || '';
link.style.paddingLeft = `${(level - 1) * 12}px`;
outlineList.appendChild(link);
});
}

function toggleBackToTop() {
if (!backToTop) return;
if (window.scrollY > 300) {
backToTop.classList.add('visible');
} else {
backToTop.classList.remove('visible');
}
}

if (outlineToggle && outlinePanel) {
outlineToggle.addEventListener('click', () => {
outlinePanel.classList.toggle('collapsed');
});
}

if (backToTop) {
backToTop.addEventListener('click', () => {
window.scrollTo({ top: 0, behavior: 'smooth' });
});
}

buildOutline();
window.addEventListener('scroll', toggleBackToTop);
toggleBackToTop();
"""
    return outline_html, outline_css, outline_js
|
|
2226
|
+
|
|
2227
|
+
|
|
2228
|
+
async def _index_page(request: Request) -> HTMLResponse:
|
|
2229
|
+
index: PaperIndex = request.app.state.index
|
|
2230
|
+
template_options = "".join(
|
|
2231
|
+
f'<option value="{html.escape(tag)}">{html.escape(tag)}</option>'
|
|
2232
|
+
for tag in index.template_tags
|
|
2233
|
+
)
|
|
2234
|
+
if not template_options:
|
|
2235
|
+
template_options = '<option value="" disabled>(no templates)</option>'
|
|
2236
|
+
filter_help = (
|
|
2237
|
+
"Filters syntax:\\n"
|
|
2238
|
+
"pdf:yes|no source:yes|no summary:yes|no\\n"
|
|
2239
|
+
"tmpl:<tag> or template:<tag>\\n"
|
|
2240
|
+
"has:pdf / no:source aliases\\n"
|
|
2241
|
+
"Content tags still use the search box (tag:fpga)."
|
|
2242
|
+
)
|
|
2243
|
+
filter_help_attr = html.escape(filter_help).replace("\n", " ")
|
|
2244
|
+
body_html = """
|
|
2245
|
+
<h2>Paper Database</h2>
|
|
2246
|
+
<div class="card">
|
|
2247
|
+
<div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
|
|
2248
|
+
<div style="display:flex; gap:8px; margin-top:8px;">
|
|
2249
|
+
<input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
|
|
2250
|
+
<select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
|
|
2251
|
+
<option value="summary" selected>Open: Summary</option>
|
|
2252
|
+
<option value="source">Open: Source</option>
|
|
2253
|
+
<option value="pdf">Open: PDF</option>
|
|
2254
|
+
<option value="pdfjs">Open: PDF Viewer</option>
|
|
2255
|
+
<option value="split">Open: Split</option>
|
|
2256
|
+
</select>
|
|
2257
|
+
</div>
|
|
2258
|
+
<div class="filters" style="grid-template-columns: repeat(4, 1fr); margin-top:10px;">
|
|
2259
|
+
<div class="filter-group">
|
|
2260
|
+
<label>PDF</label>
|
|
2261
|
+
<select id="filterPdf" multiple size="2">
|
|
2262
|
+
<option value="with">With</option>
|
|
2263
|
+
<option value="without">Without</option>
|
|
2264
|
+
</select>
|
|
2265
|
+
</div>
|
|
2266
|
+
<div class="filter-group">
|
|
2267
|
+
<label>Source</label>
|
|
2268
|
+
<select id="filterSource" multiple size="2">
|
|
2269
|
+
<option value="with">With</option>
|
|
2270
|
+
<option value="without">Without</option>
|
|
2271
|
+
</select>
|
|
2272
|
+
</div>
|
|
2273
|
+
<div class="filter-group">
|
|
2274
|
+
<label>Summary</label>
|
|
2275
|
+
<select id="filterSummary" multiple size="2">
|
|
2276
|
+
<option value="with">With</option>
|
|
2277
|
+
<option value="without">Without</option>
|
|
2278
|
+
</select>
|
|
2279
|
+
</div>
|
|
2280
|
+
<div class="filter-group">
|
|
2281
|
+
<label>Template</label>
|
|
2282
|
+
<select id="filterTemplate" multiple size="4">
|
|
2283
|
+
__TEMPLATE_OPTIONS__
|
|
2284
|
+
</select>
|
|
2285
|
+
</div>
|
|
2286
|
+
</div>
|
|
2287
|
+
<div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
|
|
2288
|
+
<input id="filterQuery" placeholder='Filters... e.g. pdf:yes tmpl:simple' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
|
|
2289
|
+
<span class="help-icon" data-tip="__FILTER_HELP__">?</span>
|
|
2290
|
+
</div>
|
|
2291
|
+
<details style="margin-top:10px;">
|
|
2292
|
+
<summary>Advanced search</summary>
|
|
2293
|
+
<div style="margin-top:10px;" class="muted">Build a query:</div>
|
|
2294
|
+
<div class="filters" style="grid-template-columns: repeat(3, 1fr);">
|
|
2295
|
+
<input id="advTitle" placeholder="title contains..." />
|
|
2296
|
+
<input id="advAuthor" placeholder="author contains..." />
|
|
2297
|
+
<input id="advTag" placeholder="tag (comma separated)" />
|
|
2298
|
+
<input id="advYear" placeholder="year (e.g. 2020..2024)" />
|
|
2299
|
+
<input id="advMonth" placeholder="month (01-12)" />
|
|
2300
|
+
<input id="advVenue" placeholder="venue contains..." />
|
|
2301
|
+
</div>
|
|
2302
|
+
<div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
|
|
2303
|
+
<button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
|
|
2304
|
+
<div class="muted">Generated: <code id="generated"></code></div>
|
|
2305
|
+
</div>
|
|
2306
|
+
</details>
|
|
2307
|
+
</div>
|
|
2308
|
+
<div id="stats" class="stats">
|
|
2309
|
+
<div id="statsTotal" class="stats-row"></div>
|
|
2310
|
+
<div id="statsFiltered" class="stats-row" style="margin-top:6px;"></div>
|
|
2311
|
+
</div>
|
|
2312
|
+
<div id="results"></div>
|
|
2313
|
+
<div id="loading" class="muted">Loading...</div>
|
|
2314
|
+
<script>
|
|
2315
|
+
let page = 1;
|
|
2316
|
+
let loading = false;
|
|
2317
|
+
let done = false;
|
|
2318
|
+
|
|
2319
|
+
function currentParams(nextPage) {
|
|
2320
|
+
const params = new URLSearchParams();
|
|
2321
|
+
params.set("page", String(nextPage));
|
|
2322
|
+
params.set("page_size", "30");
|
|
2323
|
+
const q = document.getElementById("query").value.trim();
|
|
2324
|
+
if (q) params.set("q", q);
|
|
2325
|
+
const fq = document.getElementById("filterQuery").value.trim();
|
|
2326
|
+
if (fq) params.set("fq", fq);
|
|
2327
|
+
function addMulti(id, key) {
|
|
2328
|
+
const el = document.getElementById(id);
|
|
2329
|
+
const values = Array.from(el.selectedOptions).map(opt => opt.value).filter(Boolean);
|
|
2330
|
+
for (const value of values) {
|
|
2331
|
+
params.append(key, value);
|
|
2332
|
+
}
|
|
2333
|
+
}
|
|
2334
|
+
addMulti("filterPdf", "pdf");
|
|
2335
|
+
addMulti("filterSource", "source");
|
|
2336
|
+
addMulti("filterSummary", "summary");
|
|
2337
|
+
addMulti("filterTemplate", "template");
|
|
2338
|
+
return params;
|
|
2339
|
+
}
|
|
2340
|
+
|
|
2341
|
+
function escapeHtml(text) {
|
|
2342
|
+
const div = document.createElement("div");
|
|
2343
|
+
div.textContent = text;
|
|
2344
|
+
return div.innerHTML;
|
|
2345
|
+
}
|
|
2346
|
+
|
|
2347
|
+
function viewSuffixForItem(item) {
|
|
2348
|
+
let view = document.getElementById("openView").value;
|
|
2349
|
+
const isPdfOnly = item.is_pdf_only;
|
|
2350
|
+
const pdfFallback = item.has_pdf ? "pdfjs" : "pdf";
|
|
2351
|
+
if (isPdfOnly && (view === "summary" || view === "source")) {
|
|
2352
|
+
view = pdfFallback;
|
|
2353
|
+
}
|
|
2354
|
+
if (!view || view === "summary") return "";
|
|
2355
|
+
const params = new URLSearchParams();
|
|
2356
|
+
params.set("view", view);
|
|
2357
|
+
if (view === "split") {
|
|
2358
|
+
if (isPdfOnly) {
|
|
2359
|
+
params.set("left", pdfFallback);
|
|
2360
|
+
params.set("right", pdfFallback);
|
|
2361
|
+
} else {
|
|
2362
|
+
params.set("left", "summary");
|
|
2363
|
+
if (item.has_pdf) {
|
|
2364
|
+
params.set("right", "pdfjs");
|
|
2365
|
+
} else if (item.has_source) {
|
|
2366
|
+
params.set("right", "source");
|
|
2367
|
+
} else {
|
|
2368
|
+
params.set("right", "summary");
|
|
2369
|
+
}
|
|
2370
|
+
}
|
|
2371
|
+
}
|
|
2372
|
+
return `?${params.toString()}`;
|
|
2373
|
+
}
|
|
2374
|
+
|
|
1331
2375
|
function renderItem(item) {
|
|
1332
2376
|
const tags = (item.tags || []).map(t => `<span class="pill">${escapeHtml(t)}</span>`).join("");
|
|
2377
|
+
const templateTags = (item.template_tags || []).map(t => `<span class="pill template">tmpl:${escapeHtml(t)}</span>`).join("");
|
|
1333
2378
|
const authors = (item.authors || []).slice(0, 6).map(a => escapeHtml(a)).join(", ");
|
|
1334
2379
|
const meta = `${escapeHtml(item.year || "")}-${escapeHtml(item.month || "")} · ${escapeHtml(item.venue || "")}`;
|
|
1335
2380
|
const viewSuffix = viewSuffixForItem(item);
|
|
1336
2381
|
const badges = [
|
|
1337
2382
|
item.has_source ? `<span class="pill">source</span>` : "",
|
|
1338
2383
|
item.has_pdf ? `<span class="pill">pdf</span>` : "",
|
|
2384
|
+
item.is_pdf_only ? `<span class="pill pdf-only">pdf-only</span>` : "",
|
|
1339
2385
|
].join("");
|
|
1340
2386
|
return `
|
|
1341
2387
|
<div class="card">
|
|
1342
2388
|
<div><a href="/paper/${encodeURIComponent(item.source_hash)}${viewSuffix}">${escapeHtml(item.title || "")}</a></div>
|
|
1343
2389
|
<div class="muted">${authors}</div>
|
|
1344
2390
|
<div class="muted">${meta}</div>
|
|
1345
|
-
<div style="margin-top:6px">${badges} ${tags}</div>
|
|
2391
|
+
<div style="margin-top:6px">${badges} ${templateTags} ${tags}</div>
|
|
1346
2392
|
</div>
|
|
1347
2393
|
`;
|
|
1348
2394
|
}
|
|
1349
2395
|
|
|
2396
|
+
function renderStatsRow(targetId, label, counts) {
|
|
2397
|
+
const row = document.getElementById(targetId);
|
|
2398
|
+
if (!row || !counts) return;
|
|
2399
|
+
const pills = [];
|
|
2400
|
+
pills.push(`<span class="stats-label">${escapeHtml(label)}</span>`);
|
|
2401
|
+
pills.push(`<span class="pill stat">Count ${counts.total}</span>`);
|
|
2402
|
+
pills.push(`<span class="pill stat">PDF ${counts.pdf}</span>`);
|
|
2403
|
+
pills.push(`<span class="pill stat">Source ${counts.source}</span>`);
|
|
2404
|
+
pills.push(`<span class="pill stat">Summary ${counts.summary}</span>`);
|
|
2405
|
+
const order = counts.template_order || Object.keys(counts.templates || {});
|
|
2406
|
+
for (const tag of order) {
|
|
2407
|
+
const count = (counts.templates && counts.templates[tag]) || 0;
|
|
2408
|
+
pills.push(`<span class="pill stat">tmpl:${escapeHtml(tag)} ${count}</span>`);
|
|
2409
|
+
}
|
|
2410
|
+
row.innerHTML = pills.join("");
|
|
2411
|
+
}
|
|
2412
|
+
|
|
2413
|
+
function updateStats(stats) {
|
|
2414
|
+
if (!stats) return;
|
|
2415
|
+
renderStatsRow("statsTotal", "Total", stats.all);
|
|
2416
|
+
renderStatsRow("statsFiltered", "Filtered", stats.filtered);
|
|
2417
|
+
}
|
|
2418
|
+
|
|
1350
2419
|
async function loadMore() {
|
|
1351
2420
|
if (loading || done) return;
|
|
1352
2421
|
loading = true;
|
|
1353
2422
|
document.getElementById("loading").textContent = "Loading...";
|
|
1354
2423
|
const res = await fetch(`/api/papers?${currentParams(page).toString()}`);
|
|
1355
2424
|
const data = await res.json();
|
|
2425
|
+
if (data.stats) {
|
|
2426
|
+
updateStats(data.stats);
|
|
2427
|
+
}
|
|
1356
2428
|
const results = document.getElementById("results");
|
|
1357
2429
|
for (const item of data.items) {
|
|
1358
2430
|
results.insertAdjacentHTML("beforeend", renderItem(item));
|
|
@@ -1376,6 +2448,11 @@ function resetAndLoad() {
|
|
|
1376
2448
|
|
|
1377
2449
|
document.getElementById("query").addEventListener("change", resetAndLoad);
|
|
1378
2450
|
document.getElementById("openView").addEventListener("change", resetAndLoad);
|
|
2451
|
+
document.getElementById("filterQuery").addEventListener("change", resetAndLoad);
|
|
2452
|
+
document.getElementById("filterPdf").addEventListener("change", resetAndLoad);
|
|
2453
|
+
document.getElementById("filterSource").addEventListener("change", resetAndLoad);
|
|
2454
|
+
document.getElementById("filterSummary").addEventListener("change", resetAndLoad);
|
|
2455
|
+
document.getElementById("filterTemplate").addEventListener("change", resetAndLoad);
|
|
1379
2456
|
|
|
1380
2457
|
document.getElementById("buildQuery").addEventListener("click", () => {
|
|
1381
2458
|
function add(field, value) {
|
|
@@ -1416,9 +2493,10 @@ window.addEventListener("scroll", () => {
|
|
|
1416
2493
|
|
|
1417
2494
|
loadMore();
|
|
1418
2495
|
</script>
|
|
1419
|
-
"""
|
|
1420
|
-
|
|
1421
|
-
)
|
|
2496
|
+
"""
|
|
2497
|
+
body_html = body_html.replace("__TEMPLATE_OPTIONS__", template_options)
|
|
2498
|
+
body_html = body_html.replace("__FILTER_HELP__", filter_help_attr)
|
|
2499
|
+
return HTMLResponse(_page_shell("Paper DB", body_html))
|
|
1422
2500
|
|
|
1423
2501
|
|
|
1424
2502
|
def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
|
|
@@ -1429,11 +2507,21 @@ def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
|
|
|
1429
2507
|
page_size = min(max(1, page_size), 200)
|
|
1430
2508
|
|
|
1431
2509
|
q = qp.get("q", "").strip()
|
|
2510
|
+
filter_query = qp.get("fq", "").strip()
|
|
2511
|
+
pdf_filters = [item for item in qp.getlist("pdf") if item]
|
|
2512
|
+
source_filters = [item for item in qp.getlist("source") if item]
|
|
2513
|
+
summary_filters = [item for item in qp.getlist("summary") if item]
|
|
2514
|
+
template_filters = [item for item in qp.getlist("template") if item]
|
|
1432
2515
|
|
|
1433
2516
|
return {
|
|
1434
2517
|
"page": page,
|
|
1435
2518
|
"page_size": page_size,
|
|
1436
2519
|
"q": q,
|
|
2520
|
+
"filter_query": filter_query,
|
|
2521
|
+
"pdf": pdf_filters,
|
|
2522
|
+
"source": source_filters,
|
|
2523
|
+
"summary": summary_filters,
|
|
2524
|
+
"template": template_filters,
|
|
1437
2525
|
}
|
|
1438
2526
|
|
|
1439
2527
|
|
|
@@ -1443,13 +2531,55 @@ async def _api_papers(request: Request) -> JSONResponse:
|
|
|
1443
2531
|
page = int(filters["page"])
|
|
1444
2532
|
page_size = int(filters["page_size"])
|
|
1445
2533
|
q = str(filters["q"])
|
|
2534
|
+
filter_query = str(filters["filter_query"])
|
|
1446
2535
|
query = parse_query(q)
|
|
1447
2536
|
candidate = _apply_query(index, query)
|
|
2537
|
+
filter_terms = _parse_filter_query(filter_query)
|
|
2538
|
+
pdf_filter = _merge_filter_set(_presence_filter(filters["pdf"]), _presence_filter(list(filter_terms["pdf"])))
|
|
2539
|
+
source_filter = _merge_filter_set(
|
|
2540
|
+
_presence_filter(filters["source"]), _presence_filter(list(filter_terms["source"]))
|
|
2541
|
+
)
|
|
2542
|
+
summary_filter = _merge_filter_set(
|
|
2543
|
+
_presence_filter(filters["summary"]), _presence_filter(list(filter_terms["summary"]))
|
|
2544
|
+
)
|
|
2545
|
+
template_selected = {item.lower() for item in filters["template"] if item}
|
|
2546
|
+
template_filter = _merge_filter_set(
|
|
2547
|
+
template_selected or None,
|
|
2548
|
+
filter_terms["template"] or None,
|
|
2549
|
+
)
|
|
2550
|
+
|
|
2551
|
+
if candidate:
|
|
2552
|
+
filtered: set[int] = set()
|
|
2553
|
+
for idx in candidate:
|
|
2554
|
+
paper = index.papers[idx]
|
|
2555
|
+
source_hash = str(paper.get("source_hash") or stable_hash(str(paper.get("source_path") or idx)))
|
|
2556
|
+
has_source = source_hash in index.md_path_by_hash
|
|
2557
|
+
has_pdf = source_hash in index.pdf_path_by_hash
|
|
2558
|
+
has_summary = bool(paper.get("_has_summary"))
|
|
2559
|
+
if not _matches_presence(pdf_filter, has_pdf):
|
|
2560
|
+
continue
|
|
2561
|
+
if not _matches_presence(source_filter, has_source):
|
|
2562
|
+
continue
|
|
2563
|
+
if not _matches_presence(summary_filter, has_summary):
|
|
2564
|
+
continue
|
|
2565
|
+
if template_filter:
|
|
2566
|
+
tags = paper.get("_template_tags_lc") or []
|
|
2567
|
+
if not any(tag in template_filter for tag in tags):
|
|
2568
|
+
continue
|
|
2569
|
+
filtered.add(idx)
|
|
2570
|
+
candidate = filtered
|
|
1448
2571
|
ordered = [idx for idx in index.ordered_ids if idx in candidate]
|
|
1449
2572
|
total = len(ordered)
|
|
1450
2573
|
start = (page - 1) * page_size
|
|
1451
2574
|
end = min(start + page_size, total)
|
|
1452
2575
|
page_ids = ordered[start:end]
|
|
2576
|
+
stats_payload = None
|
|
2577
|
+
if page == 1:
|
|
2578
|
+
all_ids = set(index.ordered_ids)
|
|
2579
|
+
stats_payload = {
|
|
2580
|
+
"all": _compute_counts(index, all_ids),
|
|
2581
|
+
"filtered": _compute_counts(index, candidate),
|
|
2582
|
+
}
|
|
1453
2583
|
|
|
1454
2584
|
items: list[dict[str, Any]] = []
|
|
1455
2585
|
for idx in page_ids:
|
|
@@ -1464,8 +2594,11 @@ async def _api_papers(request: Request) -> JSONResponse:
|
|
|
1464
2594
|
"month": paper.get("_month") or "",
|
|
1465
2595
|
"venue": paper.get("_venue") or "",
|
|
1466
2596
|
"tags": paper.get("_tags") or [],
|
|
2597
|
+
"template_tags": paper.get("_template_tags") or [],
|
|
1467
2598
|
"has_source": source_hash in index.md_path_by_hash,
|
|
1468
2599
|
"has_pdf": source_hash in index.pdf_path_by_hash,
|
|
2600
|
+
"has_summary": bool(paper.get("_has_summary")),
|
|
2601
|
+
"is_pdf_only": bool(paper.get("_is_pdf_only")),
|
|
1469
2602
|
}
|
|
1470
2603
|
)
|
|
1471
2604
|
|
|
@@ -1476,6 +2609,7 @@ async def _api_papers(request: Request) -> JSONResponse:
|
|
|
1476
2609
|
"total": total,
|
|
1477
2610
|
"has_more": end < total,
|
|
1478
2611
|
"items": items,
|
|
2612
|
+
"stats": stats_payload,
|
|
1479
2613
|
}
|
|
1480
2614
|
)
|
|
1481
2615
|
|
|
@@ -1488,28 +2622,45 @@ async def _paper_detail(request: Request) -> HTMLResponse:
|
|
|
1488
2622
|
if idx is None:
|
|
1489
2623
|
return RedirectResponse("/")
|
|
1490
2624
|
paper = index.papers[idx]
|
|
1491
|
-
|
|
2625
|
+
is_pdf_only = bool(paper.get("_is_pdf_only"))
|
|
2626
|
+
page_title = str(paper.get("paper_title") or "Paper")
|
|
2627
|
+
view = request.query_params.get("view")
|
|
1492
2628
|
template_param = request.query_params.get("template")
|
|
1493
2629
|
embed = request.query_params.get("embed") == "1"
|
|
1494
|
-
if view == "split":
|
|
1495
|
-
embed = False
|
|
1496
2630
|
|
|
1497
2631
|
pdf_path = index.pdf_path_by_hash.get(source_hash)
|
|
1498
2632
|
pdf_url = f"/api/pdf/{source_hash}"
|
|
1499
|
-
shell = _embed_shell if embed else _page_shell
|
|
1500
2633
|
source_available = source_hash in index.md_path_by_hash
|
|
1501
|
-
allowed_views = {"summary", "source", "pdf", "pdfjs"}
|
|
2634
|
+
allowed_views = {"summary", "source", "pdf", "pdfjs", "split"}
|
|
2635
|
+
if is_pdf_only:
|
|
2636
|
+
allowed_views = {"pdf", "pdfjs", "split"}
|
|
1502
2637
|
|
|
1503
2638
|
def normalize_view(value: str | None, default: str) -> str:
|
|
1504
2639
|
if value in allowed_views:
|
|
1505
2640
|
return value
|
|
1506
2641
|
return default
|
|
1507
2642
|
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
2643
|
+
preferred_pdf_view = "pdfjs" if pdf_path else "pdf"
|
|
2644
|
+
default_view = preferred_pdf_view if is_pdf_only else "summary"
|
|
2645
|
+
view = normalize_view(view, default_view)
|
|
2646
|
+
if view == "split":
|
|
2647
|
+
embed = False
|
|
2648
|
+
if is_pdf_only:
|
|
2649
|
+
left_param = request.query_params.get("left")
|
|
2650
|
+
right_param = request.query_params.get("right")
|
|
2651
|
+
left = normalize_view(left_param, preferred_pdf_view) if left_param else preferred_pdf_view
|
|
2652
|
+
right = normalize_view(right_param, preferred_pdf_view) if right_param else preferred_pdf_view
|
|
2653
|
+
else:
|
|
2654
|
+
default_right = "pdfjs" if pdf_path else ("source" if source_available else "summary")
|
|
2655
|
+
left_param = request.query_params.get("left")
|
|
2656
|
+
right_param = request.query_params.get("right")
|
|
2657
|
+
left = normalize_view(left_param, "summary") if left_param else "summary"
|
|
2658
|
+
right = normalize_view(right_param, default_right) if right_param else default_right
|
|
2659
|
+
|
|
2660
|
+
def render_page(title: str, body: str, extra_head: str = "", extra_scripts: str = "") -> HTMLResponse:
|
|
2661
|
+
if embed:
|
|
2662
|
+
return HTMLResponse(_embed_shell(title, body, extra_head, extra_scripts))
|
|
2663
|
+
return HTMLResponse(_page_shell(title, body, extra_head, extra_scripts, header_title=page_title))
|
|
1513
2664
|
|
|
1514
2665
|
def nav_link(label: str, v: str) -> str:
|
|
1515
2666
|
active = " active" if view == v else ""
|
|
@@ -1522,16 +2673,83 @@ async def _paper_detail(request: Request) -> HTMLResponse:
|
|
|
1522
2673
|
href = f"/paper/{source_hash}?{urlencode(params)}"
|
|
1523
2674
|
return f'<a class="tab{active}" href="{html.escape(href)}">{html.escape(label)}</a>'
|
|
1524
2675
|
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
2676
|
+
tab_defs = [
|
|
2677
|
+
("Summary", "summary"),
|
|
2678
|
+
("Source", "source"),
|
|
2679
|
+
("PDF", "pdf"),
|
|
2680
|
+
("PDF Viewer", "pdfjs"),
|
|
2681
|
+
("Split", "split"),
|
|
2682
|
+
]
|
|
2683
|
+
if is_pdf_only:
|
|
2684
|
+
tab_defs = [
|
|
2685
|
+
("PDF", "pdf"),
|
|
2686
|
+
("PDF Viewer", "pdfjs"),
|
|
2687
|
+
("Split", "split"),
|
|
2688
|
+
]
|
|
2689
|
+
tabs_html = '<div class="tabs">' + "".join(nav_link(label, v) for label, v in tab_defs) + "</div>"
|
|
2690
|
+
fullscreen_controls = """
|
|
2691
|
+
<div class="fullscreen-actions">
|
|
2692
|
+
<button id="fullscreenEnter" class="fullscreen-enter" type="button" title="Enter fullscreen">Fullscreen</button>
|
|
2693
|
+
<button id="fullscreenExit" class="fullscreen-exit" type="button" title="Exit fullscreen">Exit Fullscreen</button>
|
|
2694
|
+
</div>
|
|
2695
|
+
"""
|
|
2696
|
+
|
|
2697
|
+
def detail_toolbar(extra_controls: str = "") -> str:
|
|
2698
|
+
if embed:
|
|
2699
|
+
return ""
|
|
2700
|
+
controls = extra_controls.strip()
|
|
2701
|
+
toolbar_controls = f"{controls}{fullscreen_controls}" if controls else fullscreen_controls
|
|
2702
|
+
return f"""
|
|
2703
|
+
<div class="detail-toolbar">
|
|
2704
|
+
{tabs_html}
|
|
2705
|
+
<div class="toolbar-actions">
|
|
2706
|
+
{toolbar_controls}
|
|
2707
|
+
</div>
|
|
2708
|
+
</div>
|
|
2709
|
+
"""
|
|
2710
|
+
|
|
2711
|
+
def wrap_detail(content: str, toolbar_html: str | None = None) -> str:
|
|
2712
|
+
if embed:
|
|
2713
|
+
return content
|
|
2714
|
+
toolbar = detail_toolbar() if toolbar_html is None else toolbar_html
|
|
2715
|
+
return f"""
|
|
2716
|
+
<div class="detail-shell">
|
|
2717
|
+
{toolbar}
|
|
2718
|
+
<div class="detail-body">
|
|
2719
|
+
{content}
|
|
2720
|
+
</div>
|
|
1532
2721
|
</div>
|
|
1533
2722
|
"""
|
|
1534
|
-
|
|
2723
|
+
|
|
2724
|
+
fullscreen_script = ""
|
|
2725
|
+
if not embed:
|
|
2726
|
+
fullscreen_script = """
|
|
2727
|
+
<script>
|
|
2728
|
+
const fullscreenEnter = document.getElementById('fullscreenEnter');
|
|
2729
|
+
const fullscreenExit = document.getElementById('fullscreenExit');
|
|
2730
|
+
function setFullscreen(enable) {
|
|
2731
|
+
document.body.classList.toggle('detail-fullscreen', enable);
|
|
2732
|
+
}
|
|
2733
|
+
if (fullscreenEnter) {
|
|
2734
|
+
fullscreenEnter.addEventListener('click', () => setFullscreen(true));
|
|
2735
|
+
}
|
|
2736
|
+
if (fullscreenExit) {
|
|
2737
|
+
fullscreenExit.addEventListener('click', () => setFullscreen(false));
|
|
2738
|
+
}
|
|
2739
|
+
document.addEventListener('keydown', (event) => {
|
|
2740
|
+
if (event.key === 'Escape' && document.body.classList.contains('detail-fullscreen')) {
|
|
2741
|
+
setFullscreen(false);
|
|
2742
|
+
}
|
|
2743
|
+
});
|
|
2744
|
+
</script>
|
|
2745
|
+
"""
|
|
2746
|
+
pdf_only_warning_html = ""
|
|
2747
|
+
if is_pdf_only:
|
|
2748
|
+
pdf_only_warning_html = (
|
|
2749
|
+
'<div class="warning">PDF-only entry: summary and source views are unavailable.</div>'
|
|
2750
|
+
)
|
|
2751
|
+
outline_top = "72px" if not embed else "16px"
|
|
2752
|
+
outline_html, outline_css, outline_js = _outline_assets(outline_top)
|
|
1535
2753
|
|
|
1536
2754
|
if view == "split":
|
|
1537
2755
|
def pane_src(pane_view: str) -> str:
|
|
@@ -1550,6 +2768,11 @@ async def _paper_detail(request: Request) -> HTMLResponse:
|
|
|
1550
2768
|
("pdf", "PDF"),
|
|
1551
2769
|
("pdfjs", "PDF Viewer"),
|
|
1552
2770
|
]
|
|
2771
|
+
if is_pdf_only:
|
|
2772
|
+
options = [
|
|
2773
|
+
("pdf", "PDF"),
|
|
2774
|
+
("pdfjs", "PDF Viewer"),
|
|
2775
|
+
]
|
|
1553
2776
|
left_options = "\n".join(
|
|
1554
2777
|
f'<option value="{value}"{" selected" if value == left else ""}>{label}</option>'
|
|
1555
2778
|
for value, label in options
|
|
@@ -1558,28 +2781,26 @@ async def _paper_detail(request: Request) -> HTMLResponse:
|
|
|
1558
2781
|
f'<option value="{value}"{" selected" if value == right else ""}>{label}</option>'
|
|
1559
2782
|
for value, label in options
|
|
1560
2783
|
)
|
|
1561
|
-
|
|
1562
|
-
<
|
|
1563
|
-
|
|
1564
|
-
<
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
<select id="splitLeft">
|
|
1568
|
-
{left_options}
|
|
1569
|
-
</select>
|
|
1570
|
-
</div>
|
|
2784
|
+
split_controls = f"""
|
|
2785
|
+
<div class="split-inline">
|
|
2786
|
+
<span class="muted">Left</span>
|
|
2787
|
+
<select id="splitLeft">
|
|
2788
|
+
{left_options}
|
|
2789
|
+
</select>
|
|
1571
2790
|
<div class="split-actions">
|
|
1572
2791
|
<button id="splitTighten" type="button" title="Tighten width">-</button>
|
|
1573
2792
|
<button id="splitSwap" type="button" title="Swap panes">⇄</button>
|
|
1574
2793
|
<button id="splitWiden" type="button" title="Widen width">+</button>
|
|
1575
2794
|
</div>
|
|
1576
|
-
<
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
</select>
|
|
1581
|
-
</div>
|
|
2795
|
+
<span class="muted">Right</span>
|
|
2796
|
+
<select id="splitRight">
|
|
2797
|
+
{right_options}
|
|
2798
|
+
</select>
|
|
1582
2799
|
</div>
|
|
2800
|
+
"""
|
|
2801
|
+
toolbar_html = detail_toolbar(split_controls)
|
|
2802
|
+
split_layout = f"""
|
|
2803
|
+
{pdf_only_warning_html}
|
|
1583
2804
|
<div class="split-layout">
|
|
1584
2805
|
<div class="split-pane">
|
|
1585
2806
|
<iframe id="leftPane" src="{html.escape(left_src)}" title="Left pane"></iframe>
|
|
@@ -1589,6 +2810,7 @@ async def _paper_detail(request: Request) -> HTMLResponse:
|
|
|
1589
2810
|
</div>
|
|
1590
2811
|
</div>
|
|
1591
2812
|
"""
|
|
2813
|
+
body = wrap_detail(split_layout, toolbar_html=toolbar_html)
|
|
1592
2814
|
extra_head = """
|
|
1593
2815
|
<style>
|
|
1594
2816
|
.container {
|
|
@@ -1596,43 +2818,14 @@ async def _paper_detail(request: Request) -> HTMLResponse:
|
|
|
1596
2818
|
width: 100%;
|
|
1597
2819
|
margin: 0 auto;
|
|
1598
2820
|
}
|
|
1599
|
-
.split-controls {
|
|
1600
|
-
display: grid;
|
|
1601
|
-
grid-template-columns: 1fr auto 1fr;
|
|
1602
|
-
gap: 12px;
|
|
1603
|
-
align-items: end;
|
|
1604
|
-
margin: 10px 0 14px;
|
|
1605
|
-
}
|
|
1606
|
-
.split-controls select {
|
|
1607
|
-
padding: 6px 8px;
|
|
1608
|
-
border-radius: 8px;
|
|
1609
|
-
border: 1px solid #d0d7de;
|
|
1610
|
-
background: #fff;
|
|
1611
|
-
min-width: 160px;
|
|
1612
|
-
}
|
|
1613
|
-
.split-actions {
|
|
1614
|
-
display: flex;
|
|
1615
|
-
align-items: center;
|
|
1616
|
-
justify-content: center;
|
|
1617
|
-
gap: 8px;
|
|
1618
|
-
height: 100%;
|
|
1619
|
-
}
|
|
1620
|
-
.split-actions button {
|
|
1621
|
-
padding: 6px 10px;
|
|
1622
|
-
border-radius: 999px;
|
|
1623
|
-
border: 1px solid #d0d7de;
|
|
1624
|
-
background: #f6f8fa;
|
|
1625
|
-
cursor: pointer;
|
|
1626
|
-
min-width: 36px;
|
|
1627
|
-
}
|
|
1628
2821
|
.split-layout {
|
|
1629
2822
|
display: flex;
|
|
1630
2823
|
gap: 12px;
|
|
1631
2824
|
width: 100%;
|
|
1632
|
-
max-width:
|
|
2825
|
+
max-width: var(--split-max-width, 100%);
|
|
1633
2826
|
margin: 0 auto;
|
|
1634
|
-
|
|
1635
|
-
min-height:
|
|
2827
|
+
flex: 1;
|
|
2828
|
+
min-height: 440px;
|
|
1636
2829
|
}
|
|
1637
2830
|
.split-pane {
|
|
1638
2831
|
flex: 1;
|
|
@@ -1649,14 +2842,11 @@ async def _paper_detail(request: Request) -> HTMLResponse:
|
|
|
1649
2842
|
@media (max-width: 900px) {
|
|
1650
2843
|
.split-layout {
|
|
1651
2844
|
flex-direction: column;
|
|
1652
|
-
height:
|
|
2845
|
+
min-height: 0;
|
|
1653
2846
|
}
|
|
1654
2847
|
.split-pane {
|
|
1655
2848
|
height: 70vh;
|
|
1656
2849
|
}
|
|
1657
|
-
.split-controls {
|
|
1658
|
-
grid-template-columns: 1fr;
|
|
1659
|
-
}
|
|
1660
2850
|
}
|
|
1661
2851
|
</style>
|
|
1662
2852
|
"""
|
|
@@ -1717,28 +2907,46 @@ widenButton.addEventListener('click', () => {
|
|
|
1717
2907
|
applySplitWidth();
|
|
1718
2908
|
</script>
|
|
1719
2909
|
"""
|
|
1720
|
-
return
|
|
2910
|
+
return render_page(
|
|
2911
|
+
"Split View",
|
|
2912
|
+
body,
|
|
2913
|
+
extra_head=extra_head,
|
|
2914
|
+
extra_scripts=extra_scripts + fullscreen_script,
|
|
2915
|
+
)
|
|
1721
2916
|
|
|
1722
2917
|
if view == "source":
|
|
1723
2918
|
source_path = index.md_path_by_hash.get(source_hash)
|
|
1724
2919
|
if not source_path:
|
|
1725
|
-
body =
|
|
1726
|
-
|
|
2920
|
+
body = wrap_detail(
|
|
2921
|
+
'<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
|
|
2922
|
+
)
|
|
2923
|
+
return render_page("Source", body, extra_scripts=fullscreen_script)
|
|
1727
2924
|
try:
|
|
1728
2925
|
raw = source_path.read_text(encoding="utf-8")
|
|
1729
2926
|
except UnicodeDecodeError:
|
|
1730
2927
|
raw = source_path.read_text(encoding="latin-1")
|
|
1731
2928
|
rendered = _render_markdown_with_math_placeholders(md, raw)
|
|
1732
|
-
body = (
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
2929
|
+
body = wrap_detail(
|
|
2930
|
+
f"""
|
|
2931
|
+
<div class="muted">{html.escape(str(source_path))}</div>
|
|
2932
|
+
<div class="muted" style="margin-top:10px;">Rendered from source markdown:</div>
|
|
2933
|
+
{outline_html}
|
|
2934
|
+
<div id="content">{rendered}</div>
|
|
2935
|
+
<details style="margin-top:12px;"><summary>Raw markdown</summary>
|
|
2936
|
+
<pre><code>{html.escape(raw)}</code></pre>
|
|
2937
|
+
</details>
|
|
2938
|
+
"""
|
|
1740
2939
|
)
|
|
1741
|
-
extra_head = f
|
|
2940
|
+
extra_head = f"""
|
|
2941
|
+
<link rel="stylesheet" href="{_CDN_KATEX}" />
|
|
2942
|
+
{outline_css}
|
|
2943
|
+
<style>
|
|
2944
|
+
#content img {{
|
|
2945
|
+
max-width: 100%;
|
|
2946
|
+
height: auto;
|
|
2947
|
+
}}
|
|
2948
|
+
</style>
|
|
2949
|
+
"""
|
|
1742
2950
|
extra_scripts = f"""
|
|
1743
2951
|
<script src="{_CDN_MERMAID}"></script>
|
|
1744
2952
|
<script src="{_CDN_KATEX_JS}"></script>
|
|
@@ -1766,16 +2974,18 @@ if (window.renderMathInElement) {{
|
|
|
1766
2974
|
throwOnError: false
|
|
1767
2975
|
}});
|
|
1768
2976
|
}}
|
|
2977
|
+
{outline_js}
|
|
1769
2978
|
</script>
|
|
1770
2979
|
"""
|
|
1771
|
-
return
|
|
2980
|
+
return render_page("Source", body, extra_head=extra_head, extra_scripts=extra_scripts + fullscreen_script)
|
|
1772
2981
|
|
|
1773
2982
|
if view == "pdf":
|
|
1774
2983
|
if not pdf_path:
|
|
1775
|
-
body =
|
|
1776
|
-
return
|
|
1777
|
-
body =
|
|
1778
|
-
|
|
2984
|
+
body = wrap_detail('<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>')
|
|
2985
|
+
return render_page("PDF", body, extra_scripts=fullscreen_script)
|
|
2986
|
+
body = wrap_detail(
|
|
2987
|
+
f"""
|
|
2988
|
+
{pdf_only_warning_html}
|
|
1779
2989
|
<div class="muted">{html.escape(str(pdf_path.name))}</div>
|
|
1780
2990
|
<div style="display:flex; gap:8px; align-items:center; margin: 10px 0;">
|
|
1781
2991
|
<button id="prev" style="padding:6px 10px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Prev</button>
|
|
@@ -1787,6 +2997,7 @@ if (window.renderMathInElement) {{
|
|
|
1787
2997
|
</div>
|
|
1788
2998
|
<canvas id="the-canvas" style="width: 100%; border: 1px solid #d0d7de; border-radius: 10px;"></canvas>
|
|
1789
2999
|
"""
|
|
3000
|
+
)
|
|
1790
3001
|
extra_scripts = f"""
|
|
1791
3002
|
<script src="{_CDN_PDFJS}"></script>
|
|
1792
3003
|
<script>
|
|
@@ -1875,25 +3086,21 @@ window.addEventListener('resize', () => {{
|
|
|
1875
3086
|
}});
|
|
1876
3087
|
</script>
|
|
1877
3088
|
"""
|
|
1878
|
-
return
|
|
3089
|
+
return render_page("PDF", body, extra_scripts=extra_scripts + fullscreen_script)
|
|
1879
3090
|
|
|
1880
3091
|
if view == "pdfjs":
|
|
1881
3092
|
if not pdf_path:
|
|
1882
|
-
body =
|
|
1883
|
-
return
|
|
3093
|
+
body = wrap_detail('<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>')
|
|
3094
|
+
return render_page("PDF Viewer", body, extra_scripts=fullscreen_script)
|
|
1884
3095
|
viewer_url = _build_pdfjs_viewer_url(pdf_url)
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
)
|
|
1891
|
-
frame_height = "calc(100vh - 220px)" if not embed else "calc(100vh - 32px)"
|
|
1892
|
-
body = f"""
|
|
1893
|
-
{nav_html}
|
|
1894
|
-
{header_html}
|
|
3096
|
+
frame_height = "calc(100vh - 32px)" if embed else "100%"
|
|
3097
|
+
body = wrap_detail(
|
|
3098
|
+
f"""
|
|
3099
|
+
{pdf_only_warning_html}
|
|
3100
|
+
<div class="muted">{html.escape(str(pdf_path.name))}</div>
|
|
1895
3101
|
<iframe class="pdfjs-frame" src="{html.escape(viewer_url)}" title="PDF.js Viewer"></iframe>
|
|
1896
3102
|
"""
|
|
3103
|
+
)
|
|
1897
3104
|
extra_head = f"""
|
|
1898
3105
|
<style>
|
|
1899
3106
|
.pdfjs-frame {{
|
|
@@ -1901,10 +3108,11 @@ window.addEventListener('resize', () => {{
|
|
|
1901
3108
|
height: {frame_height};
|
|
1902
3109
|
border: 1px solid #d0d7de;
|
|
1903
3110
|
border-radius: 10px;
|
|
3111
|
+
flex: 1;
|
|
1904
3112
|
}}
|
|
1905
3113
|
</style>
|
|
1906
3114
|
"""
|
|
1907
|
-
return
|
|
3115
|
+
return render_page("PDF Viewer", body, extra_head=extra_head, extra_scripts=fullscreen_script)
|
|
1908
3116
|
|
|
1909
3117
|
selected_tag, available_templates = _select_template_tag(paper, template_param)
|
|
1910
3118
|
markdown, template_name, warning = _render_paper_markdown(
|
|
@@ -1915,8 +3123,6 @@ window.addEventListener('resize', () => {{
|
|
|
1915
3123
|
rendered_html = _render_markdown_with_math_placeholders(md, markdown)
|
|
1916
3124
|
|
|
1917
3125
|
warning_html = f'<div class="warning">{html.escape(warning)}</div>' if warning else ""
|
|
1918
|
-
title = str(paper.get("paper_title") or "Paper")
|
|
1919
|
-
outline_top = "72px" if not embed else "16px"
|
|
1920
3126
|
template_controls = f'<div class="muted">Template: {html.escape(template_name)}</div>'
|
|
1921
3127
|
if available_templates:
|
|
1922
3128
|
options = "\n".join(
|
|
@@ -1942,97 +3148,17 @@ if (templateSelect) {{
|
|
|
1942
3148
|
}}
|
|
1943
3149
|
</script>
|
|
1944
3150
|
"""
|
|
1945
|
-
|
|
1946
|
-
<button id="outlineToggle" class="outline-toggle" title="Toggle outline">☰</button>
|
|
1947
|
-
<div id="outlinePanel" class="outline-panel collapsed">
|
|
1948
|
-
<div class="outline-title">Outline</div>
|
|
1949
|
-
<div id="outlineList" class="outline-list"></div>
|
|
1950
|
-
</div>
|
|
1951
|
-
<button id="backToTop" class="back-to-top" title="Back to top">↑</button>
|
|
1952
|
-
"""
|
|
1953
|
-
body = f"""
|
|
1954
|
-
<h2>{html.escape(title)}</h2>
|
|
3151
|
+
content_html = f"""
|
|
1955
3152
|
{template_controls}
|
|
1956
3153
|
{warning_html}
|
|
1957
|
-
{nav_html}
|
|
1958
3154
|
{outline_html}
|
|
1959
3155
|
<div id="content">{rendered_html}</div>
|
|
1960
3156
|
"""
|
|
3157
|
+
body = wrap_detail(content_html)
|
|
1961
3158
|
|
|
1962
3159
|
extra_head = f"""
|
|
1963
3160
|
<link rel="stylesheet" href="{_CDN_KATEX}" />
|
|
1964
|
-
|
|
1965
|
-
:root {{
|
|
1966
|
-
--outline-top: {outline_top};
|
|
1967
|
-
}}
|
|
1968
|
-
.outline-toggle {{
|
|
1969
|
-
position: fixed;
|
|
1970
|
-
top: var(--outline-top);
|
|
1971
|
-
left: 16px;
|
|
1972
|
-
z-index: 20;
|
|
1973
|
-
padding: 6px 10px;
|
|
1974
|
-
border-radius: 8px;
|
|
1975
|
-
border: 1px solid #d0d7de;
|
|
1976
|
-
background: #f6f8fa;
|
|
1977
|
-
cursor: pointer;
|
|
1978
|
-
}}
|
|
1979
|
-
.outline-panel {{
|
|
1980
|
-
position: fixed;
|
|
1981
|
-
top: calc(var(--outline-top) + 42px);
|
|
1982
|
-
left: 16px;
|
|
1983
|
-
width: 240px;
|
|
1984
|
-
max-height: 60vh;
|
|
1985
|
-
overflow: auto;
|
|
1986
|
-
border: 1px solid #d0d7de;
|
|
1987
|
-
border-radius: 10px;
|
|
1988
|
-
background: #ffffff;
|
|
1989
|
-
padding: 10px;
|
|
1990
|
-
z-index: 20;
|
|
1991
|
-
box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
|
|
1992
|
-
}}
|
|
1993
|
-
.outline-panel.collapsed {{
|
|
1994
|
-
display: none;
|
|
1995
|
-
}}
|
|
1996
|
-
.outline-title {{
|
|
1997
|
-
font-size: 12px;
|
|
1998
|
-
text-transform: uppercase;
|
|
1999
|
-
letter-spacing: 0.08em;
|
|
2000
|
-
color: #57606a;
|
|
2001
|
-
margin-bottom: 8px;
|
|
2002
|
-
}}
|
|
2003
|
-
.outline-list a {{
|
|
2004
|
-
display: block;
|
|
2005
|
-
color: #0969da;
|
|
2006
|
-
text-decoration: none;
|
|
2007
|
-
padding: 4px 0;
|
|
2008
|
-
}}
|
|
2009
|
-
.outline-list a:hover {{
|
|
2010
|
-
text-decoration: underline;
|
|
2011
|
-
}}
|
|
2012
|
-
.back-to-top {{
|
|
2013
|
-
position: fixed;
|
|
2014
|
-
left: 16px;
|
|
2015
|
-
bottom: 16px;
|
|
2016
|
-
padding: 6px 10px;
|
|
2017
|
-
border-radius: 999px;
|
|
2018
|
-
border: 1px solid #d0d7de;
|
|
2019
|
-
background: #ffffff;
|
|
2020
|
-
cursor: pointer;
|
|
2021
|
-
opacity: 0;
|
|
2022
|
-
pointer-events: none;
|
|
2023
|
-
transition: opacity 0.2s ease;
|
|
2024
|
-
z-index: 20;
|
|
2025
|
-
}}
|
|
2026
|
-
.back-to-top.visible {{
|
|
2027
|
-
opacity: 1;
|
|
2028
|
-
pointer-events: auto;
|
|
2029
|
-
}}
|
|
2030
|
-
@media (max-width: 900px) {{
|
|
2031
|
-
.outline-panel {{
|
|
2032
|
-
width: 200px;
|
|
2033
|
-
}}
|
|
2034
|
-
}}
|
|
2035
|
-
</style>
|
|
3161
|
+
{outline_css}
|
|
2036
3162
|
"""
|
|
2037
3163
|
extra_scripts = f"""
|
|
2038
3164
|
<script src="{_CDN_MERMAID}"></script>
|
|
@@ -2062,77 +3188,10 @@ if (window.renderMathInElement) {{
|
|
|
2062
3188
|
throwOnError: false
|
|
2063
3189
|
}});
|
|
2064
3190
|
}}
|
|
2065
|
-
|
|
2066
|
-
const outlinePanel = document.getElementById('outlinePanel');
|
|
2067
|
-
const outlineList = document.getElementById('outlineList');
|
|
2068
|
-
const backToTop = document.getElementById('backToTop');
|
|
2069
|
-
|
|
2070
|
-
function slugify(text) {{
|
|
2071
|
-
return text.toLowerCase().trim()
|
|
2072
|
-
.replace(/[^a-z0-9\\s-]/g, '')
|
|
2073
|
-
.replace(/\\s+/g, '-')
|
|
2074
|
-
.replace(/-+/g, '-');
|
|
2075
|
-
}}
|
|
2076
|
-
|
|
2077
|
-
function buildOutline() {{
|
|
2078
|
-
if (!outlineList) return;
|
|
2079
|
-
const content = document.getElementById('content');
|
|
2080
|
-
if (!content) return;
|
|
2081
|
-
const headings = content.querySelectorAll('h1, h2, h3, h4');
|
|
2082
|
-
if (!headings.length) {{
|
|
2083
|
-
outlineList.innerHTML = '<div class="muted">No headings</div>';
|
|
2084
|
-
return;
|
|
2085
|
-
}}
|
|
2086
|
-
const used = new Set();
|
|
2087
|
-
outlineList.innerHTML = '';
|
|
2088
|
-
headings.forEach((heading) => {{
|
|
2089
|
-
let id = heading.id;
|
|
2090
|
-
if (!id) {{
|
|
2091
|
-
const base = slugify(heading.textContent || 'section') || 'section';
|
|
2092
|
-
id = base;
|
|
2093
|
-
let i = 1;
|
|
2094
|
-
while (used.has(id) || document.getElementById(id)) {{
|
|
2095
|
-
id = `${{base}}-${{i++}}`;
|
|
2096
|
-
}}
|
|
2097
|
-
heading.id = id;
|
|
2098
|
-
}}
|
|
2099
|
-
used.add(id);
|
|
2100
|
-
const level = parseInt(heading.tagName.slice(1), 10) || 1;
|
|
2101
|
-
const link = document.createElement('a');
|
|
2102
|
-
link.href = `#${{id}}`;
|
|
2103
|
-
link.textContent = heading.textContent || '';
|
|
2104
|
-
link.style.paddingLeft = `${{(level - 1) * 12}}px`;
|
|
2105
|
-
outlineList.appendChild(link);
|
|
2106
|
-
}});
|
|
2107
|
-
}}
|
|
2108
|
-
|
|
2109
|
-
function toggleBackToTop() {{
|
|
2110
|
-
if (!backToTop) return;
|
|
2111
|
-
if (window.scrollY > 300) {{
|
|
2112
|
-
backToTop.classList.add('visible');
|
|
2113
|
-
}} else {{
|
|
2114
|
-
backToTop.classList.remove('visible');
|
|
2115
|
-
}}
|
|
2116
|
-
}}
|
|
2117
|
-
|
|
2118
|
-
if (outlineToggle && outlinePanel) {{
|
|
2119
|
-
outlineToggle.addEventListener('click', () => {{
|
|
2120
|
-
outlinePanel.classList.toggle('collapsed');
|
|
2121
|
-
}});
|
|
2122
|
-
}}
|
|
2123
|
-
|
|
2124
|
-
if (backToTop) {{
|
|
2125
|
-
backToTop.addEventListener('click', () => {{
|
|
2126
|
-
window.scrollTo({{ top: 0, behavior: 'smooth' }});
|
|
2127
|
-
}});
|
|
2128
|
-
}}
|
|
2129
|
-
|
|
2130
|
-
buildOutline();
|
|
2131
|
-
window.addEventListener('scroll', toggleBackToTop);
|
|
2132
|
-
toggleBackToTop();
|
|
3191
|
+
{outline_js}
|
|
2133
3192
|
</script>
|
|
2134
3193
|
"""
|
|
2135
|
-
return
|
|
3194
|
+
return render_page(page_title, body, extra_head=extra_head, extra_scripts=extra_scripts + fullscreen_script)
|
|
2136
3195
|
|
|
2137
3196
|
|
|
2138
3197
|
async def _api_stats(request: Request) -> JSONResponse:
|
|
@@ -2159,6 +3218,7 @@ async def _stats_page(request: Request) -> HTMLResponse:
|
|
|
2159
3218
|
<div id="year" style="width:100%;height:360px"></div>
|
|
2160
3219
|
<div id="month" style="width:100%;height:360px"></div>
|
|
2161
3220
|
<div id="tags" style="width:100%;height:420px"></div>
|
|
3221
|
+
<div id="keywords" style="width:100%;height:420px"></div>
|
|
2162
3222
|
<div id="authors" style="width:100%;height:420px"></div>
|
|
2163
3223
|
<div id="venues" style="width:100%;height:420px"></div>
|
|
2164
3224
|
"""
|
|
@@ -2185,6 +3245,7 @@ async function main() {{
|
|
|
2185
3245
|
bar('year', 'Publication Year', data.years || []);
|
|
2186
3246
|
bar('month', 'Publication Month', data.months || []);
|
|
2187
3247
|
bar('tags', 'Top Tags', (data.tags || []).slice(0, 20));
|
|
3248
|
+
bar('keywords', 'Top Keywords', (data.keywords || []).slice(0, 20));
|
|
2188
3249
|
bar('authors', 'Top Authors', (data.authors || []).slice(0, 20));
|
|
2189
3250
|
bar('venues', 'Top Venues', (data.venues || []).slice(0, 20));
|
|
2190
3251
|
}}
|
|
@@ -2272,7 +3333,7 @@ def create_app(
|
|
|
2272
3333
|
cache_dir: Path | None = None,
|
|
2273
3334
|
use_cache: bool = True,
|
|
2274
3335
|
) -> Starlette:
|
|
2275
|
-
papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache)
|
|
3336
|
+
papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache, pdf_roots=pdf_roots)
|
|
2276
3337
|
|
|
2277
3338
|
md_roots = md_roots or []
|
|
2278
3339
|
pdf_roots = pdf_roots or []
|