remarkablesync 2.0.0__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {remarkablesync-2.0.0/remarkablesync.egg-info → remarkablesync-2.0.1}/PKG-INFO +2 -2
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/README.md +1 -1
- {remarkablesync-2.0.0 → remarkablesync-2.0.1/remarkablesync.egg-info}/PKG-INFO +2 -2
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/__version__.py +1 -1
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/pipeline.py +1 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/watch_command.py +1 -1
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/hybrid_converter.py +70 -40
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/pdf_md_converter.py +100 -16
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/rm_pdf_converter.py +13 -12
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/LICENSE +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/MANIFEST.in +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/RemarkableSync.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/pyproject.toml +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/SOURCES.txt +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/dependency_links.txt +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/entry_points.txt +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/requires.txt +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/top_level.txt +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/requirements-dev.txt +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/requirements.txt +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/setup.cfg +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/setup.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/base_provider.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/claude_provider.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/github_models_provider.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/auth/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/auth/github_device_flow.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/backup_manager.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/connection.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/metadata.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/backup_command.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/config_command.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/convert_command.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/sync_command.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/config.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/base_converter.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/v4_converter.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/v5_converter.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/v6_converter.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/keyring_store.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/md_export/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ocr/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ocr/ocr_engine.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/template_renderer.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/utils/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/utils/console.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/utils/logging.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/__init__.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/conftest.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/generate_test_assets.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/mock_ai_provider.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/mock_connection.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_ai_providers.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_auth_device_flow.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_basic.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_config.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_console.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_hybrid_converter.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_keyring_store.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_logging.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_md_exporter.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_metadata.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_ocr_pipeline.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_pipeline.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_template_renderer.py +0 -0
- {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_wifi_connection.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: remarkablesync
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: Backup and convert reMarkable tablet notebooks to PDF
|
|
5
5
|
Home-page: https://github.com/JeffSteinbok/RemarkableSync
|
|
6
6
|
Author: Jeff Steinbok
|
|
@@ -54,7 +54,7 @@ Dynamic: summary
|
|
|
54
54
|
|
|
55
55
|
# RemarkableSync
|
|
56
56
|
|
|
57
|
-

|
|
57
|
+

|
|
58
58
|
|
|
59
59
|
[](https://github.com/JeffSteinbok/RemarkableSync)
|
|
60
60
|
[](https://github.com/JeffSteinbok/RemarkableSync/releases)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# RemarkableSync
|
|
2
2
|
|
|
3
|
-

|
|
3
|
+

|
|
4
4
|
|
|
5
5
|
[](https://github.com/JeffSteinbok/RemarkableSync)
|
|
6
6
|
[](https://github.com/JeffSteinbok/RemarkableSync/releases)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: remarkablesync
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: Backup and convert reMarkable tablet notebooks to PDF
|
|
5
5
|
Home-page: https://github.com/JeffSteinbok/RemarkableSync
|
|
6
6
|
Author: Jeff Steinbok
|
|
@@ -54,7 +54,7 @@ Dynamic: summary
|
|
|
54
54
|
|
|
55
55
|
# RemarkableSync
|
|
56
56
|
|
|
57
|
-

|
|
57
|
+

|
|
58
58
|
|
|
59
59
|
[](https://github.com/JeffSteinbok/RemarkableSync)
|
|
60
60
|
[](https://github.com/JeffSteinbok/RemarkableSync/releases)
|
|
@@ -738,7 +738,7 @@ class _TrayLogHandler(logging.Handler):
|
|
|
738
738
|
return
|
|
739
739
|
self._tray.set_detail(msg)
|
|
740
740
|
|
|
741
|
-
# Parse progress from page callbacks: "PDF: Work (page 3/21)"
|
|
741
|
+
# Parse progress from page callbacks: "PDF: Work (page 3/21)" or "PDF: Work (page 3/21) [cached]"
|
|
742
742
|
import re
|
|
743
743
|
|
|
744
744
|
m = re.search(r"(PDF|MD): (.+?) \(page (\d+)/(\d+)\)", msg)
|
|
@@ -446,6 +446,7 @@ def convert_notebook(
|
|
|
446
446
|
template_renderer: Optional[TemplateRenderer] = None,
|
|
447
447
|
changed_page_ids: Optional[set] = None,
|
|
448
448
|
on_page_done: Optional[callable] = None,
|
|
449
|
+
on_page_start: Optional[callable] = None,
|
|
449
450
|
) -> Dict:
|
|
450
451
|
"""Convert a notebook using appropriate tools for each file type.
|
|
451
452
|
|
|
@@ -462,6 +463,8 @@ def convert_notebook(
|
|
|
462
463
|
template_renderer: Optional template renderer for backgrounds.
|
|
463
464
|
changed_page_ids: Set of page IDs whose ``.rm`` files changed.
|
|
464
465
|
When *None* all pages are (re-)converted.
|
|
466
|
+
on_page_done: Callback ``(cached: bool)`` called after each page.
|
|
467
|
+
*cached* is True when the page was served from cache.
|
|
465
468
|
"""
|
|
466
469
|
# Create safe filename
|
|
467
470
|
safe_name = "".join(c for c in notebook["name"] if c.isalnum() or c in (" ", "-", "_")).rstrip()
|
|
@@ -505,7 +508,7 @@ def convert_notebook(
|
|
|
505
508
|
template_temp_dir = Path(tempfile.mkdtemp(prefix="remarkable_templates_"))
|
|
506
509
|
|
|
507
510
|
try:
|
|
508
|
-
# Resolve ordered pages using .content file if present
|
|
511
|
+
# Resolve ordered pages using .content file if present
|
|
509
512
|
metadata_file = notebook.get("metadata_file")
|
|
510
513
|
content_path = metadata_file.with_suffix(".content") if metadata_file else None
|
|
511
514
|
|
|
@@ -514,28 +517,52 @@ def convert_notebook(
|
|
|
514
517
|
if template_renderer and content_path:
|
|
515
518
|
page_templates = get_page_templates(content_path)
|
|
516
519
|
|
|
517
|
-
|
|
520
|
+
# Build a set of all known .rm files by page ID for fast lookup
|
|
521
|
+
all_rm_by_id: Dict[str, Path] = {}
|
|
522
|
+
for rm_file in (
|
|
523
|
+
notebook.get("v5_files", [])
|
|
524
|
+
+ notebook.get("v6_files", [])
|
|
525
|
+
+ notebook.get("v4_files", [])
|
|
526
|
+
):
|
|
527
|
+
all_rm_by_id[rm_file.stem] = rm_file
|
|
528
|
+
|
|
529
|
+
# Determine version for each page
|
|
530
|
+
v6_ids = {f.stem for f in notebook.get("v6_files", [])}
|
|
531
|
+
v4_ids = {f.stem for f in notebook.get("v4_files", [])}
|
|
532
|
+
|
|
533
|
+
# Order pages using .content file (applies to all versions)
|
|
534
|
+
ordered_pages: List[Path] = []
|
|
518
535
|
if content_path and content_path.exists():
|
|
519
536
|
try:
|
|
520
537
|
with open(content_path, "r", encoding="utf-8") as cf:
|
|
521
538
|
content_json = json.load(cf)
|
|
522
539
|
page_ids = content_json.get("pages", [])
|
|
540
|
+
# v6 notebooks use cPages.pages with {id, idx, ...} dicts
|
|
541
|
+
if not page_ids:
|
|
542
|
+
cpages = content_json.get("cPages", {}).get("pages", [])
|
|
543
|
+
page_ids = [p["id"] for p in cpages if "id" in p]
|
|
523
544
|
base_dir = content_path.parent / content_path.stem
|
|
524
545
|
for pid in page_ids:
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
ordered_v5_pages.append(candidate)
|
|
546
|
+
if pid in all_rm_by_id:
|
|
547
|
+
ordered_pages.append(all_rm_by_id[pid])
|
|
528
548
|
else:
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
549
|
+
candidate = base_dir / f"{pid}.rm"
|
|
550
|
+
if candidate.exists():
|
|
551
|
+
ordered_pages.append(candidate)
|
|
552
|
+
else:
|
|
553
|
+
alt = list((content_path.parent).glob(f"{pid}.rm"))
|
|
554
|
+
if alt:
|
|
555
|
+
ordered_pages.append(alt[0])
|
|
533
556
|
except Exception as e:
|
|
534
557
|
logging.debug("Failed reading content ordering for %s: %s", notebook["name"], e)
|
|
535
558
|
|
|
536
559
|
# Fallback to unsorted list if ordering extraction failed
|
|
537
|
-
if not
|
|
538
|
-
|
|
560
|
+
if not ordered_pages:
|
|
561
|
+
ordered_pages = (
|
|
562
|
+
notebook.get("v5_files", [])
|
|
563
|
+
+ notebook.get("v6_files", [])
|
|
564
|
+
+ notebook.get("v4_files", [])
|
|
565
|
+
)
|
|
539
566
|
|
|
540
567
|
def _needs_conversion(page_id: str) -> bool:
|
|
541
568
|
"""Check if a page needs (re-)conversion."""
|
|
@@ -543,24 +570,24 @@ def convert_notebook(
|
|
|
543
570
|
return True # No change info → convert all
|
|
544
571
|
return page_id in changed_page_ids
|
|
545
572
|
|
|
546
|
-
def _convert_page(
|
|
547
|
-
rm_file: Path, version_tag: str, convert_fn, result_key: str
|
|
548
|
-
) -> Optional[Path]:
|
|
573
|
+
def _convert_page(rm_file: Path, version_tag: str, convert_fn, result_key: str) -> tuple:
|
|
549
574
|
"""Convert a single page, using cache when possible.
|
|
550
575
|
|
|
551
|
-
Returns
|
|
576
|
+
Returns ``(path, cached)`` where *cached* is True when the
|
|
577
|
+
page was served from the persistent cache without conversion.
|
|
578
|
+
Returns ``(None, False)`` on failure.
|
|
552
579
|
"""
|
|
553
580
|
page_id = rm_file.stem
|
|
554
581
|
cached_pdf = page_cache_dir / f"{page_id}.pdf"
|
|
555
582
|
|
|
556
583
|
# Use cached PDF if page hasn't changed
|
|
557
584
|
if not _needs_conversion(page_id) and cached_pdf.exists():
|
|
558
|
-
return cached_pdf
|
|
585
|
+
return cached_pdf, True
|
|
559
586
|
|
|
560
587
|
# Convert the .rm file to a content PDF
|
|
561
588
|
content_pdf = page_cache_dir / f"{page_id}_content.pdf"
|
|
562
589
|
if not convert_fn(rm_file, content_pdf):
|
|
563
|
-
return None
|
|
590
|
+
return None, False
|
|
564
591
|
|
|
565
592
|
# Apply template if available
|
|
566
593
|
if template_renderer and template_temp_dir:
|
|
@@ -575,7 +602,7 @@ def convert_notebook(
|
|
|
575
602
|
except OSError:
|
|
576
603
|
pass
|
|
577
604
|
results[result_key] += 1
|
|
578
|
-
return cached_pdf
|
|
605
|
+
return cached_pdf, False
|
|
579
606
|
|
|
580
607
|
# No template or template merge failed — content PDF is the final
|
|
581
608
|
if content_pdf != cached_pdf:
|
|
@@ -585,35 +612,34 @@ def convert_notebook(
|
|
|
585
612
|
except OSError:
|
|
586
613
|
cached_pdf = content_pdf
|
|
587
614
|
results[result_key] += 1
|
|
588
|
-
return cached_pdf
|
|
615
|
+
return cached_pdf, False
|
|
589
616
|
|
|
590
|
-
# Convert
|
|
591
|
-
for rm_file in
|
|
592
|
-
|
|
593
|
-
if
|
|
594
|
-
|
|
595
|
-
if
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
# Convert v4 files (best-effort; may not succeed)
|
|
607
|
-
for rm_file in notebook.get("v4_files", []):
|
|
608
|
-
pdf = _convert_page(rm_file, "v4", convert_v4_file_with_rmrl, "v4_converted")
|
|
617
|
+
# Convert all pages in content-file order
|
|
618
|
+
for rm_file in ordered_pages:
|
|
619
|
+
page_id = rm_file.stem
|
|
620
|
+
if on_page_start:
|
|
621
|
+
on_page_start()
|
|
622
|
+
if page_id in v6_ids:
|
|
623
|
+
pdf, cached = _convert_page(rm_file, "v6", convert_v6_file_with_rmc, "v6_converted")
|
|
624
|
+
elif page_id in v4_ids:
|
|
625
|
+
pdf, cached = _convert_page(
|
|
626
|
+
rm_file, "v4", convert_v4_file_with_rmrl, "v4_converted"
|
|
627
|
+
)
|
|
628
|
+
else:
|
|
629
|
+
pdf, cached = _convert_page(
|
|
630
|
+
rm_file, "v5", convert_v5_file_with_rmrl, "v5_converted"
|
|
631
|
+
)
|
|
609
632
|
if pdf:
|
|
610
633
|
page_pdfs.append(pdf)
|
|
611
634
|
if on_page_done:
|
|
612
|
-
on_page_done()
|
|
635
|
+
on_page_done(cached=cached)
|
|
613
636
|
|
|
614
637
|
# Copy existing PDFs
|
|
615
638
|
for i, pdf_file in enumerate(notebook["pdf_files"]):
|
|
639
|
+
if on_page_start:
|
|
640
|
+
on_page_start()
|
|
616
641
|
cached_pdf = page_cache_dir / f"existing_{i+1:03d}.pdf"
|
|
642
|
+
was_cached = False
|
|
617
643
|
if not cached_pdf.exists() or changed_page_ids is None:
|
|
618
644
|
if copy_existing_pdf(pdf_file, cached_pdf):
|
|
619
645
|
page_pdfs.append(cached_pdf)
|
|
@@ -621,8 +647,12 @@ def convert_notebook(
|
|
|
621
647
|
else:
|
|
622
648
|
page_pdfs.append(cached_pdf)
|
|
623
649
|
results["pdfs_copied"] += 1
|
|
650
|
+
was_cached = True
|
|
624
651
|
if on_page_done:
|
|
625
|
-
on_page_done()
|
|
652
|
+
on_page_done(cached=was_cached)
|
|
653
|
+
|
|
654
|
+
# Store ordered page PDFs in results for downstream consumers
|
|
655
|
+
results["page_pdfs"] = list(page_pdfs)
|
|
626
656
|
|
|
627
657
|
# Create merged PDF if we have any pages
|
|
628
658
|
if page_pdfs:
|
|
@@ -115,6 +115,26 @@ class MarkdownExporter:
|
|
|
115
115
|
except OSError as exc:
|
|
116
116
|
logging.error("Failed to save Markdown export state: %s", exc)
|
|
117
117
|
|
|
118
|
+
@staticmethod
|
|
119
|
+
def _get_content_page_order(notebook: Dict) -> Optional[List[str]]:
|
|
120
|
+
"""Return ordered page IDs from the notebook's .content file."""
|
|
121
|
+
metadata_file = notebook.get("metadata_file")
|
|
122
|
+
if not metadata_file:
|
|
123
|
+
return None
|
|
124
|
+
content_path = metadata_file.with_suffix(".content")
|
|
125
|
+
if not content_path.exists():
|
|
126
|
+
return None
|
|
127
|
+
try:
|
|
128
|
+
with open(content_path, "r", encoding="utf-8") as fh:
|
|
129
|
+
data = json.load(fh)
|
|
130
|
+
page_ids = data.get("pages", [])
|
|
131
|
+
if not page_ids:
|
|
132
|
+
cpages = data.get("cPages", {}).get("pages", [])
|
|
133
|
+
page_ids = [p["id"] for p in cpages if "id" in p]
|
|
134
|
+
return page_ids if page_ids else None
|
|
135
|
+
except Exception:
|
|
136
|
+
return None
|
|
137
|
+
|
|
118
138
|
def _needs_export(self, notebook_uuid: str, pdf_path: Path) -> bool:
|
|
119
139
|
"""Return True if the notebook has changed since the last export."""
|
|
120
140
|
if notebook_uuid not in self._state:
|
|
@@ -334,6 +354,7 @@ class MarkdownExporter:
|
|
|
334
354
|
force: bool = False,
|
|
335
355
|
page_pdfs: Optional[List[Path]] = None,
|
|
336
356
|
on_page_done: Optional[callable] = None,
|
|
357
|
+
changed_page_ids: Optional[set] = None,
|
|
337
358
|
) -> Optional[Path]:
|
|
338
359
|
"""Export a notebook as a folder with one Markdown file per page.
|
|
339
360
|
|
|
@@ -342,8 +363,12 @@ class MarkdownExporter:
|
|
|
342
363
|
pdf_path: Path to the converted PDF for this notebook.
|
|
343
364
|
force: Re-export even if the notebook hasn't changed.
|
|
344
365
|
page_pdfs: Optional list of cached per-page PDF paths.
|
|
345
|
-
on_page_done: Callback ``(page_num, total_pages)``
|
|
346
|
-
each page is processed.
|
|
366
|
+
on_page_done: Callback ``(page_num, total_pages, cached=False)``
|
|
367
|
+
called after each page is processed. *cached* is True when
|
|
368
|
+
the page was skipped because its PDF hash was unchanged.
|
|
369
|
+
changed_page_ids: Set of page IDs (UUID stems) known to have
|
|
370
|
+
changed in the backup. When provided, pages in this set
|
|
371
|
+
are always re-exported regardless of hash state.
|
|
347
372
|
|
|
348
373
|
Returns:
|
|
349
374
|
Path to the notebook folder, or *None* on failure.
|
|
@@ -352,8 +377,9 @@ class MarkdownExporter:
|
|
|
352
377
|
name = notebook["name"]
|
|
353
378
|
folder_path = notebook.get("folder_path", "")
|
|
354
379
|
|
|
355
|
-
# Skip if nothing changed
|
|
356
|
-
|
|
380
|
+
# Skip if nothing changed (notebook-level check only when we don't
|
|
381
|
+
# have per-page PDFs — per-page hashing handles the granular case).
|
|
382
|
+
if not force and not page_pdfs and not self._needs_export(uuid, pdf_path):
|
|
357
383
|
logging.debug("Skipping unchanged notebook: %s", name)
|
|
358
384
|
return self._state.get(uuid, {}).get("md_path")
|
|
359
385
|
|
|
@@ -385,6 +411,17 @@ class MarkdownExporter:
|
|
|
385
411
|
rate_limited = False
|
|
386
412
|
|
|
387
413
|
for pg_idx, pg_pdf in enumerate(pages_to_process, start=1):
|
|
414
|
+
# Skip pages that haven't changed
|
|
415
|
+
if (
|
|
416
|
+
not force
|
|
417
|
+
and changed_page_ids is not None
|
|
418
|
+
and pg_pdf.stem not in changed_page_ids
|
|
419
|
+
):
|
|
420
|
+
logging.debug("Skipping unchanged page %d of '%s'", pg_idx, name)
|
|
421
|
+
if on_page_done:
|
|
422
|
+
on_page_done(pg_idx, total_pages, cached=True)
|
|
423
|
+
continue
|
|
424
|
+
|
|
388
425
|
# Rasterise page to image
|
|
389
426
|
page_image: Optional[Path] = None
|
|
390
427
|
page_images: List[Path] = [] # noqa: F841
|
|
@@ -485,6 +522,7 @@ class MarkdownExporter:
|
|
|
485
522
|
force: bool = False,
|
|
486
523
|
converted_pages: Optional[Dict[str, List[Path]]] = None,
|
|
487
524
|
page_filter: Optional[int] = None,
|
|
525
|
+
updated_pages: Optional[Dict[str, set]] = None,
|
|
488
526
|
) -> Tuple[int, int]:
|
|
489
527
|
"""Export all notebooks to Markdown.
|
|
490
528
|
|
|
@@ -496,6 +534,8 @@ class MarkdownExporter:
|
|
|
496
534
|
converted_pages: Dict mapping notebook UUID to list of per-page
|
|
497
535
|
PDF paths produced by the PDF conversion step. When provided,
|
|
498
536
|
these are used directly instead of scanning the cache dir.
|
|
537
|
+
updated_pages: Dict mapping notebook UUID to set of changed page
|
|
538
|
+
IDs from the backup stage.
|
|
499
539
|
|
|
500
540
|
Returns:
|
|
501
541
|
``(exported_count, skipped_count)`` tuple.
|
|
@@ -509,6 +549,8 @@ class MarkdownExporter:
|
|
|
509
549
|
# Count total pages for progress bar
|
|
510
550
|
total_pages = 0
|
|
511
551
|
nb_page_counts = []
|
|
552
|
+
# Count total pages that actually need OCR processing
|
|
553
|
+
total_ocr_pages = 0
|
|
512
554
|
for nb in doc_notebooks:
|
|
513
555
|
count = 0
|
|
514
556
|
if converted_pages and nb["uuid"] in converted_pages:
|
|
@@ -517,10 +559,23 @@ class MarkdownExporter:
|
|
|
517
559
|
cache = self.backup_dir / "PagePDFs" / nb["uuid"]
|
|
518
560
|
if cache.exists():
|
|
519
561
|
count = len([p for p in cache.glob("*.pdf") if not p.stem.endswith("_content")])
|
|
520
|
-
count = max(count, 1)
|
|
562
|
+
count = max(count, 1)
|
|
521
563
|
nb_page_counts.append(count)
|
|
522
564
|
total_pages += count
|
|
523
565
|
|
|
566
|
+
# Count only pages that will actually be processed
|
|
567
|
+
nb_changed = None
|
|
568
|
+
if updated_pages is not None:
|
|
569
|
+
nb_changed = updated_pages.get(nb["uuid"], set())
|
|
570
|
+
if nb_changed is not None and converted_pages and nb["uuid"] in converted_pages:
|
|
571
|
+
total_ocr_pages += sum(
|
|
572
|
+
1 for p in converted_pages[nb["uuid"]] if p.stem in nb_changed
|
|
573
|
+
)
|
|
574
|
+
else:
|
|
575
|
+
total_ocr_pages += count
|
|
576
|
+
|
|
577
|
+
ocr_counter = [0]
|
|
578
|
+
|
|
524
579
|
with create_progress("Exporting") as progress:
|
|
525
580
|
task = progress.add_task("Exporting", total=total_pages)
|
|
526
581
|
|
|
@@ -549,10 +604,20 @@ class MarkdownExporter:
|
|
|
549
604
|
else:
|
|
550
605
|
page_cache_dir = self.backup_dir / "PagePDFs" / notebook["uuid"]
|
|
551
606
|
if page_cache_dir.exists():
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
607
|
+
pdfs_on_disk = {
|
|
608
|
+
p.stem: p
|
|
609
|
+
for p in page_cache_dir.glob("*.pdf")
|
|
610
|
+
if not p.stem.endswith("_content")
|
|
611
|
+
}
|
|
612
|
+
if pdfs_on_disk:
|
|
613
|
+
# Order by .content file if available
|
|
614
|
+
ordered = self._get_content_page_order(notebook)
|
|
615
|
+
if ordered:
|
|
616
|
+
page_pdfs_list = [
|
|
617
|
+
pdfs_on_disk[pid] for pid in ordered if pid in pdfs_on_disk
|
|
618
|
+
]
|
|
619
|
+
else:
|
|
620
|
+
page_pdfs_list = sorted(pdfs_on_disk.values())
|
|
556
621
|
|
|
557
622
|
# Filter to specific page if requested
|
|
558
623
|
if page_filter and page_pdfs_list:
|
|
@@ -565,13 +630,31 @@ class MarkdownExporter:
|
|
|
565
630
|
len(page_pdfs_list),
|
|
566
631
|
)
|
|
567
632
|
|
|
568
|
-
def _on_page(
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
633
|
+
def _on_page(
|
|
634
|
+
pg_num,
|
|
635
|
+
pg_total,
|
|
636
|
+
_nb_name=nb_name,
|
|
637
|
+
_oc=ocr_counter,
|
|
638
|
+
_total_ocr=total_ocr_pages,
|
|
639
|
+
cached=False,
|
|
640
|
+
):
|
|
641
|
+
if cached:
|
|
642
|
+
logging.info("MD: %s (page %d/%d) [cached]", _nb_name, pg_num, pg_total)
|
|
643
|
+
else:
|
|
644
|
+
_oc[0] += 1
|
|
645
|
+
desc = f"OCR page {_oc[0]} of {_total_ocr} ({_nb_name} page {pg_num})"
|
|
646
|
+
progress.update(task, advance=1, description=desc)
|
|
647
|
+
logging.info(
|
|
648
|
+
"MD: OCR page %d of %d (%s page %d)",
|
|
649
|
+
_oc[0],
|
|
650
|
+
_total_ocr,
|
|
651
|
+
_nb_name,
|
|
652
|
+
pg_num,
|
|
653
|
+
)
|
|
654
|
+
|
|
655
|
+
nb_changed_pages = None
|
|
656
|
+
if updated_pages and notebook["uuid"] in updated_pages:
|
|
657
|
+
nb_changed_pages = updated_pages[notebook["uuid"]]
|
|
575
658
|
|
|
576
659
|
result = self.export_notebook(
|
|
577
660
|
notebook,
|
|
@@ -579,6 +662,7 @@ class MarkdownExporter:
|
|
|
579
662
|
force=force,
|
|
580
663
|
page_pdfs=page_pdfs_list,
|
|
581
664
|
on_page_done=_on_page,
|
|
665
|
+
changed_page_ids=nb_changed_pages,
|
|
582
666
|
)
|
|
583
667
|
# Ensure we advance the full count even if pages were fewer
|
|
584
668
|
remaining = nb_pages - (nb_pages if result else 0)
|
|
@@ -167,21 +167,24 @@ def run_conversion(
|
|
|
167
167
|
)
|
|
168
168
|
page_counter = [0] # mutable so the lambda can update it
|
|
169
169
|
|
|
170
|
-
def _on_page_done(_pc=page_counter, _nb=nb_name, _nbt=nb_total):
|
|
170
|
+
def _on_page_done(_pc=page_counter, _nb=nb_name, _nbt=nb_total, cached=False):
|
|
171
171
|
_pc[0] += 1
|
|
172
|
+
progress.update(task, advance=1)
|
|
173
|
+
suffix = " [cached]" if cached else ""
|
|
174
|
+
logging.info("PDF: %s (page %d/%d)%s", _nb, _pc[0], _nbt, suffix)
|
|
175
|
+
|
|
176
|
+
def _on_page_start(_pc=page_counter, _nb=nb_name, _nbt=nb_total):
|
|
172
177
|
progress.update(
|
|
173
178
|
task,
|
|
174
|
-
|
|
175
|
-
description=f"{_nb} (page {_pc[0]} of {_nbt})",
|
|
179
|
+
description=f"{_nb} (page {_pc[0] + 1} of {_nbt})",
|
|
176
180
|
)
|
|
177
|
-
logging.info("PDF: %s (page %d/%d)", _nb, _pc[0], _nbt)
|
|
178
181
|
|
|
179
182
|
progress.update(task, description=f"{nb_name} (page 0 of {nb_total})")
|
|
180
183
|
|
|
181
184
|
try:
|
|
182
185
|
notebook_changed_pages = None
|
|
183
|
-
if updated_pages
|
|
184
|
-
notebook_changed_pages = updated_pages
|
|
186
|
+
if updated_pages is not None:
|
|
187
|
+
notebook_changed_pages = updated_pages.get(notebook["uuid"], set())
|
|
185
188
|
|
|
186
189
|
results = convert_notebook(
|
|
187
190
|
notebook,
|
|
@@ -190,15 +193,13 @@ def run_conversion(
|
|
|
190
193
|
template_renderer,
|
|
191
194
|
changed_page_ids=notebook_changed_pages,
|
|
192
195
|
on_page_done=_on_page_done,
|
|
196
|
+
on_page_start=_on_page_start,
|
|
193
197
|
)
|
|
194
198
|
if results["output_files"]:
|
|
195
199
|
successful += 1
|
|
196
|
-
#
|
|
197
|
-
|
|
198
|
-
if
|
|
199
|
-
page_pdfs = sorted(cache_dir.glob("*.pdf"))
|
|
200
|
-
# Exclude intermediate *_content.pdf files
|
|
201
|
-
page_pdfs = [p for p in page_pdfs if not p.stem.endswith("_content")]
|
|
200
|
+
# Use ordered page PDFs from converter (preserves .content order)
|
|
201
|
+
page_pdfs = results.get("page_pdfs", [])
|
|
202
|
+
if page_pdfs:
|
|
202
203
|
converted[notebook["uuid"]] = page_pdfs
|
|
203
204
|
except Exception as e:
|
|
204
205
|
print_error(f" [ERR] Failed to convert {notebook['name']}: {e}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|