remarkablesync 2.0.0__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {remarkablesync-2.0.0/remarkablesync.egg-info → remarkablesync-2.0.1}/PKG-INFO +2 -2
  2. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/README.md +1 -1
  3. {remarkablesync-2.0.0 → remarkablesync-2.0.1/remarkablesync.egg-info}/PKG-INFO +2 -2
  4. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/__version__.py +1 -1
  5. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/pipeline.py +1 -0
  6. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/watch_command.py +1 -1
  7. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/hybrid_converter.py +70 -40
  8. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/pdf_md_converter.py +100 -16
  9. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/rm_pdf_converter.py +13 -12
  10. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/LICENSE +0 -0
  11. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/MANIFEST.in +0 -0
  12. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/RemarkableSync.py +0 -0
  13. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/pyproject.toml +0 -0
  14. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/SOURCES.txt +0 -0
  15. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/dependency_links.txt +0 -0
  16. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/entry_points.txt +0 -0
  17. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/requires.txt +0 -0
  18. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/remarkablesync.egg-info/top_level.txt +0 -0
  19. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/requirements-dev.txt +0 -0
  20. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/requirements.txt +0 -0
  21. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/setup.cfg +0 -0
  22. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/setup.py +0 -0
  23. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/__init__.py +0 -0
  24. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/__init__.py +0 -0
  25. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/base_provider.py +0 -0
  26. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/claude_provider.py +0 -0
  27. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ai/github_models_provider.py +0 -0
  28. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/auth/__init__.py +0 -0
  29. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/auth/github_device_flow.py +0 -0
  30. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/__init__.py +0 -0
  31. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/backup_manager.py +0 -0
  32. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/connection.py +0 -0
  33. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/backup/metadata.py +0 -0
  34. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/__init__.py +0 -0
  35. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/backup_command.py +0 -0
  36. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/config_command.py +0 -0
  37. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/convert_command.py +0 -0
  38. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/commands/sync_command.py +0 -0
  39. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/config.py +0 -0
  40. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/__init__.py +0 -0
  41. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/base_converter.py +0 -0
  42. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/v4_converter.py +0 -0
  43. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/v5_converter.py +0 -0
  44. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/converters/v6_converter.py +0 -0
  45. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/keyring_store.py +0 -0
  46. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/md_export/__init__.py +0 -0
  47. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ocr/__init__.py +0 -0
  48. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/ocr/ocr_engine.py +0 -0
  49. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/template_renderer.py +0 -0
  50. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/utils/__init__.py +0 -0
  51. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/utils/console.py +0 -0
  52. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/src/utils/logging.py +0 -0
  53. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/__init__.py +0 -0
  54. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/conftest.py +0 -0
  55. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/generate_test_assets.py +0 -0
  56. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/mock_ai_provider.py +0 -0
  57. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/mock_connection.py +0 -0
  58. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_ai_providers.py +0 -0
  59. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_auth_device_flow.py +0 -0
  60. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_basic.py +0 -0
  61. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_config.py +0 -0
  62. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_console.py +0 -0
  63. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_hybrid_converter.py +0 -0
  64. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_keyring_store.py +0 -0
  65. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_logging.py +0 -0
  66. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_md_exporter.py +0 -0
  67. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_metadata.py +0 -0
  68. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_ocr_pipeline.py +0 -0
  69. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_pipeline.py +0 -0
  70. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_template_renderer.py +0 -0
  71. {remarkablesync-2.0.0 → remarkablesync-2.0.1}/tests/test_wifi_connection.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: remarkablesync
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Summary: Backup and convert reMarkable tablet notebooks to PDF
5
5
  Home-page: https://github.com/JeffSteinbok/RemarkableSync
6
6
  Author: Jeff Steinbok
@@ -54,7 +54,7 @@ Dynamic: summary
54
54
 
55
55
  # RemarkableSync
56
56
 
57
- ![RemarkableSync](githubSocial.png)
57
+ ![RemarkableSync](https://raw.githubusercontent.com/JeffSteinbok/RemarkableSync/main/githubSocial.png)
58
58
 
59
59
  [![GitHub](https://img.shields.io/badge/GitHub-RemarkableSync-blue?logo=github)](https://github.com/JeffSteinbok/RemarkableSync)
60
60
  [![GitHub release](https://img.shields.io/github/v/release/JeffSteinbok/RemarkableSync)](https://github.com/JeffSteinbok/RemarkableSync/releases)
@@ -1,6 +1,6 @@
1
1
  # RemarkableSync
2
2
 
3
- ![RemarkableSync](githubSocial.png)
3
+ ![RemarkableSync](https://raw.githubusercontent.com/JeffSteinbok/RemarkableSync/main/githubSocial.png)
4
4
 
5
5
  [![GitHub](https://img.shields.io/badge/GitHub-RemarkableSync-blue?logo=github)](https://github.com/JeffSteinbok/RemarkableSync)
6
6
  [![GitHub release](https://img.shields.io/github/v/release/JeffSteinbok/RemarkableSync)](https://github.com/JeffSteinbok/RemarkableSync/releases)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: remarkablesync
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Summary: Backup and convert reMarkable tablet notebooks to PDF
5
5
  Home-page: https://github.com/JeffSteinbok/RemarkableSync
6
6
  Author: Jeff Steinbok
@@ -54,7 +54,7 @@ Dynamic: summary
54
54
 
55
55
  # RemarkableSync
56
56
 
57
- ![RemarkableSync](githubSocial.png)
57
+ ![RemarkableSync](https://raw.githubusercontent.com/JeffSteinbok/RemarkableSync/main/githubSocial.png)
58
58
 
59
59
  [![GitHub](https://img.shields.io/badge/GitHub-RemarkableSync-blue?logo=github)](https://github.com/JeffSteinbok/RemarkableSync)
60
60
  [![GitHub release](https://img.shields.io/github/v/release/JeffSteinbok/RemarkableSync)](https://github.com/JeffSteinbok/RemarkableSync/releases)
@@ -1,4 +1,4 @@
1
1
  """Version information for RemarkableSync."""
2
2
 
3
- __version__ = "2.0.0"
3
+ __version__ = "2.0.1"
4
4
  __repository__ = "https://github.com/JeffSteinbok/RemarkableSync"
@@ -249,6 +249,7 @@ def run_pipeline(
249
249
  force=force_export,
250
250
  converted_pages=converted_pages,
251
251
  page_filter=page_filter,
252
+ updated_pages=updated_pages,
252
253
  )
253
254
 
254
255
  # ------------------------------------------------------------------
@@ -738,7 +738,7 @@ class _TrayLogHandler(logging.Handler):
738
738
  return
739
739
  self._tray.set_detail(msg)
740
740
 
741
- # Parse progress from page callbacks: "PDF: Work (page 3/21)"
741
+ # Parse progress from page callbacks: "PDF: Work (page 3/21)" or "PDF: Work (page 3/21) [cached]"
742
742
  import re
743
743
 
744
744
  m = re.search(r"(PDF|MD): (.+?) \(page (\d+)/(\d+)\)", msg)
@@ -446,6 +446,7 @@ def convert_notebook(
446
446
  template_renderer: Optional[TemplateRenderer] = None,
447
447
  changed_page_ids: Optional[set] = None,
448
448
  on_page_done: Optional[callable] = None,
449
+ on_page_start: Optional[callable] = None,
449
450
  ) -> Dict:
450
451
  """Convert a notebook using appropriate tools for each file type.
451
452
 
@@ -462,6 +463,8 @@ def convert_notebook(
462
463
  template_renderer: Optional template renderer for backgrounds.
463
464
  changed_page_ids: Set of page IDs whose ``.rm`` files changed.
464
465
  When *None* all pages are (re-)converted.
466
+ on_page_done: Callback ``(cached: bool)`` called after each page.
467
+ *cached* is True when the page was served from cache.
465
468
  """
466
469
  # Create safe filename
467
470
  safe_name = "".join(c for c in notebook["name"] if c.isalnum() or c in (" ", "-", "_")).rstrip()
@@ -505,7 +508,7 @@ def convert_notebook(
505
508
  template_temp_dir = Path(tempfile.mkdtemp(prefix="remarkable_templates_"))
506
509
 
507
510
  try:
508
- # Resolve ordered pages using .content file if present (v5 ordering)
511
+ # Resolve ordered pages using .content file if present
509
512
  metadata_file = notebook.get("metadata_file")
510
513
  content_path = metadata_file.with_suffix(".content") if metadata_file else None
511
514
 
@@ -514,28 +517,52 @@ def convert_notebook(
514
517
  if template_renderer and content_path:
515
518
  page_templates = get_page_templates(content_path)
516
519
 
517
- ordered_v5_pages: List[Path] = []
520
+ # Build a set of all known .rm files by page ID for fast lookup
521
+ all_rm_by_id: Dict[str, Path] = {}
522
+ for rm_file in (
523
+ notebook.get("v5_files", [])
524
+ + notebook.get("v6_files", [])
525
+ + notebook.get("v4_files", [])
526
+ ):
527
+ all_rm_by_id[rm_file.stem] = rm_file
528
+
529
+ # Determine version for each page
530
+ v6_ids = {f.stem for f in notebook.get("v6_files", [])}
531
+ v4_ids = {f.stem for f in notebook.get("v4_files", [])}
532
+
533
+ # Order pages using .content file (applies to all versions)
534
+ ordered_pages: List[Path] = []
518
535
  if content_path and content_path.exists():
519
536
  try:
520
537
  with open(content_path, "r", encoding="utf-8") as cf:
521
538
  content_json = json.load(cf)
522
539
  page_ids = content_json.get("pages", [])
540
+ # v6 notebooks use cPages.pages with {id, idx, ...} dicts
541
+ if not page_ids:
542
+ cpages = content_json.get("cPages", {}).get("pages", [])
543
+ page_ids = [p["id"] for p in cpages if "id" in p]
523
544
  base_dir = content_path.parent / content_path.stem
524
545
  for pid in page_ids:
525
- candidate = base_dir / f"{pid}.rm"
526
- if candidate.exists():
527
- ordered_v5_pages.append(candidate)
546
+ if pid in all_rm_by_id:
547
+ ordered_pages.append(all_rm_by_id[pid])
528
548
  else:
529
- # fallback: find rm page anywhere under files matching page id
530
- alt = list((content_path.parent).glob(f"{pid}.rm"))
531
- if alt:
532
- ordered_v5_pages.append(alt[0])
549
+ candidate = base_dir / f"{pid}.rm"
550
+ if candidate.exists():
551
+ ordered_pages.append(candidate)
552
+ else:
553
+ alt = list((content_path.parent).glob(f"{pid}.rm"))
554
+ if alt:
555
+ ordered_pages.append(alt[0])
533
556
  except Exception as e:
534
557
  logging.debug("Failed reading content ordering for %s: %s", notebook["name"], e)
535
558
 
536
559
  # Fallback to unsorted list if ordering extraction failed
537
- if not ordered_v5_pages:
538
- ordered_v5_pages = notebook["v5_files"]
560
+ if not ordered_pages:
561
+ ordered_pages = (
562
+ notebook.get("v5_files", [])
563
+ + notebook.get("v6_files", [])
564
+ + notebook.get("v4_files", [])
565
+ )
539
566
 
540
567
  def _needs_conversion(page_id: str) -> bool:
541
568
  """Check if a page needs (re-)conversion."""
@@ -543,24 +570,24 @@ def convert_notebook(
543
570
  return True # No change info → convert all
544
571
  return page_id in changed_page_ids
545
572
 
546
- def _convert_page(
547
- rm_file: Path, version_tag: str, convert_fn, result_key: str
548
- ) -> Optional[Path]:
573
+ def _convert_page(rm_file: Path, version_tag: str, convert_fn, result_key: str) -> tuple:
549
574
  """Convert a single page, using cache when possible.
550
575
 
551
- Returns the path to the cached page PDF, or None on failure.
576
+ Returns ``(path, cached)`` where *cached* is True when the
577
+ page was served from the persistent cache without conversion.
578
+ Returns ``(None, False)`` on failure.
552
579
  """
553
580
  page_id = rm_file.stem
554
581
  cached_pdf = page_cache_dir / f"{page_id}.pdf"
555
582
 
556
583
  # Use cached PDF if page hasn't changed
557
584
  if not _needs_conversion(page_id) and cached_pdf.exists():
558
- return cached_pdf
585
+ return cached_pdf, True
559
586
 
560
587
  # Convert the .rm file to a content PDF
561
588
  content_pdf = page_cache_dir / f"{page_id}_content.pdf"
562
589
  if not convert_fn(rm_file, content_pdf):
563
- return None
590
+ return None, False
564
591
 
565
592
  # Apply template if available
566
593
  if template_renderer and template_temp_dir:
@@ -575,7 +602,7 @@ def convert_notebook(
575
602
  except OSError:
576
603
  pass
577
604
  results[result_key] += 1
578
- return cached_pdf
605
+ return cached_pdf, False
579
606
 
580
607
  # No template or template merge failed — content PDF is the final
581
608
  if content_pdf != cached_pdf:
@@ -585,35 +612,34 @@ def convert_notebook(
585
612
  except OSError:
586
613
  cached_pdf = content_pdf
587
614
  results[result_key] += 1
588
- return cached_pdf
615
+ return cached_pdf, False
589
616
 
590
- # Convert v5 files in determined order
591
- for rm_file in ordered_v5_pages:
592
- pdf = _convert_page(rm_file, "v5", convert_v5_file_with_rmrl, "v5_converted")
593
- if pdf:
594
- page_pdfs.append(pdf)
595
- if on_page_done:
596
- on_page_done()
597
-
598
- # Convert v6 files
599
- for rm_file in notebook["v6_files"]:
600
- pdf = _convert_page(rm_file, "v6", convert_v6_file_with_rmc, "v6_converted")
601
- if pdf:
602
- page_pdfs.append(pdf)
603
- if on_page_done:
604
- on_page_done()
605
-
606
- # Convert v4 files (best-effort; may not succeed)
607
- for rm_file in notebook.get("v4_files", []):
608
- pdf = _convert_page(rm_file, "v4", convert_v4_file_with_rmrl, "v4_converted")
617
+ # Convert all pages in content-file order
618
+ for rm_file in ordered_pages:
619
+ page_id = rm_file.stem
620
+ if on_page_start:
621
+ on_page_start()
622
+ if page_id in v6_ids:
623
+ pdf, cached = _convert_page(rm_file, "v6", convert_v6_file_with_rmc, "v6_converted")
624
+ elif page_id in v4_ids:
625
+ pdf, cached = _convert_page(
626
+ rm_file, "v4", convert_v4_file_with_rmrl, "v4_converted"
627
+ )
628
+ else:
629
+ pdf, cached = _convert_page(
630
+ rm_file, "v5", convert_v5_file_with_rmrl, "v5_converted"
631
+ )
609
632
  if pdf:
610
633
  page_pdfs.append(pdf)
611
634
  if on_page_done:
612
- on_page_done()
635
+ on_page_done(cached=cached)
613
636
 
614
637
  # Copy existing PDFs
615
638
  for i, pdf_file in enumerate(notebook["pdf_files"]):
639
+ if on_page_start:
640
+ on_page_start()
616
641
  cached_pdf = page_cache_dir / f"existing_{i+1:03d}.pdf"
642
+ was_cached = False
617
643
  if not cached_pdf.exists() or changed_page_ids is None:
618
644
  if copy_existing_pdf(pdf_file, cached_pdf):
619
645
  page_pdfs.append(cached_pdf)
@@ -621,8 +647,12 @@ def convert_notebook(
621
647
  else:
622
648
  page_pdfs.append(cached_pdf)
623
649
  results["pdfs_copied"] += 1
650
+ was_cached = True
624
651
  if on_page_done:
625
- on_page_done()
652
+ on_page_done(cached=was_cached)
653
+
654
+ # Store ordered page PDFs in results for downstream consumers
655
+ results["page_pdfs"] = list(page_pdfs)
626
656
 
627
657
  # Create merged PDF if we have any pages
628
658
  if page_pdfs:
@@ -115,6 +115,26 @@ class MarkdownExporter:
115
115
  except OSError as exc:
116
116
  logging.error("Failed to save Markdown export state: %s", exc)
117
117
 
118
+ @staticmethod
119
+ def _get_content_page_order(notebook: Dict) -> Optional[List[str]]:
120
+ """Return ordered page IDs from the notebook's .content file."""
121
+ metadata_file = notebook.get("metadata_file")
122
+ if not metadata_file:
123
+ return None
124
+ content_path = metadata_file.with_suffix(".content")
125
+ if not content_path.exists():
126
+ return None
127
+ try:
128
+ with open(content_path, "r", encoding="utf-8") as fh:
129
+ data = json.load(fh)
130
+ page_ids = data.get("pages", [])
131
+ if not page_ids:
132
+ cpages = data.get("cPages", {}).get("pages", [])
133
+ page_ids = [p["id"] for p in cpages if "id" in p]
134
+ return page_ids if page_ids else None
135
+ except Exception:
136
+ return None
137
+
118
138
  def _needs_export(self, notebook_uuid: str, pdf_path: Path) -> bool:
119
139
  """Return True if the notebook has changed since the last export."""
120
140
  if notebook_uuid not in self._state:
@@ -334,6 +354,7 @@ class MarkdownExporter:
334
354
  force: bool = False,
335
355
  page_pdfs: Optional[List[Path]] = None,
336
356
  on_page_done: Optional[callable] = None,
357
+ changed_page_ids: Optional[set] = None,
337
358
  ) -> Optional[Path]:
338
359
  """Export a notebook as a folder with one Markdown file per page.
339
360
 
@@ -342,8 +363,12 @@ class MarkdownExporter:
342
363
  pdf_path: Path to the converted PDF for this notebook.
343
364
  force: Re-export even if the notebook hasn't changed.
344
365
  page_pdfs: Optional list of cached per-page PDF paths.
345
- on_page_done: Callback ``(page_num, total_pages)`` called after
346
- each page is processed.
366
+ on_page_done: Callback ``(page_num, total_pages, cached=False)``
367
+ called after each page is processed. *cached* is True when
368
+ the page was skipped because its PDF hash was unchanged.
369
+ changed_page_ids: Set of page IDs (UUID stems) known to have
370
+ changed in the backup. When provided, pages in this set
371
+ are always re-exported regardless of hash state.
347
372
 
348
373
  Returns:
349
374
  Path to the notebook folder, or *None* on failure.
@@ -352,8 +377,9 @@ class MarkdownExporter:
352
377
  name = notebook["name"]
353
378
  folder_path = notebook.get("folder_path", "")
354
379
 
355
- # Skip if nothing changed
356
- if not force and not self._needs_export(uuid, pdf_path):
380
+ # Skip if nothing changed (notebook-level check only when we don't
381
+ # have per-page PDFs; per-page hashing handles the granular case).
382
+ if not force and not page_pdfs and not self._needs_export(uuid, pdf_path):
357
383
  logging.debug("Skipping unchanged notebook: %s", name)
358
384
  return self._state.get(uuid, {}).get("md_path")
359
385
 
@@ -385,6 +411,17 @@ class MarkdownExporter:
385
411
  rate_limited = False
386
412
 
387
413
  for pg_idx, pg_pdf in enumerate(pages_to_process, start=1):
414
+ # Skip pages that haven't changed
415
+ if (
416
+ not force
417
+ and changed_page_ids is not None
418
+ and pg_pdf.stem not in changed_page_ids
419
+ ):
420
+ logging.debug("Skipping unchanged page %d of '%s'", pg_idx, name)
421
+ if on_page_done:
422
+ on_page_done(pg_idx, total_pages, cached=True)
423
+ continue
424
+
388
425
  # Rasterise page to image
389
426
  page_image: Optional[Path] = None
390
427
  page_images: List[Path] = [] # noqa: F841
@@ -485,6 +522,7 @@ class MarkdownExporter:
485
522
  force: bool = False,
486
523
  converted_pages: Optional[Dict[str, List[Path]]] = None,
487
524
  page_filter: Optional[int] = None,
525
+ updated_pages: Optional[Dict[str, set]] = None,
488
526
  ) -> Tuple[int, int]:
489
527
  """Export all notebooks to Markdown.
490
528
 
@@ -496,6 +534,8 @@ class MarkdownExporter:
496
534
  converted_pages: Dict mapping notebook UUID to list of per-page
497
535
  PDF paths produced by the PDF conversion step. When provided,
498
536
  these are used directly instead of scanning the cache dir.
537
+ updated_pages: Dict mapping notebook UUID to set of changed page
538
+ IDs from the backup stage.
499
539
 
500
540
  Returns:
501
541
  ``(exported_count, skipped_count)`` tuple.
@@ -509,6 +549,8 @@ class MarkdownExporter:
509
549
  # Count total pages for progress bar
510
550
  total_pages = 0
511
551
  nb_page_counts = []
552
+ # Count total pages that actually need OCR processing
553
+ total_ocr_pages = 0
512
554
  for nb in doc_notebooks:
513
555
  count = 0
514
556
  if converted_pages and nb["uuid"] in converted_pages:
@@ -517,10 +559,23 @@ class MarkdownExporter:
517
559
  cache = self.backup_dir / "PagePDFs" / nb["uuid"]
518
560
  if cache.exists():
519
561
  count = len([p for p in cache.glob("*.pdf") if not p.stem.endswith("_content")])
520
- count = max(count, 1) # at least 1 so progress always advances
562
+ count = max(count, 1)
521
563
  nb_page_counts.append(count)
522
564
  total_pages += count
523
565
 
566
+ # Count only pages that will actually be processed
567
+ nb_changed = None
568
+ if updated_pages is not None:
569
+ nb_changed = updated_pages.get(nb["uuid"], set())
570
+ if nb_changed is not None and converted_pages and nb["uuid"] in converted_pages:
571
+ total_ocr_pages += sum(
572
+ 1 for p in converted_pages[nb["uuid"]] if p.stem in nb_changed
573
+ )
574
+ else:
575
+ total_ocr_pages += count
576
+
577
+ ocr_counter = [0]
578
+
524
579
  with create_progress("Exporting") as progress:
525
580
  task = progress.add_task("Exporting", total=total_pages)
526
581
 
@@ -549,10 +604,20 @@ class MarkdownExporter:
549
604
  else:
550
605
  page_cache_dir = self.backup_dir / "PagePDFs" / notebook["uuid"]
551
606
  if page_cache_dir.exists():
552
- pdfs = sorted(page_cache_dir.glob("*.pdf"))
553
- pdfs = [p for p in pdfs if not p.stem.endswith("_content")]
554
- if pdfs:
555
- page_pdfs_list = pdfs
607
+ pdfs_on_disk = {
608
+ p.stem: p
609
+ for p in page_cache_dir.glob("*.pdf")
610
+ if not p.stem.endswith("_content")
611
+ }
612
+ if pdfs_on_disk:
613
+ # Order by .content file if available
614
+ ordered = self._get_content_page_order(notebook)
615
+ if ordered:
616
+ page_pdfs_list = [
617
+ pdfs_on_disk[pid] for pid in ordered if pid in pdfs_on_disk
618
+ ]
619
+ else:
620
+ page_pdfs_list = sorted(pdfs_on_disk.values())
556
621
 
557
622
  # Filter to specific page if requested
558
623
  if page_filter and page_pdfs_list:
@@ -565,13 +630,31 @@ class MarkdownExporter:
565
630
  len(page_pdfs_list),
566
631
  )
567
632
 
568
- def _on_page(pg_num, pg_total, _nb_name=nb_name):
569
- progress.update(
570
- task,
571
- advance=1,
572
- description=f"{_nb_name} (page {pg_num} of {pg_total})",
573
- )
574
- logging.info("MD: %s (page %d/%d)", _nb_name, pg_num, pg_total)
633
+ def _on_page(
634
+ pg_num,
635
+ pg_total,
636
+ _nb_name=nb_name,
637
+ _oc=ocr_counter,
638
+ _total_ocr=total_ocr_pages,
639
+ cached=False,
640
+ ):
641
+ if cached:
642
+ logging.info("MD: %s (page %d/%d) [cached]", _nb_name, pg_num, pg_total)
643
+ else:
644
+ _oc[0] += 1
645
+ desc = f"OCR page {_oc[0]} of {_total_ocr} ({_nb_name} page {pg_num})"
646
+ progress.update(task, advance=1, description=desc)
647
+ logging.info(
648
+ "MD: OCR page %d of %d (%s page %d)",
649
+ _oc[0],
650
+ _total_ocr,
651
+ _nb_name,
652
+ pg_num,
653
+ )
654
+
655
+ nb_changed_pages = None
656
+ if updated_pages and notebook["uuid"] in updated_pages:
657
+ nb_changed_pages = updated_pages[notebook["uuid"]]
575
658
 
576
659
  result = self.export_notebook(
577
660
  notebook,
@@ -579,6 +662,7 @@ class MarkdownExporter:
579
662
  force=force,
580
663
  page_pdfs=page_pdfs_list,
581
664
  on_page_done=_on_page,
665
+ changed_page_ids=nb_changed_pages,
582
666
  )
583
667
  # Ensure we advance the full count even if pages were fewer
584
668
  remaining = nb_pages - (nb_pages if result else 0)
@@ -167,21 +167,24 @@ def run_conversion(
167
167
  )
168
168
  page_counter = [0] # mutable so the lambda can update it
169
169
 
170
- def _on_page_done(_pc=page_counter, _nb=nb_name, _nbt=nb_total):
170
+ def _on_page_done(_pc=page_counter, _nb=nb_name, _nbt=nb_total, cached=False):
171
171
  _pc[0] += 1
172
+ progress.update(task, advance=1)
173
+ suffix = " [cached]" if cached else ""
174
+ logging.info("PDF: %s (page %d/%d)%s", _nb, _pc[0], _nbt, suffix)
175
+
176
+ def _on_page_start(_pc=page_counter, _nb=nb_name, _nbt=nb_total):
172
177
  progress.update(
173
178
  task,
174
- advance=1,
175
- description=f"{_nb} (page {_pc[0]} of {_nbt})",
179
+ description=f"{_nb} (page {_pc[0] + 1} of {_nbt})",
176
180
  )
177
- logging.info("PDF: %s (page %d/%d)", _nb, _pc[0], _nbt)
178
181
 
179
182
  progress.update(task, description=f"{nb_name} (page 0 of {nb_total})")
180
183
 
181
184
  try:
182
185
  notebook_changed_pages = None
183
- if updated_pages and notebook["uuid"] in updated_pages:
184
- notebook_changed_pages = updated_pages[notebook["uuid"]]
186
+ if updated_pages is not None:
187
+ notebook_changed_pages = updated_pages.get(notebook["uuid"], set())
185
188
 
186
189
  results = convert_notebook(
187
190
  notebook,
@@ -190,15 +193,13 @@ def run_conversion(
190
193
  template_renderer,
191
194
  changed_page_ids=notebook_changed_pages,
192
195
  on_page_done=_on_page_done,
196
+ on_page_start=_on_page_start,
193
197
  )
194
198
  if results["output_files"]:
195
199
  successful += 1
196
- # Collect per-page PDFs for downstream (MD export)
197
- cache_dir = results.get("page_cache_dir")
198
- if cache_dir and cache_dir.exists():
199
- page_pdfs = sorted(cache_dir.glob("*.pdf"))
200
- # Exclude intermediate *_content.pdf files
201
- page_pdfs = [p for p in page_pdfs if not p.stem.endswith("_content")]
200
+ # Use ordered page PDFs from converter (preserves .content order)
201
+ page_pdfs = results.get("page_pdfs", [])
202
+ if page_pdfs:
202
203
  converted[notebook["uuid"]] = page_pdfs
203
204
  except Exception as e:
204
205
  print_error(f" [ERR] Failed to convert {notebook['name']}: {e}")
File without changes
File without changes
File without changes