noteshift 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noteshift-0.1.0/PKG-INFO +141 -0
- noteshift-0.1.0/README.md +107 -0
- noteshift-0.1.0/pyproject.toml +78 -0
- noteshift-0.1.0/src/noteshift/__init__.py +28 -0
- noteshift-0.1.0/src/noteshift/api.py +241 -0
- noteshift-0.1.0/src/noteshift/checkpoint.py +100 -0
- noteshift-0.1.0/src/noteshift/cli.py +136 -0
- noteshift-0.1.0/src/noteshift/db_export.py +92 -0
- noteshift-0.1.0/src/noteshift/events.py +26 -0
- noteshift-0.1.0/src/noteshift/exporter.py +316 -0
- noteshift-0.1.0/src/noteshift/filenames.py +43 -0
- noteshift-0.1.0/src/noteshift/license.py +70 -0
- noteshift-0.1.0/src/noteshift/markdown.py +115 -0
- noteshift-0.1.0/src/noteshift/notion.py +78 -0
- noteshift-0.1.0/src/noteshift/types.py +40 -0
noteshift-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: noteshift
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Export Notion pages and databases to Obsidian-friendly Markdown.
|
|
5
|
+
Keywords: notion,obsidian,migration,markdown,export
|
|
6
|
+
Author: Fragment256
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Utilities
|
|
16
|
+
Requires-Dist: httpx>=0.28.1,<0.29
|
|
17
|
+
Requires-Dist: python-slugify>=8.0.4,<9
|
|
18
|
+
Requires-Dist: typer>=0.24.1,<1
|
|
19
|
+
Requires-Dist: mypy>=1.18.1,<2 ; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest>=9.0.2,<10 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest-cov>=6.0.0,<7 ; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest-vcr>=1.0.2,<2 ; extra == 'dev'
|
|
23
|
+
Requires-Dist: ruff>=0.15.4,<0.16 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=9.0.2,<10 ; extra == 'test'
|
|
25
|
+
Requires-Dist: pytest-cov>=6.0.0,<7 ; extra == 'test'
|
|
26
|
+
Requires-Dist: pytest-vcr>=1.0.2,<2 ; extra == 'test'
|
|
27
|
+
Requires-Python: >=3.11
|
|
28
|
+
Project-URL: Homepage, https://github.com/Fragment256/noteshift
|
|
29
|
+
Project-URL: Repository, https://github.com/Fragment256/noteshift
|
|
30
|
+
Project-URL: Issues, https://github.com/Fragment256/noteshift/issues
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Provides-Extra: test
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# NoteShift (`noteshift`)
|
|
36
|
+
|
|
37
|
+
**NoteShift** exports Notion content to Obsidian-friendly Markdown with predictable filenames, link rewriting, and checkpoint/resume support.
|
|
38
|
+
|
|
39
|
+
[](https://github.com/Fragment256/noteshift/actions/workflows/ci.yml)
|
|
40
|
+
[](https://pypi.org/project/noteshift/)
|
|
41
|
+
[](https://pypi.org/project/noteshift/)
|
|
42
|
+
[](https://codecov.io/gh/Fragment256/noteshift)
|
|
43
|
+
|
|
44
|
+
## Why it exists
|
|
45
|
+
|
|
46
|
+
Teams migrating from Notion to Obsidian consistently report four pains:
|
|
47
|
+
|
|
48
|
+
1. broken internal links after export
|
|
49
|
+
2. inconsistent filenames and folder layout
|
|
50
|
+
3. long exports failing midway without resume
|
|
51
|
+
4. low confidence in migration correctness
|
|
52
|
+
|
|
53
|
+
NoteShift is focused on solving those pains first.
|
|
54
|
+
|
|
55
|
+
## Current capabilities
|
|
56
|
+
|
|
57
|
+
- Export a Notion page tree to Markdown
|
|
58
|
+
- Export Notion data sources/databases through API layer
|
|
59
|
+
- Rewrite internal links for Obsidian compatibility
|
|
60
|
+
- Preserve and download attachments
|
|
61
|
+
- Resume interrupted runs via checkpoint file
|
|
62
|
+
- Emit migration report (`migration_report.json` + `.md`)
|
|
63
|
+
|
|
64
|
+
## Documentation
|
|
65
|
+
|
|
66
|
+
- Docs index: [`docs/`](docs/index.md)
|
|
67
|
+
- Start here: [Getting started](docs/getting-started.md)
|
|
68
|
+
- Library integration: [API contract](docs/api-contract.md)
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
### Install from PyPI
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
uv tool install noteshift
|
|
76
|
+
# or
|
|
77
|
+
pipx install noteshift
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Install from source (development)
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
uv tool install .
|
|
84
|
+
uv sync --extra dev --extra test
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Authentication
|
|
88
|
+
|
|
89
|
+
Set a Notion integration token in `NOTION_TOKEN`.
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
export NOTION_TOKEN="secret_xxx"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Basic usage
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
noteshift export \
|
|
99
|
+
--page-id "<notion-page-id>" \
|
|
100
|
+
--out ./export \
|
|
101
|
+
--max-depth 2 \
|
|
102
|
+
--overwrite
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Output
|
|
106
|
+
|
|
107
|
+
A successful run writes:
|
|
108
|
+
|
|
109
|
+
- Markdown files for exported pages
|
|
110
|
+
- Downloaded assets in the export tree
|
|
111
|
+
- `.checkpoint.json` for resume
|
|
112
|
+
- `migration_report.json`
|
|
113
|
+
- `migration_report.md`
|
|
114
|
+
|
|
115
|
+
## Development
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
uv sync --extra dev --extra test
|
|
119
|
+
uv run ruff format .
|
|
120
|
+
uv run ruff check .
|
|
121
|
+
uv run mypy src
|
|
122
|
+
uv run pytest --cov=noteshift --cov-report=term
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Contract tests (`pytest-vcr`)
|
|
126
|
+
|
|
127
|
+
Contract tests are deterministic and replay HTTP traffic from sanitized cassettes:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
uv run pytest -m contract
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
To re-record cassettes intentionally, set a real token in your environment and run:
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
VCR_RECORD_MODE=once uv run pytest -m contract
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# NoteShift (`noteshift`)
|
|
2
|
+
|
|
3
|
+
**NoteShift** exports Notion content to Obsidian-friendly Markdown with predictable filenames, link rewriting, and checkpoint/resume support.
|
|
4
|
+
|
|
5
|
+
[](https://github.com/Fragment256/noteshift/actions/workflows/ci.yml)
|
|
6
|
+
[](https://pypi.org/project/noteshift/)
|
|
7
|
+
[](https://pypi.org/project/noteshift/)
|
|
8
|
+
[](https://codecov.io/gh/Fragment256/noteshift)
|
|
9
|
+
|
|
10
|
+
## Why it exists
|
|
11
|
+
|
|
12
|
+
Teams migrating from Notion to Obsidian consistently report four pains:
|
|
13
|
+
|
|
14
|
+
1. broken internal links after export
|
|
15
|
+
2. inconsistent filenames and folder layout
|
|
16
|
+
3. long exports failing midway without resume
|
|
17
|
+
4. low confidence in migration correctness
|
|
18
|
+
|
|
19
|
+
NoteShift is focused on solving those pains first.
|
|
20
|
+
|
|
21
|
+
## Current capabilities
|
|
22
|
+
|
|
23
|
+
- Export a Notion page tree to Markdown
|
|
24
|
+
- Export Notion data sources/databases through API layer
|
|
25
|
+
- Rewrite internal links for Obsidian compatibility
|
|
26
|
+
- Preserve and download attachments
|
|
27
|
+
- Resume interrupted runs via checkpoint file
|
|
28
|
+
- Emit migration report (`migration_report.json` + `.md`)
|
|
29
|
+
|
|
30
|
+
## Documentation
|
|
31
|
+
|
|
32
|
+
- Docs index: [`docs/`](docs/index.md)
|
|
33
|
+
- Start here: [Getting started](docs/getting-started.md)
|
|
34
|
+
- Library integration: [API contract](docs/api-contract.md)
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
### Install from PyPI
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
uv tool install noteshift
|
|
42
|
+
# or
|
|
43
|
+
pipx install noteshift
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Install from source (development)
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
uv tool install .
|
|
50
|
+
uv sync --extra dev --extra test
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Authentication
|
|
54
|
+
|
|
55
|
+
Set a Notion integration token in `NOTION_TOKEN`.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
export NOTION_TOKEN="secret_xxx"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Basic usage
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
noteshift export \
|
|
65
|
+
--page-id "<notion-page-id>" \
|
|
66
|
+
--out ./export \
|
|
67
|
+
--max-depth 2 \
|
|
68
|
+
--overwrite
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Output
|
|
72
|
+
|
|
73
|
+
A successful run writes:
|
|
74
|
+
|
|
75
|
+
- Markdown files for exported pages
|
|
76
|
+
- Downloaded assets in the export tree
|
|
77
|
+
- `.checkpoint.json` for resume
|
|
78
|
+
- `migration_report.json`
|
|
79
|
+
- `migration_report.md`
|
|
80
|
+
|
|
81
|
+
## Development
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
uv sync --extra dev --extra test
|
|
85
|
+
uv run ruff format .
|
|
86
|
+
uv run ruff check .
|
|
87
|
+
uv run mypy src
|
|
88
|
+
uv run pytest --cov=noteshift --cov-report=term
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Contract tests (`pytest-vcr`)
|
|
92
|
+
|
|
93
|
+
Contract tests are deterministic and replay HTTP traffic from sanitized cassettes:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
uv run pytest -m contract
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
To re-record cassettes intentionally, set a real token in your environment and run:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
VCR_RECORD_MODE=once uv run pytest -m contract
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## License
|
|
106
|
+
|
|
107
|
+
MIT
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "noteshift"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Export Notion pages and databases to Obsidian-friendly Markdown."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Fragment256" }
|
|
10
|
+
]
|
|
11
|
+
keywords = ["notion", "obsidian", "migration", "markdown", "export"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 3 - Alpha",
|
|
14
|
+
"Intended Audience :: End Users/Desktop",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Topic :: Utilities",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"httpx>=0.28.1,<0.29",
|
|
24
|
+
"python-slugify>=8.0.4,<9",
|
|
25
|
+
"typer>=0.24.1,<1",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/Fragment256/noteshift"
|
|
30
|
+
Repository = "https://github.com/Fragment256/noteshift"
|
|
31
|
+
Issues = "https://github.com/Fragment256/noteshift/issues"
|
|
32
|
+
|
|
33
|
+
[project.scripts]
|
|
34
|
+
noteshift = "noteshift.cli:main"
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"mypy>=1.18.1,<2",
|
|
39
|
+
"pytest>=9.0.2,<10",
|
|
40
|
+
"pytest-cov>=6.0.0,<7",
|
|
41
|
+
"pytest-vcr>=1.0.2,<2",
|
|
42
|
+
"ruff>=0.15.4,<0.16",
|
|
43
|
+
]
|
|
44
|
+
test = [
|
|
45
|
+
"pytest>=9.0.2,<10",
|
|
46
|
+
"pytest-cov>=6.0.0,<7",
|
|
47
|
+
"pytest-vcr>=1.0.2,<2",
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
[tool.ruff]
|
|
51
|
+
line-length = 88
|
|
52
|
+
target-version = "py312"
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint]
|
|
55
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
56
|
+
ignore = ["B008"]
|
|
57
|
+
|
|
58
|
+
[tool.pytest.ini_options]
|
|
59
|
+
pythonpath = ["src"]
|
|
60
|
+
addopts = "-q"
|
|
61
|
+
testpaths = ["tests"]
|
|
62
|
+
markers = [
|
|
63
|
+
"contract: deterministic API contract tests (vcr-backed)",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[tool.mypy]
|
|
67
|
+
python_version = "3.12"
|
|
68
|
+
warn_unused_configs = true
|
|
69
|
+
warn_redundant_casts = true
|
|
70
|
+
warn_unused_ignores = true
|
|
71
|
+
check_untyped_defs = true
|
|
72
|
+
no_implicit_optional = true
|
|
73
|
+
strict_equality = true
|
|
74
|
+
mypy_path = "src"
|
|
75
|
+
|
|
76
|
+
[build-system]
|
|
77
|
+
requires = ["uv_build>=0.10.4,<0.11.0"]
|
|
78
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"__version__",
|
|
8
|
+
"ExportPlan",
|
|
9
|
+
"ExportResult",
|
|
10
|
+
"NoteshiftConfig",
|
|
11
|
+
"PreflightReport",
|
|
12
|
+
"ProgressEvent",
|
|
13
|
+
"ProgressSink",
|
|
14
|
+
"preflight",
|
|
15
|
+
"run_export",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
__version__ = "0.1.0"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(name: str) -> Any:
|
|
22
|
+
if name in {"ExportPlan", "ExportResult", "NoteshiftConfig", "PreflightReport"}:
|
|
23
|
+
return getattr(import_module("noteshift.types"), name)
|
|
24
|
+
if name in {"ProgressEvent", "ProgressSink"}:
|
|
25
|
+
return getattr(import_module("noteshift.events"), name)
|
|
26
|
+
if name in {"preflight", "run_export"}:
|
|
27
|
+
return getattr(import_module("noteshift.api"), name)
|
|
28
|
+
raise AttributeError(f"module 'noteshift' has no attribute {name!r}")
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from noteshift.checkpoint import Checkpoint
|
|
9
|
+
from noteshift.db_export import export_child_database
|
|
10
|
+
from noteshift.events import ProgressEvent, ProgressSink
|
|
11
|
+
from noteshift.exporter import export_page_tree
|
|
12
|
+
from noteshift.notion import NotionClient
|
|
13
|
+
from noteshift.types import ExportPlan, ExportResult, NoteshiftConfig, PreflightReport
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _emit(progress: ProgressSink | None, event: ProgressEvent) -> None:
|
|
17
|
+
if progress is not None:
|
|
18
|
+
progress(event)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _write_migration_report(
|
|
22
|
+
out_dir: Path, checkpoint: Checkpoint
|
|
23
|
+
) -> tuple[Path, list[str]]:
|
|
24
|
+
report_errors: list[str] = []
|
|
25
|
+
report_data: dict[str, object] = {
|
|
26
|
+
"timestamp": datetime.now(UTC).isoformat(),
|
|
27
|
+
"pages_exported_total": len(checkpoint.page_ids),
|
|
28
|
+
"databases_exported_total": len(checkpoint.database_ids),
|
|
29
|
+
"rows_exported_total": checkpoint.rows_exported,
|
|
30
|
+
"attachments_downloaded_total": checkpoint.attachments_downloaded,
|
|
31
|
+
"files_written_total": len(checkpoint.files_written),
|
|
32
|
+
"warnings": checkpoint.warnings,
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
json_report_path = out_dir / "migration_report.json"
|
|
36
|
+
try:
|
|
37
|
+
with open(json_report_path, "w", encoding="utf-8") as f:
|
|
38
|
+
json.dump(report_data, f, indent=2)
|
|
39
|
+
|
|
40
|
+
md_report_path = out_dir / "migration_report.md"
|
|
41
|
+
md_lines = [
|
|
42
|
+
"# Migration Report",
|
|
43
|
+
"",
|
|
44
|
+
"## Summary",
|
|
45
|
+
"",
|
|
46
|
+
"| Metric | Value |",
|
|
47
|
+
"| :----- | :---- |",
|
|
48
|
+
f"| Timestamp | {report_data['timestamp']} |",
|
|
49
|
+
f"| Pages Exported | {report_data['pages_exported_total']} |",
|
|
50
|
+
f"| Databases Exported | {report_data['databases_exported_total']} |",
|
|
51
|
+
f"| Rows Exported | {report_data['rows_exported_total']} |",
|
|
52
|
+
(
|
|
53
|
+
"| Attachments Downloaded | "
|
|
54
|
+
f"{report_data['attachments_downloaded_total']} |"
|
|
55
|
+
),
|
|
56
|
+
f"| Files Written | {report_data['files_written_total']} |",
|
|
57
|
+
"",
|
|
58
|
+
"## Warnings",
|
|
59
|
+
"",
|
|
60
|
+
]
|
|
61
|
+
warnings = report_data["warnings"]
|
|
62
|
+
if isinstance(warnings, list) and warnings:
|
|
63
|
+
for warning in warnings:
|
|
64
|
+
md_lines.append(f"- {warning}")
|
|
65
|
+
else:
|
|
66
|
+
md_lines.append("No warnings.")
|
|
67
|
+
|
|
68
|
+
with open(md_report_path, "w", encoding="utf-8") as f:
|
|
69
|
+
f.write("\n".join(md_lines) + "\n")
|
|
70
|
+
except OSError as exc:
|
|
71
|
+
report_errors.append(f"Failed to write migration report files: {exc}")
|
|
72
|
+
|
|
73
|
+
return json_report_path, report_errors
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _database_title(schema: dict) -> str:
|
|
77
|
+
title_obj = schema.get("title")
|
|
78
|
+
if isinstance(title_obj, list) and title_obj:
|
|
79
|
+
first = title_obj[0]
|
|
80
|
+
if isinstance(first, dict) and first.get("plain_text"):
|
|
81
|
+
return str(first["plain_text"])
|
|
82
|
+
return "Database"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def preflight(plan: ExportPlan, config: NoteshiftConfig) -> PreflightReport:
    """Validate token, plan, and output directory before an export run.

    Pure validation: nothing is created or written. All detected problems are
    accumulated into the returned report rather than raised, so callers can
    present every issue at once.
    """
    errors: list[str] = []
    warnings: list[str] = []

    # Token may come from config or the environment; either satisfies.
    if not (config.notion_token or os.getenv("NOTION_TOKEN")):
        errors.append(
            "Missing Notion token. Provide config.notion_token or set NOTION_TOKEN."
        )

    if not (plan.page_ids or plan.database_ids):
        errors.append(
            "Export plan is empty. Provide at least one page_id or database_id."
        )

    if config.max_depth < 0:
        errors.append("max_depth must be >= 0.")

    out_dir = config.out_dir.resolve()
    if out_dir.exists():
        if not out_dir.is_dir():
            errors.append(
                f"Output path {out_dir} exists and is not a directory. "
                "Choose a directory path for out_dir."
            )
        elif any(out_dir.iterdir()) and not config.overwrite:
            # Refuse to clobber existing content unless explicitly allowed.
            errors.append(
                f"Output dir {out_dir} is not empty. "
                "Use overwrite=True or choose a new out_dir."
            )

    return PreflightReport(ok=not errors, errors=errors, warnings=warnings)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def run_export(
    plan: ExportPlan,
    config: NoteshiftConfig,
    progress: ProgressSink | None = None,
) -> ExportResult:
    """Run a full export of the pages and databases named in *plan*.

    Validates with ``preflight`` first and raises ``ValueError`` (all preflight
    errors joined with "; ") when it fails. Per-item export failures are
    recorded in the result's ``errors`` unless ``config.fail_fast`` is set, in
    which case the first failure is re-raised as ``RuntimeError`` chained to
    the original exception. Progress is reported through the optional
    *progress* sink as ``ProgressEvent`` instances.

    Side effects: creates ``out_dir`` (and parents), writes exported files,
    saves ``.checkpoint.json``, and writes the migration report files.
    """
    # Fail early on configuration problems; preflight collects all of them.
    report = preflight(plan, config)
    if not report.ok:
        raise ValueError("; ".join(report.errors))

    # Preflight already verified a token exists; re-resolve it here so the
    # rest of the function works with a narrowed non-None value.
    token = config.notion_token or os.getenv("NOTION_TOKEN")
    if token is None:
        raise ValueError("Missing Notion token.")

    out_dir = config.out_dir.resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    # Resume from a previous run's checkpoint unless force requests a restart.
    checkpoint_path = out_dir / ".checkpoint.json"
    checkpoint = Checkpoint.load(checkpoint_path)
    if config.force:
        checkpoint = Checkpoint()

    all_errors: list[str] = []

    _emit(progress, ProgressEvent(type="phase", message="starting_export"))

    # Phase 1: export each requested page tree.
    for page_id in plan.page_ids:
        _emit(progress, ProgressEvent(type="item_start", id=page_id, title="page"))
        try:
            export_page_tree(
                token=token,
                root_page_id=page_id,
                out_dir=out_dir,
                checkpoint=checkpoint,
                force=config.force,
                max_depth=config.max_depth,
            )
            _emit(progress, ProgressEvent(type="item_done", id=page_id, title="page"))
        except Exception as exc:  # noqa: BLE001
            # Record the failure and keep going unless fail_fast is set.
            msg = f"Failed to export page {page_id}: {exc}"
            all_errors.append(msg)
            _emit(
                progress,
                ProgressEvent(type="error", id=page_id, title="page", message=msg),
            )
            if config.fail_fast:
                raise RuntimeError(msg) from exc

    # Phase 2: export standalone databases; one client shared across all.
    if plan.database_ids:
        client = NotionClient(token)
        for database_id in plan.database_ids:
            _emit(
                progress,
                ProgressEvent(type="item_start", id=database_id, title="database"),
            )
            try:
                # Fetch the schema only to derive a human-readable title.
                schema = client.get_data_source(database_id)
                title = _database_title(schema)
                db_result = export_child_database(
                    client=client,
                    data_source_id=database_id,
                    title=title,
                    out_dir=out_dir,
                )
                checkpoint.add_database(database_id)
                checkpoint.add_rows(db_result.rows_exported)
                # Surface every per-database warning both in the checkpoint
                # (for the report) and to the live progress sink.
                for warning in db_result.warnings:
                    checkpoint.add_warning(warning)
                    _emit(
                        progress,
                        ProgressEvent(
                            type="warning",
                            id=database_id,
                            title="database",
                            message=warning,
                        ),
                    )
                _emit(
                    progress,
                    ProgressEvent(type="item_done", id=database_id, title="database"),
                )
            except Exception as exc:  # noqa: BLE001
                msg = f"Failed to export database {database_id}: {exc}"
                all_errors.append(msg)
                _emit(
                    progress,
                    ProgressEvent(
                        type="error", id=database_id, title="database", message=msg
                    ),
                )
                if config.fail_fast:
                    raise RuntimeError(msg) from exc

    # Persist progress so a subsequent run can resume from here.
    checkpoint.save(checkpoint_path)
    _emit(progress, ProgressEvent(type="checkpoint", message=str(checkpoint_path)))

    report_path, report_errors = _write_migration_report(out_dir, checkpoint)

    summary = ExportResult(
        out_dir=out_dir,
        report_path=report_path,
        checkpoint_path=checkpoint_path,
        pages_exported=len(checkpoint.page_ids),
        databases_exported=len(checkpoint.database_ids),
        rows_exported=checkpoint.rows_exported,
        attachments_downloaded=checkpoint.attachments_downloaded,
        warnings=list(checkpoint.warnings),
        errors=all_errors + report_errors,
    )

    _emit(
        progress,
        ProgressEvent(
            type="summary",
            message=(
                f"pages={summary.pages_exported}, "
                f"databases={summary.databases_exported}, "
                f"rows={summary.rows_exported}, "
                f"attachments={summary.attachments_downloaded}"
            ),
        ),
    )

    return summary
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import UTC
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class Checkpoint:
|
|
12
|
+
"""Tracks export progress for resumable exports."""
|
|
13
|
+
|
|
14
|
+
version: int = 1
|
|
15
|
+
timestamp: str = ""
|
|
16
|
+
# IDs of exported items
|
|
17
|
+
page_ids: set[str] = field(default_factory=set)
|
|
18
|
+
database_ids: set[str] = field(default_factory=set)
|
|
19
|
+
# Files written (relative paths from out_dir)
|
|
20
|
+
files_written: list[str] = field(default_factory=list)
|
|
21
|
+
# Statistics
|
|
22
|
+
attachments_downloaded: int = 0
|
|
23
|
+
rows_exported: int = 0
|
|
24
|
+
# Warnings accumulated
|
|
25
|
+
warnings: list[str] = field(default_factory=list)
|
|
26
|
+
|
|
27
|
+
@classmethod
|
|
28
|
+
def load(cls, path: Path) -> Checkpoint:
|
|
29
|
+
"""Load checkpoint from file, or return empty if not exists."""
|
|
30
|
+
if not path.exists():
|
|
31
|
+
return cls()
|
|
32
|
+
try:
|
|
33
|
+
with open(path, encoding="utf-8") as f:
|
|
34
|
+
data = json.load(f)
|
|
35
|
+
return cls(
|
|
36
|
+
version=data.get("version", 1),
|
|
37
|
+
timestamp=data.get("timestamp", ""),
|
|
38
|
+
page_ids=set(data.get("page_ids", [])),
|
|
39
|
+
database_ids=set(data.get("database_ids", [])),
|
|
40
|
+
files_written=data.get("files_written", []),
|
|
41
|
+
attachments_downloaded=data.get("attachments_downloaded", 0),
|
|
42
|
+
rows_exported=data.get("rows_exported", 0),
|
|
43
|
+
warnings=data.get("warnings", []),
|
|
44
|
+
)
|
|
45
|
+
except (json.JSONDecodeError, KeyError, TypeError):
|
|
46
|
+
return cls()
|
|
47
|
+
|
|
48
|
+
def save(self, path: Path) -> None:
|
|
49
|
+
"""Save checkpoint to file."""
|
|
50
|
+
from datetime import datetime
|
|
51
|
+
|
|
52
|
+
self.timestamp = datetime.now(UTC).isoformat()
|
|
53
|
+
data = {
|
|
54
|
+
"version": self.version,
|
|
55
|
+
"timestamp": self.timestamp,
|
|
56
|
+
"page_ids": sorted(self.page_ids),
|
|
57
|
+
"database_ids": sorted(self.database_ids),
|
|
58
|
+
"files_written": self.files_written,
|
|
59
|
+
"attachments_downloaded": self.attachments_downloaded,
|
|
60
|
+
"rows_exported": self.rows_exported,
|
|
61
|
+
"warnings": self.warnings,
|
|
62
|
+
}
|
|
63
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
64
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
65
|
+
json.dump(data, f, indent=2)
|
|
66
|
+
|
|
67
|
+
def is_page_exported(self, page_id: str) -> bool:
|
|
68
|
+
return page_id in self.page_ids
|
|
69
|
+
|
|
70
|
+
def is_database_exported(self, db_id: str) -> bool:
|
|
71
|
+
return db_id in self.database_ids
|
|
72
|
+
|
|
73
|
+
def add_page(self, page_id: str) -> None:
|
|
74
|
+
self.page_ids.add(page_id)
|
|
75
|
+
|
|
76
|
+
def add_database(self, db_id: str) -> None:
|
|
77
|
+
self.database_ids.add(db_id)
|
|
78
|
+
|
|
79
|
+
def add_file(self, rel_path: str) -> None:
|
|
80
|
+
if rel_path not in self.files_written:
|
|
81
|
+
self.files_written.append(rel_path)
|
|
82
|
+
|
|
83
|
+
def add_attachment(self) -> None:
|
|
84
|
+
self.attachments_downloaded += 1
|
|
85
|
+
|
|
86
|
+
def add_rows(self, count: int) -> None:
|
|
87
|
+
self.rows_exported += count
|
|
88
|
+
|
|
89
|
+
def add_warning(self, warning: str) -> None:
|
|
90
|
+
self.warnings.append(warning)
|
|
91
|
+
|
|
92
|
+
def get_stats(self) -> dict[str, Any]:
|
|
93
|
+
return {
|
|
94
|
+
"pages_exported": len(self.page_ids),
|
|
95
|
+
"databases_exported": len(self.database_ids),
|
|
96
|
+
"files_written": len(self.files_written),
|
|
97
|
+
"attachments_downloaded": self.attachments_downloaded,
|
|
98
|
+
"rows_exported": self.rows_exported,
|
|
99
|
+
"warnings_count": len(self.warnings),
|
|
100
|
+
}
|