nao-core 0.0.38__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. nao_core/__init__.py +2 -0
  2. nao_core/__init__.py.bak +2 -0
  3. nao_core/bin/build-info.json +5 -0
  4. nao_core/bin/fastapi/main.py +268 -0
  5. nao_core/bin/fastapi/test_main.py +156 -0
  6. nao_core/bin/migrations-postgres/0000_user_auth_and_chat_tables.sql +98 -0
  7. nao_core/bin/migrations-postgres/0001_message_feedback.sql +9 -0
  8. nao_core/bin/migrations-postgres/0002_chat_message_stop_reason_and_error_message.sql +2 -0
  9. nao_core/bin/migrations-postgres/0003_handle_slack_with_thread.sql +2 -0
  10. nao_core/bin/migrations-postgres/0004_input_and_output_tokens.sql +8 -0
  11. nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
  12. nao_core/bin/migrations-postgres/0006_llm_model_ids.sql +4 -0
  13. nao_core/bin/migrations-postgres/0007_chat_message_llm_info.sql +2 -0
  14. nao_core/bin/migrations-postgres/meta/0000_snapshot.json +707 -0
  15. nao_core/bin/migrations-postgres/meta/0001_snapshot.json +766 -0
  16. nao_core/bin/migrations-postgres/meta/0002_snapshot.json +778 -0
  17. nao_core/bin/migrations-postgres/meta/0003_snapshot.json +799 -0
  18. nao_core/bin/migrations-postgres/meta/0004_snapshot.json +847 -0
  19. nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
  20. nao_core/bin/migrations-postgres/meta/0006_snapshot.json +1141 -0
  21. nao_core/bin/migrations-postgres/meta/_journal.json +62 -0
  22. nao_core/bin/migrations-sqlite/0000_user_auth_and_chat_tables.sql +98 -0
  23. nao_core/bin/migrations-sqlite/0001_message_feedback.sql +8 -0
  24. nao_core/bin/migrations-sqlite/0002_chat_message_stop_reason_and_error_message.sql +2 -0
  25. nao_core/bin/migrations-sqlite/0003_handle_slack_with_thread.sql +2 -0
  26. nao_core/bin/migrations-sqlite/0004_input_and_output_tokens.sql +8 -0
  27. nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
  28. nao_core/bin/migrations-sqlite/0006_llm_model_ids.sql +4 -0
  29. nao_core/bin/migrations-sqlite/0007_chat_message_llm_info.sql +2 -0
  30. nao_core/bin/migrations-sqlite/meta/0000_snapshot.json +674 -0
  31. nao_core/bin/migrations-sqlite/meta/0001_snapshot.json +735 -0
  32. nao_core/bin/migrations-sqlite/meta/0002_snapshot.json +749 -0
  33. nao_core/bin/migrations-sqlite/meta/0003_snapshot.json +763 -0
  34. nao_core/bin/migrations-sqlite/meta/0004_snapshot.json +819 -0
  35. nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
  36. nao_core/bin/migrations-sqlite/meta/0006_snapshot.json +1100 -0
  37. nao_core/bin/migrations-sqlite/meta/_journal.json +62 -0
  38. nao_core/bin/nao-chat-server +0 -0
  39. nao_core/bin/public/assets/code-block-F6WJLWQG-CV0uOmNJ.js +153 -0
  40. nao_core/bin/public/assets/index-DcbndLHo.css +1 -0
  41. nao_core/bin/public/assets/index-t1hZI3nl.js +560 -0
  42. nao_core/bin/public/favicon.ico +0 -0
  43. nao_core/bin/public/index.html +18 -0
  44. nao_core/bin/rg +0 -0
  45. nao_core/commands/__init__.py +6 -0
  46. nao_core/commands/chat.py +225 -0
  47. nao_core/commands/debug.py +158 -0
  48. nao_core/commands/init.py +358 -0
  49. nao_core/commands/sync/__init__.py +124 -0
  50. nao_core/commands/sync/accessors.py +290 -0
  51. nao_core/commands/sync/cleanup.py +156 -0
  52. nao_core/commands/sync/providers/__init__.py +32 -0
  53. nao_core/commands/sync/providers/base.py +113 -0
  54. nao_core/commands/sync/providers/databases/__init__.py +17 -0
  55. nao_core/commands/sync/providers/databases/bigquery.py +79 -0
  56. nao_core/commands/sync/providers/databases/databricks.py +79 -0
  57. nao_core/commands/sync/providers/databases/duckdb.py +78 -0
  58. nao_core/commands/sync/providers/databases/postgres.py +79 -0
  59. nao_core/commands/sync/providers/databases/provider.py +129 -0
  60. nao_core/commands/sync/providers/databases/snowflake.py +79 -0
  61. nao_core/commands/sync/providers/notion/__init__.py +5 -0
  62. nao_core/commands/sync/providers/notion/provider.py +205 -0
  63. nao_core/commands/sync/providers/repositories/__init__.py +5 -0
  64. nao_core/commands/sync/providers/repositories/provider.py +134 -0
  65. nao_core/commands/sync/registry.py +23 -0
  66. nao_core/config/__init__.py +30 -0
  67. nao_core/config/base.py +100 -0
  68. nao_core/config/databases/__init__.py +55 -0
  69. nao_core/config/databases/base.py +85 -0
  70. nao_core/config/databases/bigquery.py +99 -0
  71. nao_core/config/databases/databricks.py +79 -0
  72. nao_core/config/databases/duckdb.py +41 -0
  73. nao_core/config/databases/postgres.py +83 -0
  74. nao_core/config/databases/snowflake.py +125 -0
  75. nao_core/config/exceptions.py +7 -0
  76. nao_core/config/llm/__init__.py +19 -0
  77. nao_core/config/notion/__init__.py +8 -0
  78. nao_core/config/repos/__init__.py +3 -0
  79. nao_core/config/repos/base.py +11 -0
  80. nao_core/config/slack/__init__.py +12 -0
  81. nao_core/context/__init__.py +54 -0
  82. nao_core/context/base.py +57 -0
  83. nao_core/context/git.py +177 -0
  84. nao_core/context/local.py +59 -0
  85. nao_core/main.py +13 -0
  86. nao_core/templates/__init__.py +41 -0
  87. nao_core/templates/context.py +193 -0
  88. nao_core/templates/defaults/databases/columns.md.j2 +23 -0
  89. nao_core/templates/defaults/databases/description.md.j2 +32 -0
  90. nao_core/templates/defaults/databases/preview.md.j2 +22 -0
  91. nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
  92. nao_core/templates/engine.py +133 -0
  93. nao_core/templates/render.py +196 -0
  94. nao_core-0.0.38.dist-info/METADATA +150 -0
  95. nao_core-0.0.38.dist-info/RECORD +98 -0
  96. nao_core-0.0.38.dist-info/WHEEL +4 -0
  97. nao_core-0.0.38.dist-info/entry_points.txt +2 -0
  98. nao_core-0.0.38.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,79 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_databricks(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync Databricks database schema to markdown files.

    Files are written to
    ``<base_path>/type=databricks/database=<db>/schema=<schema>/table=<table>/<accessor.filename>``.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_name = db_config.get_database_name()
    db_path = base_path / "type=databricks" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # A configured schema restricts the sync to that schema only
    if db_config.schema:
        schemas = [db_config.schema]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best effort: a schema whose tables cannot be listed is skipped
            # so that the remaining schemas are still synced.
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f"  [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit UTF-8 so the output does not depend on the locale encoding
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
@@ -0,0 +1,78 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_duckdb(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync DuckDB database schema to markdown files.

    Files are written to
    ``<base_path>/type=duckdb/database=<db>/schema=<schema>/table=<table>/<accessor.filename>``.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()

    db_name = db_config.get_database_name()
    db_path = base_path / "type=duckdb" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # List all schemas in DuckDB
    schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best effort: a schema whose tables cannot be listed is skipped
            # so that the remaining schemas are still synced.
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f"  [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit UTF-8 so the output does not depend on the locale encoding
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
@@ -0,0 +1,79 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_postgres(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync PostgreSQL database schema to markdown files.

    Files are written to
    ``<base_path>/type=postgres/database=<db>/schema=<schema>/table=<table>/<accessor.filename>``.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_name = db_config.get_database_name()
    db_path = base_path / "type=postgres" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # A configured schema restricts the sync to that schema only
    if db_config.schema_name:
        schemas = [db_config.schema_name]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best effort: a schema whose tables cannot be listed is skipped
            # so that the remaining schemas are still synced.
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f"  [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit UTF-8 so the output does not depend on the locale encoding
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
@@ -0,0 +1,129 @@
1
+ """Database sync provider implementation."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from rich.console import Console
7
+ from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
8
+
9
+ from nao_core.commands.sync.accessors import DataAccessor
10
+ from nao_core.commands.sync.cleanup import DatabaseSyncState, cleanup_stale_databases, cleanup_stale_paths
11
+ from nao_core.commands.sync.registry import get_accessors
12
+ from nao_core.config import AnyDatabaseConfig, NaoConfig
13
+
14
+ from ..base import SyncProvider, SyncResult
15
+ from .bigquery import sync_bigquery
16
+ from .databricks import sync_databricks
17
+ from .duckdb import sync_duckdb
18
+ from .postgres import sync_postgres
19
+ from .snowflake import sync_snowflake
20
+
21
console = Console()

# Registry mapping database types to their sync functions.
# DatabaseSyncProvider.sync looks up each configured database's ``type`` here;
# a type without an entry is reported as unsupported and skipped.
DATABASE_SYNC_FUNCTIONS = {
    "bigquery": sync_bigquery,
    "duckdb": sync_duckdb,
    "databricks": sync_databricks,
    "snowflake": sync_snowflake,
    "postgres": sync_postgres,
}
31
+
32
+
33
class DatabaseSyncProvider(SyncProvider):
    """Sync provider that writes database schemas out as markdown documentation."""

    @property
    def name(self) -> str:
        """Display name used in console output and in the sync result."""
        return "Databases"

    @property
    def emoji(self) -> str:
        """Emoji printed in front of the provider name."""
        return "🗄️"

    @property
    def default_output_dir(self) -> str:
        """Default output directory name for this provider."""
        return "databases"

    def pre_sync(self, config: NaoConfig, output_path: Path) -> None:
        """Run the stale-database cleanup for the configured databases.

        Always run before syncing.
        """
        cleanup_stale_databases(config.databases, output_path, verbose=True)

    def get_items(self, config: NaoConfig) -> list[AnyDatabaseConfig]:
        """Return the database configurations declared in ``config``."""
        return config.databases

    def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
        """Sync every configured database and report the totals.

        A failure while syncing one database is printed and does not stop the
        remaining databases from being processed.

        Args:
            items: List of database configurations
            output_path: Base path where database schemas are stored
            project_path: Path to the nao project root (for template resolution)

        Returns:
            SyncResult with datasets and tables synced
        """
        if not items:
            console.print("\n[dim]No databases configured[/dim]")
            return SyncResult(provider_name=self.name, items_synced=0)

        # Accessors resolve their templates relative to the project path
        DataAccessor.set_project_path(project_path)

        console.print(f"\n[bold cyan]{self.emoji} Syncing {self.name}[/bold cyan]")
        console.print(f"[dim]Location:[/dim] {output_path.absolute()}\n")

        dataset_count = 0
        table_count = 0
        finished: list[DatabaseSyncState] = []

        columns = (
            SpinnerColumn(style="dim"),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(bar_width=30, style="dim", complete_style="cyan", finished_style="green"),
            TaskProgressColumn(),
        )

        with Progress(*columns, console=console, transient=False) as progress:
            for db in items:
                # Accessors are selected per database from its own config
                db_accessors = get_accessors(db.accessors)
                accessor_names = [a.filename.replace(".md", "") for a in db_accessors]

                try:
                    console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")

                    sync_fn = DATABASE_SYNC_FUNCTIONS.get(db.type)
                    if not sync_fn:
                        console.print(f"[yellow]⚠ Unsupported database type: {db.type}[/yellow]")
                        continue

                    state = sync_fn(db, output_path, progress, db_accessors)
                    finished.append(state)
                    dataset_count += state.schemas_synced
                    table_count += state.tables_synced
                except Exception as e:
                    console.print(f"[bold red]✗[/bold red] Failed to sync {db.name}: {e}")

        # Stale files are removed only once every database has been synced
        removed_count = 0
        for state in finished:
            removed_count += cleanup_stale_paths(state, verbose=True)

        summary = f"{table_count} tables across {dataset_count} datasets"
        if removed_count > 0:
            summary += f", {removed_count} stale removed"

        return SyncResult(
            provider_name=self.name,
            items_synced=table_count,
            details={
                "datasets": dataset_count,
                "tables": table_count,
                "removed": removed_count,
            },
            summary=summary,
        )
@@ -0,0 +1,79 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_snowflake(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync Snowflake database schema to markdown files.

    Files are written to
    ``<base_path>/type=snowflake/database=<db>/schema=<schema>/table=<table>/<accessor.filename>``.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_name = db_config.get_database_name()
    db_path = base_path / "type=snowflake" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # A configured schema restricts the sync to that schema only
    if db_config.schema:
        schemas = [db_config.schema]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best effort: a schema whose tables cannot be listed is skipped
            # so that the remaining schemas are still synced.
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f"  [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit UTF-8 so the output does not depend on the locale encoding
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
@@ -0,0 +1,5 @@
1
+ """Notion syncing functionality for syncing Notion pages and databases."""
2
+
3
+ from .provider import NotionSyncProvider
4
+
5
+ __all__ = ["NotionSyncProvider"]
@@ -0,0 +1,205 @@
1
+ import re
2
+ from pathlib import Path
3
+ from typing import Any, cast
4
+
5
+ from notion2md.exporter.block import StringExporter
6
+ from notion_client import Client
7
+ from rich.console import Console
8
+ from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
9
+
10
+ from nao_core.config.base import NaoConfig
11
+ from nao_core.config.notion import NotionConfig
12
+
13
+ from ..base import SyncProvider, SyncResult
14
+
15
console = Console()

# Notion page IDs are 32-character hex strings (UUID without dashes).
# The pattern is unanchored and matches lowercase hex digits only.
NOTION_PAGE_ID_PATTERN = re.compile(r"[a-f0-9]{32}")
19
+
20
+
21
def cleanup_stale_pages(synced_files: set[str], output_path: Path, verbose: bool = False) -> int:
    """Delete ``.md`` files in ``output_path`` that this run did not produce.

    Only regular files directly inside ``output_path`` are considered;
    subdirectories and files with other suffixes are left untouched.

    Args:
        synced_files: Set of filenames that were synced in this run.
        output_path: Path where synced markdown files are stored.
        verbose: Whether to print cleanup messages.

    Returns:
        Number of stale files removed (0 when ``output_path`` does not exist).
    """
    if not output_path.exists():
        return 0

    stale_total = 0
    for entry in output_path.iterdir():
        is_markdown_file = entry.is_file() and entry.suffix == ".md"
        if not is_markdown_file or entry.name in synced_files:
            continue

        entry.unlink()
        stale_total += 1
        if verbose:
            console.print(f"  [dim red]removing stale page:[/dim red] {entry.name}")

    return stale_total
45
+
46
+
47
# Matches a markdown image, ![alt](url), together with one trailing newline if present
IMAGE_PATTERN = re.compile(r"!\[[^\]]*\]\([^)]+\)\n?")


def strip_images(markdown: str) -> str:
    """Return ``markdown`` with every image reference swapped for ``[image]``.

    The placeholder always ends with a newline, whether or not the image
    reference was followed by one.
    """
    placeholder = "[image]\n"
    return IMAGE_PATTERN.sub(placeholder, markdown)
54
+
55
+
56
# Lenient fallback: the same 32 hex digits written as a dashed UUID (8-4-4-4-12)
# and/or in uppercase.
_LENIENT_PAGE_ID_PATTERN = re.compile(
    r"[a-f0-9]{8}-?[a-f0-9]{4}-?[a-f0-9]{4}-?[a-f0-9]{4}-?[a-f0-9]{12}",
    re.IGNORECASE,
)


def extract_page_id(page_url: str) -> str:
    """Extract Notion page ID from a URL.

    Handles URLs like:
    - https://www.notion.so/naolabs/Conversational-analytics-2bfc7a70bc0680978900d1e85ece83a0
    - https://www.notion.so/2bfc7a70bc0680978900d1e85ece83a0
    - 2bfc7a70bc0680978900d1e85ece83a0 (raw ID)
    - 2bfc7a70-bc06-8097-8900-d1e85ece83a0 (dashed UUID form)

    Returns:
        The page ID as 32 lowercase hex characters without dashes.

    Raises:
        ValueError: If no page ID can be found in ``page_url``.
    """
    # The strict pattern is tried first so inputs that already resolved keep
    # producing exactly the same ID.
    match = NOTION_PAGE_ID_PATTERN.search(page_url)
    if match:
        return match.group(0)

    lenient = _LENIENT_PAGE_ID_PATTERN.search(page_url)
    if lenient:
        # Normalize to the undashed lowercase form used by the strict pattern
        return lenient.group(0).replace("-", "").lower()

    raise ValueError(f"Could not extract Notion page ID from: {page_url}")
68
+
69
+
70
def get_page_title(client: Client, page_id: str) -> str:
    """Get the title of a Notion page.

    The title is read from the page property whose ``type`` is ``"title"``.
    Well-known property names are checked first; every other property is then
    scanned so that title properties with a custom name are found too.

    Args:
        client: Notion client used to retrieve the page.
        page_id: ID of the page to look up.

    Returns:
        The concatenated plain text of the title, or ``page_id`` when the page
        has no non-empty title property.
    """
    page = cast(dict[str, Any], client.pages.retrieve(page_id=page_id))
    properties = page.get("properties", {})

    # Common names go first so pages that already resolved keep the same title
    common_names = ("title", "Title", "Name", "name", "Page")
    preferred = [properties[prop_name] for prop_name in common_names if prop_name in properties]

    for title_prop in (*preferred, *properties.values()):
        if title_prop.get("type") != "title":
            continue
        title_array = title_prop.get("title", [])
        if title_array:
            return "".join(t.get("plain_text", "") for t in title_array)

    # Fallback to page ID if no title found
    return page_id
86
+
87
+
88
def get_page_as_markdown(page_url: str, api_key: str) -> tuple[str, str]:
    """Fetch a Notion page and render it as markdown with a front-matter header.

    The header carries the page title and ID; image references in the
    exported body are replaced by a placeholder.

    Returns:
        Tuple of (title, markdown_content)
    """
    page_id = extract_page_id(page_url)

    # The title is also used by the caller to build the output filename
    title = get_page_title(Client(auth=api_key), page_id)

    # notion2md does the block-to-markdown export; images are stripped
    # since we can't read them
    exported = StringExporter(block_id=page_id, token=api_key).export()
    body = strip_images(exported)

    front_matter = f"---\ntitle: {title}\nid: {page_id}\n---\n"
    return title, f"{front_matter}\n{body}\n"
116
+
117
+
118
class NotionSyncProvider(SyncProvider):
    """Provider for syncing Notion pages to local markdown files."""

    @property
    def name(self) -> str:
        """Display name used in console output and in the sync result."""
        return "Notion"

    @property
    def emoji(self) -> str:
        """Emoji printed in front of the provider name."""
        return "📝"

    @property
    def default_output_dir(self) -> str:
        """Default output directory name for this provider."""
        return "docs/notion"

    def get_items(self, config: NaoConfig) -> list[NotionConfig]:
        """Return the Notion configuration as a one-element list, or an empty list."""
        return [config.notion] if config.notion else []

    def sync(self, items: list[NotionConfig], output_path: Path, project_path: Path | None = None) -> SyncResult:
        """Sync Notion pages to local filesystem as markdown files.

        Each page is written to ``<output_path>/<sanitized-title>.md``. A page
        that fails to sync is reported and skipped. Markdown files in
        ``output_path`` that were not written by this run are removed afterwards.

        Args:
            items: Notion configuration with pages to sync.
            output_path: Path where synced markdown files should be written.
            project_path: Path to the nao project root.

        Returns:
            SyncResult with statistics about what was synced.
        """
        if not items:
            console.print("\n[dim]No Notion pages configured[/dim]")
            return SyncResult(provider_name=self.name, items_synced=0, summary="No Notion configurations configured")

        notion_config = items[0]
        output_path.mkdir(parents=True, exist_ok=True)
        pages_synced = 0
        synced_pages: list[str] = []
        synced_files: set[str] = set()

        console.print(f"\n[bold cyan]{self.emoji} Syncing {self.name}[/bold cyan]")
        console.print(f"[dim]Location:[/dim] {output_path.absolute()}\n")

        api_key = notion_config.api_key
        total_pages = len(notion_config.pages)

        with Progress(
            SpinnerColumn(style="dim"),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(bar_width=30, style="dim", complete_style="cyan", finished_style="green"),
            TaskProgressColumn(),
            console=console,
            transient=False,
        ) as progress:
            task = progress.add_task("Syncing pages", total=total_pages)

            for page_url in notion_config.pages:
                try:
                    title, markdown = get_page_as_markdown(page_url, api_key)

                    # Sanitize title for filename
                    safe_title = re.sub(r"[^\w\s-]", "", title).strip().replace(" ", "-").lower()
                    if not safe_title:
                        # A title with no filename-safe characters (e.g. only emoji)
                        # would give ".md", which collides between pages and has no
                        # ".md" suffix for the stale-page cleanup; use the page ID.
                        safe_title = extract_page_id(page_url)
                    filename = f"{safe_title}.md"

                    # Explicit UTF-8: Notion content frequently contains non-ASCII text
                    with open(output_path / filename, "w", encoding="utf-8") as f:
                        f.write(markdown)

                    pages_synced += 1
                    synced_pages.append(title)
                    synced_files.add(filename)
                    progress.update(task, advance=1, description=f"Synced: {title}")
                except Exception as e:
                    console.print(f"[bold red]✗[/bold red] Failed to sync page {page_url}: {e}")
                    progress.update(task, advance=1)

        # Clean up stale pages
        removed_count = cleanup_stale_pages(synced_files, output_path, verbose=True)

        # Build summary
        summary = f"{pages_synced} pages synced as markdown"
        if removed_count > 0:
            summary += f", {removed_count} stale removed"

        return SyncResult(
            provider_name=self.name,
            items_synced=pages_synced,
            details={"pages": synced_pages, "removed": removed_count},
            summary=summary,
        )
@@ -0,0 +1,5 @@
1
+ """Repository syncing functionality for cloning and pulling git repositories."""
2
+
3
+ from .provider import RepositorySyncProvider
4
+
5
+ __all__ = ["RepositorySyncProvider"]