nao-core 0.0.38__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +2 -0
- nao_core/__init__.py.bak +2 -0
- nao_core/bin/build-info.json +5 -0
- nao_core/bin/fastapi/main.py +268 -0
- nao_core/bin/fastapi/test_main.py +156 -0
- nao_core/bin/migrations-postgres/0000_user_auth_and_chat_tables.sql +98 -0
- nao_core/bin/migrations-postgres/0001_message_feedback.sql +9 -0
- nao_core/bin/migrations-postgres/0002_chat_message_stop_reason_and_error_message.sql +2 -0
- nao_core/bin/migrations-postgres/0003_handle_slack_with_thread.sql +2 -0
- nao_core/bin/migrations-postgres/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
- nao_core/bin/migrations-postgres/0006_llm_model_ids.sql +4 -0
- nao_core/bin/migrations-postgres/0007_chat_message_llm_info.sql +2 -0
- nao_core/bin/migrations-postgres/meta/0000_snapshot.json +707 -0
- nao_core/bin/migrations-postgres/meta/0001_snapshot.json +766 -0
- nao_core/bin/migrations-postgres/meta/0002_snapshot.json +778 -0
- nao_core/bin/migrations-postgres/meta/0003_snapshot.json +799 -0
- nao_core/bin/migrations-postgres/meta/0004_snapshot.json +847 -0
- nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
- nao_core/bin/migrations-postgres/meta/0006_snapshot.json +1141 -0
- nao_core/bin/migrations-postgres/meta/_journal.json +62 -0
- nao_core/bin/migrations-sqlite/0000_user_auth_and_chat_tables.sql +98 -0
- nao_core/bin/migrations-sqlite/0001_message_feedback.sql +8 -0
- nao_core/bin/migrations-sqlite/0002_chat_message_stop_reason_and_error_message.sql +2 -0
- nao_core/bin/migrations-sqlite/0003_handle_slack_with_thread.sql +2 -0
- nao_core/bin/migrations-sqlite/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
- nao_core/bin/migrations-sqlite/0006_llm_model_ids.sql +4 -0
- nao_core/bin/migrations-sqlite/0007_chat_message_llm_info.sql +2 -0
- nao_core/bin/migrations-sqlite/meta/0000_snapshot.json +674 -0
- nao_core/bin/migrations-sqlite/meta/0001_snapshot.json +735 -0
- nao_core/bin/migrations-sqlite/meta/0002_snapshot.json +749 -0
- nao_core/bin/migrations-sqlite/meta/0003_snapshot.json +763 -0
- nao_core/bin/migrations-sqlite/meta/0004_snapshot.json +819 -0
- nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
- nao_core/bin/migrations-sqlite/meta/0006_snapshot.json +1100 -0
- nao_core/bin/migrations-sqlite/meta/_journal.json +62 -0
- nao_core/bin/nao-chat-server +0 -0
- nao_core/bin/public/assets/code-block-F6WJLWQG-CV0uOmNJ.js +153 -0
- nao_core/bin/public/assets/index-DcbndLHo.css +1 -0
- nao_core/bin/public/assets/index-t1hZI3nl.js +560 -0
- nao_core/bin/public/favicon.ico +0 -0
- nao_core/bin/public/index.html +18 -0
- nao_core/bin/rg +0 -0
- nao_core/commands/__init__.py +6 -0
- nao_core/commands/chat.py +225 -0
- nao_core/commands/debug.py +158 -0
- nao_core/commands/init.py +358 -0
- nao_core/commands/sync/__init__.py +124 -0
- nao_core/commands/sync/accessors.py +290 -0
- nao_core/commands/sync/cleanup.py +156 -0
- nao_core/commands/sync/providers/__init__.py +32 -0
- nao_core/commands/sync/providers/base.py +113 -0
- nao_core/commands/sync/providers/databases/__init__.py +17 -0
- nao_core/commands/sync/providers/databases/bigquery.py +79 -0
- nao_core/commands/sync/providers/databases/databricks.py +79 -0
- nao_core/commands/sync/providers/databases/duckdb.py +78 -0
- nao_core/commands/sync/providers/databases/postgres.py +79 -0
- nao_core/commands/sync/providers/databases/provider.py +129 -0
- nao_core/commands/sync/providers/databases/snowflake.py +79 -0
- nao_core/commands/sync/providers/notion/__init__.py +5 -0
- nao_core/commands/sync/providers/notion/provider.py +205 -0
- nao_core/commands/sync/providers/repositories/__init__.py +5 -0
- nao_core/commands/sync/providers/repositories/provider.py +134 -0
- nao_core/commands/sync/registry.py +23 -0
- nao_core/config/__init__.py +30 -0
- nao_core/config/base.py +100 -0
- nao_core/config/databases/__init__.py +55 -0
- nao_core/config/databases/base.py +85 -0
- nao_core/config/databases/bigquery.py +99 -0
- nao_core/config/databases/databricks.py +79 -0
- nao_core/config/databases/duckdb.py +41 -0
- nao_core/config/databases/postgres.py +83 -0
- nao_core/config/databases/snowflake.py +125 -0
- nao_core/config/exceptions.py +7 -0
- nao_core/config/llm/__init__.py +19 -0
- nao_core/config/notion/__init__.py +8 -0
- nao_core/config/repos/__init__.py +3 -0
- nao_core/config/repos/base.py +11 -0
- nao_core/config/slack/__init__.py +12 -0
- nao_core/context/__init__.py +54 -0
- nao_core/context/base.py +57 -0
- nao_core/context/git.py +177 -0
- nao_core/context/local.py +59 -0
- nao_core/main.py +13 -0
- nao_core/templates/__init__.py +41 -0
- nao_core/templates/context.py +193 -0
- nao_core/templates/defaults/databases/columns.md.j2 +23 -0
- nao_core/templates/defaults/databases/description.md.j2 +32 -0
- nao_core/templates/defaults/databases/preview.md.j2 +22 -0
- nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
- nao_core/templates/engine.py +133 -0
- nao_core/templates/render.py +196 -0
- nao_core-0.0.38.dist-info/METADATA +150 -0
- nao_core-0.0.38.dist-info/RECORD +98 -0
- nao_core-0.0.38.dist-info/WHEEL +4 -0
- nao_core-0.0.38.dist-info/entry_points.txt +2 -0
- nao_core-0.0.38.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_databricks(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync Databricks database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_name = db_config.get_database_name()
    # Hive-style partitioned layout: type=databricks/database=<db>/schema=<s>/table=<t>
    db_path = base_path / "type=databricks" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # An explicitly configured schema restricts the sync; otherwise discover all.
    if db_config.schema:
        schemas = [db_config.schema]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas we cannot list (e.g. missing permissions).
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f" [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit encoding: write_text otherwise uses the locale's
                # preferred encoding, which can fail on non-UTF-8 platforms.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_duckdb(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync DuckDB database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()

    db_name = db_config.get_database_name()
    # Hive-style partitioned layout: type=duckdb/database=<db>/schema=<s>/table=<t>
    db_path = base_path / "type=duckdb" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # List all schemas in DuckDB
    schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas we cannot list (e.g. missing permissions).
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f" [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit encoding: write_text otherwise uses the locale's
                # preferred encoding, which can fail on non-UTF-8 platforms.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_postgres(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync PostgreSQL database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_name = db_config.get_database_name()
    # Hive-style partitioned layout: type=postgres/database=<db>/schema=<s>/table=<t>
    db_path = base_path / "type=postgres" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # An explicitly configured schema restricts the sync; otherwise discover all.
    if db_config.schema_name:
        schemas = [db_config.schema_name]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas we cannot list (e.g. missing permissions).
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f" [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit encoding: write_text otherwise uses the locale's
                # preferred encoding, which can fail on non-UTF-8 platforms.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Database sync provider implementation."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
|
|
8
|
+
|
|
9
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
10
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState, cleanup_stale_databases, cleanup_stale_paths
|
|
11
|
+
from nao_core.commands.sync.registry import get_accessors
|
|
12
|
+
from nao_core.config import AnyDatabaseConfig, NaoConfig
|
|
13
|
+
|
|
14
|
+
from ..base import SyncProvider, SyncResult
|
|
15
|
+
from .bigquery import sync_bigquery
|
|
16
|
+
from .databricks import sync_databricks
|
|
17
|
+
from .duckdb import sync_duckdb
|
|
18
|
+
from .postgres import sync_postgres
|
|
19
|
+
from .snowflake import sync_snowflake
|
|
20
|
+
|
|
21
|
+
# Shared Rich console used for all user-facing output in this module.
console = Console()

# Registry mapping database types to their sync functions.
# `DatabaseSyncProvider.sync` looks up each configured database's `type`
# here; unknown types are reported as unsupported rather than raising.
DATABASE_SYNC_FUNCTIONS = {
    "bigquery": sync_bigquery,
    "duckdb": sync_duckdb,
    "databricks": sync_databricks,
    "snowflake": sync_snowflake,
    "postgres": sync_postgres,
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DatabaseSyncProvider(SyncProvider):
    """Sync provider that exports database schemas as markdown documentation."""

    @property
    def name(self) -> str:
        return "Databases"

    @property
    def emoji(self) -> str:
        return "🗄️"

    @property
    def default_output_dir(self) -> str:
        return "databases"

    def pre_sync(self, config: NaoConfig, output_path: Path) -> None:
        """Always run before syncing: drop output for databases no longer configured."""
        cleanup_stale_databases(config.databases, output_path, verbose=True)

    def get_items(self, config: NaoConfig) -> list[AnyDatabaseConfig]:
        """Return the database configurations this provider should sync."""
        return config.databases

    def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
        """Sync all configured databases.

        Args:
            items: List of database configurations
            output_path: Base path where database schemas are stored
            project_path: Path to the nao project root (for template resolution)

        Returns:
            SyncResult with datasets and tables synced
        """
        if not items:
            console.print("\n[dim]No databases configured[/dim]")
            return SyncResult(provider_name=self.name, items_synced=0)

        # Accessor templates may live in the project tree, so register its root.
        DataAccessor.set_project_path(project_path)

        dataset_count = 0
        table_count = 0
        removed_count = 0
        completed_states: list[DatabaseSyncState] = []

        console.print(f"\n[bold cyan]{self.emoji} Syncing {self.name}[/bold cyan]")
        console.print(f"[dim]Location:[/dim] {output_path.absolute()}\n")

        progress_bar = Progress(
            SpinnerColumn(style="dim"),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(bar_width=30, style="dim", complete_style="cyan", finished_style="green"),
            TaskProgressColumn(),
            console=console,
            transient=False,
        )
        with progress_bar as progress:
            for db in items:
                # Each database config declares which accessors to run.
                db_accessors = get_accessors(db.accessors)
                accessor_names = [a.filename.replace(".md", "") for a in db_accessors]

                try:
                    console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")

                    sync_fn = DATABASE_SYNC_FUNCTIONS.get(db.type)
                    if sync_fn is None:
                        console.print(f"[yellow]⚠ Unsupported database type: {db.type}[/yellow]")
                    else:
                        state = sync_fn(db, output_path, progress, db_accessors)
                        completed_states.append(state)
                        dataset_count += state.schemas_synced
                        table_count += state.tables_synced
                except Exception as e:
                    # One failing database must not abort the remaining syncs.
                    console.print(f"[bold red]✗[/bold red] Failed to sync {db.name}: {e}")

        # Stale files are cleaned up only after every database finished syncing.
        for state in completed_states:
            removed_count += cleanup_stale_paths(state, verbose=True)

        summary = f"{table_count} tables across {dataset_count} datasets"
        if removed_count > 0:
            summary += f", {removed_count} stale removed"

        return SyncResult(
            provider_name=self.name,
            items_synced=table_count,
            details={
                "datasets": dataset_count,
                "tables": table_count,
                "removed": removed_count,
            },
            summary=summary,
        )
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_snowflake(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync Snowflake database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_name = db_config.get_database_name()
    # Hive-style partitioned layout: type=snowflake/database=<db>/schema=<s>/table=<t>
    db_path = base_path / "type=snowflake" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # An explicitly configured schema restricts the sync; otherwise discover all.
    if db_config.schema:
        schemas = [db_config.schema]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas we cannot list (e.g. missing permissions).
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f" [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Explicit encoding: write_text otherwise uses the locale's
                # preferred encoding, which can fail on non-UTF-8 platforms.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
|
|
5
|
+
from notion2md.exporter.block import StringExporter
|
|
6
|
+
from notion_client import Client
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
|
|
9
|
+
|
|
10
|
+
from nao_core.config.base import NaoConfig
|
|
11
|
+
from nao_core.config.notion import NotionConfig
|
|
12
|
+
|
|
13
|
+
from ..base import SyncProvider, SyncResult
|
|
14
|
+
|
|
15
|
+
# Shared Rich console used for all user-facing output in this module.
console = Console()

# Notion page IDs are 32-character hex strings (UUID without dashes).
# Used by extract_page_id() to pull the ID out of a full page URL.
NOTION_PAGE_ID_PATTERN = re.compile(r"[a-f0-9]{32}")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def cleanup_stale_pages(synced_files: set[str], output_path: Path, verbose: bool = False) -> int:
    """Delete markdown files in *output_path* that this run did not produce.

    Args:
        synced_files: Set of filenames that were synced in this run.
        output_path: Path where synced markdown files are stored.
        verbose: Whether to print cleanup messages.

    Returns:
        Number of stale files removed.
    """
    if not output_path.exists():
        return 0

    removed_count = 0
    for entry in output_path.iterdir():
        # Only plain .md files are managed by the sync; leave everything else alone.
        stale = entry.is_file() and entry.suffix == ".md" and entry.name not in synced_files
        if not stale:
            continue
        entry.unlink()
        removed_count += 1
        if verbose:
            console.print(f" [dim red]removing stale page:[/dim red] {entry.name}")

    return removed_count
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Pattern to match markdown images: 
|
|
48
|
+
IMAGE_PATTERN = re.compile(r"!\[[^\]]*\]\([^)]+\)\n?")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def strip_images(markdown: str) -> str:
|
|
52
|
+
"""Replace markdown image references with a placeholder."""
|
|
53
|
+
return IMAGE_PATTERN.sub("[image]\n", markdown)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def extract_page_id(page_url: str) -> str:
    """Extract Notion page ID from a URL.

    Handles URLs like:
    - https://www.notion.so/naolabs/Conversational-analytics-2bfc7a70bc0680978900d1e85ece83a0
    - https://www.notion.so/2bfc7a70bc0680978900d1e85ece83a0
    - 2bfc7a70bc0680978900d1e85ece83a0 (raw ID)
    """
    # A page ID is 32 lowercase hex characters: a UUID with the dashes removed.
    found = re.search(r"[a-f0-9]{32}", page_url)
    if found is None:
        raise ValueError(f"Could not extract Notion page ID from: {page_url}")
    return found.group(0)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_page_title(client: Client, page_id: str) -> str:
    """Fetch a page from the Notion API and return its title text.

    Falls back to the raw page ID when no title-typed property is found
    or the title property is empty.
    """
    page = cast(dict[str, Any], client.pages.retrieve(page_id=page_id))
    properties = page.get("properties", {})

    # The title can live under several conventional property names.
    for prop_name in ("title", "Title", "Name", "name", "Page"):
        if prop_name not in properties:
            continue
        title_prop = properties[prop_name]
        if title_prop.get("type") != "title":
            continue
        fragments = title_prop.get("title", [])
        if fragments:
            # Rich-text titles arrive as a list of fragments; join their plain text.
            return "".join(fragment.get("plain_text", "") for fragment in fragments)

    # Fallback to page ID if no title found
    return page_id
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_page_as_markdown(page_url: str, api_key: str) -> tuple[str, str]:
    """Fetch a Notion page and convert it to markdown.

    Returns:
        Tuple of (title, markdown_content)
    """
    page_id = extract_page_id(page_url)

    # The title is needed for the output filename.
    client = Client(auth=api_key)
    title = get_page_title(client, page_id)

    # notion2md handles the block-tree -> markdown conversion.
    exporter = StringExporter(block_id=page_id, token=api_key)
    markdown = exporter.export()

    # Strip images since we can't read them
    markdown = strip_images(markdown)

    # Prepend YAML frontmatter so consumers can recover the title and page ID.
    content = f"""---
title: {title}
id: {page_id}
---

{markdown}
"""

    return title, content
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class NotionSyncProvider(SyncProvider):
    """Provider for syncing Notion pages and databases."""

    @property
    def name(self) -> str:
        return "Notion"

    @property
    def emoji(self) -> str:
        return "📝"

    @property
    def default_output_dir(self) -> str:
        return "docs/notion"

    def get_items(self, config: NaoConfig) -> list[NotionConfig]:
        """Wrap the single optional Notion config in a list for the provider API."""
        if config.notion:
            return [config.notion]
        return []

    def sync(self, items: list[NotionConfig], output_path: Path, project_path: Path | None = None) -> SyncResult:
        """Sync Notion pages to local filesystem as markdown files.

        Args:
            items: Notion configuration with pages to sync.
            output_path: Path where synced markdown files should be written.
            project_path: Path to the nao project root.

        Returns:
            SyncResult with statistics about what was synced.
        """
        if not items:
            console.print("\n[dim]No Notion pages configured[/dim]")
            return SyncResult(provider_name=self.name, items_synced=0, summary="No Notion configurations configured")

        notion_config = items[0]
        output_path.mkdir(parents=True, exist_ok=True)

        synced_count = 0
        synced_titles: list[str] = []
        written_files: set[str] = set()

        console.print(f"\n[bold cyan]{self.emoji} Syncing {self.name}[/bold cyan]")
        console.print(f"[dim]Location:[/dim] {output_path.absolute()}\n")

        api_key = notion_config.api_key

        progress_bar = Progress(
            SpinnerColumn(style="dim"),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(bar_width=30, style="dim", complete_style="cyan", finished_style="green"),
            TaskProgressColumn(),
            console=console,
            transient=False,
        )
        with progress_bar as progress:
            task = progress.add_task("Syncing pages", total=len(notion_config.pages))

            for page_url in notion_config.pages:
                try:
                    title, markdown = get_page_as_markdown(page_url, api_key)

                    # Derive a filesystem-safe slug from the page title.
                    safe_title = re.sub(r"[^\w\s-]", "", title).strip().replace(" ", "-").lower()
                    filename = f"{safe_title}.md"

                    (output_path / filename).write_text(markdown)

                    synced_count += 1
                    synced_titles.append(title)
                    written_files.add(filename)
                    progress.update(task, advance=1, description=f"Synced: {title}")
                except Exception as e:
                    # One failing page must not abort the remaining syncs.
                    console.print(f"[bold red]✗[/bold red] Failed to sync page {page_url}: {e}")
                    progress.update(task, advance=1)

        # Clean up stale pages
        removed_count = cleanup_stale_pages(written_files, output_path, verbose=True)

        summary = f"{synced_count} pages synced as markdown"
        if removed_count > 0:
            summary += f", {removed_count} stale removed"

        return SyncResult(
            provider_name=self.name,
            items_synced=synced_count,
            details={"pages": synced_titles, "removed": removed_count},
            summary=summary,
        )
|