nao-core 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nao_core/__init__.py +1 -1
  2. nao_core/bin/fastapi/main.py +6 -0
  3. nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
  4. nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
  5. nao_core/bin/migrations-postgres/meta/_journal.json +7 -0
  6. nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
  7. nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
  8. nao_core/bin/migrations-sqlite/meta/_journal.json +7 -0
  9. nao_core/bin/nao-chat-server +0 -0
  10. nao_core/bin/public/assets/{code-block-F6WJLWQG-z4zcca7w.js → code-block-F6WJLWQG-TAi8koem.js} +1 -1
  11. nao_core/bin/public/assets/index-BfHcd9Xz.css +1 -0
  12. nao_core/bin/public/assets/{index-DhhS7iVA.js → index-Mzo9bkag.js} +256 -172
  13. nao_core/bin/public/index.html +2 -2
  14. nao_core/commands/chat.py +11 -10
  15. nao_core/commands/init.py +27 -4
  16. nao_core/commands/sync/__init__.py +40 -21
  17. nao_core/commands/sync/accessors.py +218 -139
  18. nao_core/commands/sync/cleanup.py +133 -0
  19. nao_core/commands/sync/providers/__init__.py +30 -0
  20. nao_core/commands/sync/providers/base.py +87 -0
  21. nao_core/commands/sync/providers/databases/__init__.py +17 -0
  22. nao_core/commands/sync/providers/databases/bigquery.py +78 -0
  23. nao_core/commands/sync/providers/databases/databricks.py +79 -0
  24. nao_core/commands/sync/providers/databases/duckdb.py +83 -0
  25. nao_core/commands/sync/providers/databases/postgres.py +78 -0
  26. nao_core/commands/sync/providers/databases/provider.py +123 -0
  27. nao_core/commands/sync/providers/databases/snowflake.py +78 -0
  28. nao_core/commands/sync/providers/repositories/__init__.py +5 -0
  29. nao_core/commands/sync/{repositories.py → providers/repositories/provider.py} +43 -20
  30. nao_core/config/__init__.py +2 -0
  31. nao_core/config/base.py +23 -4
  32. nao_core/config/databases/__init__.py +5 -0
  33. nao_core/config/databases/base.py +1 -0
  34. nao_core/config/databases/postgres.py +78 -0
  35. nao_core/templates/__init__.py +12 -0
  36. nao_core/templates/defaults/databases/columns.md.j2 +23 -0
  37. nao_core/templates/defaults/databases/description.md.j2 +32 -0
  38. nao_core/templates/defaults/databases/preview.md.j2 +22 -0
  39. nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
  40. nao_core/templates/engine.py +133 -0
  41. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/METADATA +6 -2
  42. nao_core-0.0.31.dist-info/RECORD +86 -0
  43. nao_core/bin/public/assets/index-ClduEZSo.css +0 -1
  44. nao_core/commands/sync/databases.py +0 -374
  45. nao_core-0.0.30.dist-info/RECORD +0 -65
  46. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/WHEEL +0 -0
  47. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/entry_points.txt +0 -0
  48. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,133 @@
1
+ """Cleanup utilities for removing stale sync files."""
2
+
3
+ import shutil
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+
7
+ from rich.console import Console
8
+
9
+ console = Console()
10
+
11
+
12
@dataclass
class DatabaseSyncState:
    """Record of everything a single database sync touched.

    The sync functions register each schema and table they write under
    ``db_path``; anything on disk that is *not* recorded here can later be
    removed as stale by the cleanup helpers.
    """

    # Root directory for this database (e.g. databases/type=duckdb/database=mydb).
    db_path: Path
    # Names of every schema that was synced.
    synced_schemas: set[str] = field(default_factory=set)
    # Schema name -> set of table names synced within that schema.
    synced_tables: dict[str, set[str]] = field(default_factory=dict)
    # Running count of schemas synced.
    schemas_synced: int = 0
    # Running count of tables synced.
    tables_synced: int = 0

    def add_table(self, schema: str, table: str) -> None:
        """Record that *table* inside *schema* was synced.

        Args:
            schema: The schema/dataset name
            table: The table name
        """
        self.synced_schemas.add(schema)
        # setdefault creates the per-schema set on first use.
        self.synced_tables.setdefault(schema, set()).add(table)
        self.tables_synced += 1

    def add_schema(self, schema: str) -> None:
        """Record that *schema* was synced, even when it holds no tables.

        Args:
            schema: The schema/dataset name
        """
        self.synced_schemas.add(schema)
        self.schemas_synced += 1
55
+
56
+
57
def cleanup_stale_paths(state: "DatabaseSyncState", verbose: bool = False) -> int:
    """Remove directories that exist on disk but weren't synced.

    This function cleans up:
    - Table directories that no longer exist in the source
    - Schema directories that no longer exist or have no tables

    Args:
        state: The sync state tracking what was synced
        verbose: Whether to print cleanup messages

    Returns:
        Number of stale paths removed
    """
    removed_count = 0

    if not state.db_path.exists():
        return 0

    # Find all existing schema directories.
    # FIX: use removeprefix rather than str.replace — replace() strips *every*
    # occurrence of the marker, so a schema literally named e.g. "old_schema=1"
    # (dir "schema=old_schema=1") would be misidentified and wrongly deleted.
    existing_schemas = {
        d.name.removeprefix("schema="): d
        for d in state.db_path.iterdir()
        if d.is_dir() and d.name.startswith("schema=")
    }

    # Remove schemas that weren't synced
    for schema_name, schema_path in existing_schemas.items():
        if schema_name not in state.synced_schemas:
            if verbose:
                console.print(f"  [dim red]removing stale schema:[/dim red] {schema_name}")
            shutil.rmtree(schema_path)
            removed_count += 1
            continue

        # Find existing tables in this schema (same removeprefix fix as above)
        existing_tables = {
            d.name.removeprefix("table="): d
            for d in schema_path.iterdir()
            if d.is_dir() and d.name.startswith("table=")
        }

        synced_tables_for_schema = state.synced_tables.get(schema_name, set())

        # Remove tables that weren't synced
        for table_name, table_path in existing_tables.items():
            if table_name not in synced_tables_for_schema:
                if verbose:
                    console.print(f"  [dim red]removing stale table:[/dim red] {schema_name}.{table_name}")
                shutil.rmtree(table_path)
                removed_count += 1

    return removed_count
106
+
107
+
108
def cleanup_stale_database_types(base_path: Path, active_db_types: set[str], verbose: bool = False) -> int:
    """Remove database type directories that are no longer configured.

    Args:
        base_path: The base databases output path
        active_db_types: Set of database type directory names that should exist
            (e.g., {'type=duckdb', 'type=postgres'})
        verbose: Whether to print cleanup messages

    Returns:
        Number of stale database type directories removed
    """
    if not base_path.exists():
        return 0

    # Collect every "type=..." directory that is not in the active set.
    stale_dirs = [
        entry
        for entry in base_path.iterdir()
        if entry.is_dir() and entry.name.startswith("type=") and entry.name not in active_db_types
    ]

    for entry in stale_dirs:
        if verbose:
            console.print(f"  [dim red]removing stale database type:[/dim red] {entry.name}")
        shutil.rmtree(entry)

    return len(stale_dirs)
@@ -0,0 +1,30 @@
1
+ """Sync providers for different resource types."""
2
+
3
+ from .base import SyncProvider, SyncResult
4
+ from .databases.provider import DatabaseSyncProvider
5
+ from .repositories.provider import RepositorySyncProvider
6
+
7
# Default providers in order of execution: repositories first, then databases.
# NOTE: these are module-level singleton instances; get_all_providers() hands
# out a shallow copy of this list, so the instances themselves are shared
# between all callers.
DEFAULT_PROVIDERS: list[SyncProvider] = [
    RepositorySyncProvider(),
    DatabaseSyncProvider(),
]
12
+
13
+
14
def get_all_providers() -> list[SyncProvider]:
    """Return every registered sync provider.

    Returns:
        A shallow copy of the registered provider list, so callers may
        reorder or filter it without mutating the registry itself.
    """
    return list(DEFAULT_PROVIDERS)
21
+
22
+
23
# Explicit public API of the providers package (star-import surface).
__all__ = [
    "SyncProvider",
    "SyncResult",
    "DatabaseSyncProvider",
    "RepositorySyncProvider",
    "DEFAULT_PROVIDERS",
    "get_all_providers",
]
@@ -0,0 +1,87 @@
1
+ """Base class for sync providers."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from nao_core.config import NaoConfig
9
+
10
+
11
+ @dataclass
12
+ class SyncResult:
13
+ """Result of a sync operation."""
14
+
15
+ provider_name: str
16
+ items_synced: int
17
+ details: dict[str, Any] | None = None
18
+ summary: str | None = None
19
+
20
+ def get_summary(self) -> str:
21
+ """Get a human-readable summary of the sync result."""
22
+ if self.summary:
23
+ return self.summary
24
+ return f"{self.items_synced} synced"
25
+
26
+
27
+ class SyncProvider(ABC):
28
+ """Abstract base class for sync providers.
29
+
30
+ A sync provider is responsible for synchronizing a specific type of resource
31
+ (e.g., repositories, databases) from the nao configuration to local files.
32
+ """
33
+
34
+ @property
35
+ @abstractmethod
36
+ def name(self) -> str:
37
+ """Human-readable name for this provider (e.g., 'Repositories', 'Databases')."""
38
+ ...
39
+
40
+ @property
41
+ @abstractmethod
42
+ def emoji(self) -> str:
43
+ """Emoji icon for this provider."""
44
+ ...
45
+
46
+ @property
47
+ @abstractmethod
48
+ def default_output_dir(self) -> str:
49
+ """Default output directory for this provider."""
50
+ ...
51
+
52
+ @abstractmethod
53
+ def get_items(self, config: NaoConfig) -> list[Any]:
54
+ """Extract items to sync from the configuration.
55
+
56
+ Args:
57
+ config: The nao configuration
58
+
59
+ Returns:
60
+ List of items to sync (e.g., repo configs, database configs)
61
+ """
62
+ ...
63
+
64
+ @abstractmethod
65
+ def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
66
+ """Sync the items to the output path.
67
+
68
+ Args:
69
+ items: List of items to sync
70
+ output_path: Path where synced data should be written
71
+ project_path: Path to the nao project root (for template resolution)
72
+
73
+ Returns:
74
+ SyncResult with statistics about what was synced
75
+ """
76
+ ...
77
+
78
+ def should_sync(self, config: NaoConfig) -> bool:
79
+ """Check if this provider has items to sync.
80
+
81
+ Args:
82
+ config: The nao configuration
83
+
84
+ Returns:
85
+ True if there are items to sync
86
+ """
87
+ return len(self.get_items(config)) > 0
@@ -0,0 +1,17 @@
1
+ """Database syncing functionality for generating markdown documentation from database schemas."""
2
+
3
+ from .bigquery import sync_bigquery
4
+ from .databricks import sync_databricks
5
+ from .duckdb import sync_duckdb
6
+ from .postgres import sync_postgres
7
+ from .provider import DatabaseSyncProvider
8
+ from .snowflake import sync_snowflake
9
+
10
# Public API: the provider class plus one sync function per supported engine.
__all__ = [
    "DatabaseSyncProvider",
    "sync_bigquery",
    "sync_databricks",
    "sync_duckdb",
    "sync_postgres",
    "sync_snowflake",
]
@@ -0,0 +1,78 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_bigquery(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync BigQuery database schema to markdown files.

    Walks every dataset in the project (or only ``db_config.dataset_id`` when
    one is configured) and renders one file per accessor for each table that
    passes the include/exclude patterns.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    root = base_path / "type=bigquery" / f"database={db_config.project_id}"
    state = DatabaseSyncState(db_path=root)

    # A configured dataset restricts the sync; otherwise walk them all.
    datasets = [db_config.dataset_id] if db_config.dataset_id else conn.list_databases()

    outer = progress.add_task(f"[dim]{db_config.name}[/dim]", total=len(datasets))

    for dataset in datasets:
        try:
            discovered = conn.list_tables(database=dataset)
        except Exception:
            # Best-effort: skip datasets that cannot be listed (e.g. permissions).
            progress.update(outer, advance=1)
            continue

        # Apply include/exclude patterns; skip datasets with no matches.
        wanted = [t for t in discovered if db_config.matches_pattern(dataset, t)]
        if not wanted:
            progress.update(outer, advance=1)
            continue

        dataset_dir = root / f"schema={dataset}"
        dataset_dir.mkdir(parents=True, exist_ok=True)
        state.add_schema(dataset)

        inner = progress.add_task(f"  [cyan]{dataset}[/cyan]", total=len(wanted))
        for table in wanted:
            table_dir = dataset_dir / f"table={table}"
            table_dir.mkdir(parents=True, exist_ok=True)
            # Each accessor renders one output file for this table.
            for accessor in accessors:
                (table_dir / accessor.filename).write_text(accessor.generate(conn, dataset, table))
            state.add_table(dataset, table)
            progress.update(inner, advance=1)

        progress.update(outer, advance=1)

    return state
@@ -0,0 +1,79 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_databricks(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync Databricks database schema to markdown files.

    Walks every schema of the catalog (or only ``db_config.schema`` when one
    is configured) and renders one file per accessor for each table that
    passes the include/exclude patterns.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    # Databricks defaults to the "main" catalog when none is configured.
    catalog = db_config.catalog or "main"
    root = base_path / "type=databricks" / f"database={catalog}"
    state = DatabaseSyncState(db_path=root)

    # A configured schema restricts the sync; otherwise walk them all.
    schemas = [db_config.schema] if db_config.schema else conn.list_databases()

    outer = progress.add_task(f"[dim]{db_config.name}[/dim]", total=len(schemas))

    for schema in schemas:
        try:
            discovered = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas that cannot be listed.
            progress.update(outer, advance=1)
            continue

        # Apply include/exclude patterns; skip schemas with no matches.
        wanted = [t for t in discovered if db_config.matches_pattern(schema, t)]
        if not wanted:
            progress.update(outer, advance=1)
            continue

        schema_dir = root / f"schema={schema}"
        schema_dir.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        inner = progress.add_task(f"  [cyan]{schema}[/cyan]", total=len(wanted))
        for table in wanted:
            table_dir = schema_dir / f"table={table}"
            table_dir.mkdir(parents=True, exist_ok=True)
            # Each accessor renders one output file for this table.
            for accessor in accessors:
                (table_dir / accessor.filename).write_text(accessor.generate(conn, schema, table))
            state.add_table(schema, table)
            progress.update(inner, advance=1)

        progress.update(outer, advance=1)

    return state
@@ -0,0 +1,83 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_duckdb(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync DuckDB database schema to markdown files.

    Walks every schema in the database file and renders one file per
    accessor for each table that passes the include/exclude patterns.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()

    # Derive the database name from the file path; in-memory DBs have none.
    db_name = "memory" if db_config.path == ":memory:" else Path(db_config.path).stem

    root = base_path / "type=duckdb" / f"database={db_name}"
    state = DatabaseSyncState(db_path=root)

    # DuckDB has no per-config schema filter: always walk every schema.
    schemas = conn.list_databases()

    outer = progress.add_task(f"[dim]{db_config.name}[/dim]", total=len(schemas))

    for schema in schemas:
        try:
            discovered = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas that cannot be listed.
            progress.update(outer, advance=1)
            continue

        # Apply include/exclude patterns; skip schemas with no matches.
        wanted = [t for t in discovered if db_config.matches_pattern(schema, t)]
        if not wanted:
            progress.update(outer, advance=1)
            continue

        schema_dir = root / f"schema={schema}"
        schema_dir.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        inner = progress.add_task(f"  [cyan]{schema}[/cyan]", total=len(wanted))
        for table in wanted:
            table_dir = schema_dir / f"table={table}"
            table_dir.mkdir(parents=True, exist_ok=True)
            # Each accessor renders one output file for this table.
            for accessor in accessors:
                (table_dir / accessor.filename).write_text(accessor.generate(conn, schema, table))
            state.add_table(schema, table)
            progress.update(inner, advance=1)

        progress.update(outer, advance=1)

    return state
@@ -0,0 +1,78 @@
1
+ from pathlib import Path
2
+
3
+ from rich.progress import Progress
4
+
5
+ from nao_core.commands.sync.accessors import DataAccessor
6
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
7
+
8
+
9
def sync_postgres(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync PostgreSQL database schema to markdown files.

    Walks every schema in the database (or only ``db_config.schema_name``
    when one is configured) and renders one file per accessor for each table
    that passes the include/exclude patterns.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    root = base_path / "type=postgres" / f"database={db_config.database}"
    state = DatabaseSyncState(db_path=root)

    # A configured schema restricts the sync; otherwise walk them all.
    schemas = [db_config.schema_name] if db_config.schema_name else conn.list_databases()

    outer = progress.add_task(f"[dim]{db_config.name}[/dim]", total=len(schemas))

    for schema in schemas:
        try:
            discovered = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas that cannot be listed (e.g. permissions).
            progress.update(outer, advance=1)
            continue

        # Apply include/exclude patterns; skip schemas with no matches.
        wanted = [t for t in discovered if db_config.matches_pattern(schema, t)]
        if not wanted:
            progress.update(outer, advance=1)
            continue

        schema_dir = root / f"schema={schema}"
        schema_dir.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        inner = progress.add_task(f"  [cyan]{schema}[/cyan]", total=len(wanted))
        for table in wanted:
            table_dir = schema_dir / f"table={table}"
            table_dir.mkdir(parents=True, exist_ok=True)
            # Each accessor renders one output file for this table.
            for accessor in accessors:
                (table_dir / accessor.filename).write_text(accessor.generate(conn, schema, table))
            state.add_table(schema, table)
            progress.update(inner, advance=1)

        progress.update(outer, advance=1)

    return state