nao-core 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +1 -1
- nao_core/bin/fastapi/main.py +6 -0
- nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
- nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
- nao_core/bin/migrations-postgres/meta/_journal.json +7 -0
- nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
- nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
- nao_core/bin/migrations-sqlite/meta/_journal.json +7 -0
- nao_core/bin/nao-chat-server +0 -0
- nao_core/bin/public/assets/{code-block-F6WJLWQG-z4zcca7w.js → code-block-F6WJLWQG-TAi8koem.js} +1 -1
- nao_core/bin/public/assets/index-BfHcd9Xz.css +1 -0
- nao_core/bin/public/assets/{index-DhhS7iVA.js → index-Mzo9bkag.js} +256 -172
- nao_core/bin/public/index.html +2 -2
- nao_core/commands/chat.py +11 -10
- nao_core/commands/init.py +27 -4
- nao_core/commands/sync/__init__.py +40 -21
- nao_core/commands/sync/accessors.py +218 -139
- nao_core/commands/sync/cleanup.py +133 -0
- nao_core/commands/sync/providers/__init__.py +30 -0
- nao_core/commands/sync/providers/base.py +87 -0
- nao_core/commands/sync/providers/databases/__init__.py +17 -0
- nao_core/commands/sync/providers/databases/bigquery.py +78 -0
- nao_core/commands/sync/providers/databases/databricks.py +79 -0
- nao_core/commands/sync/providers/databases/duckdb.py +83 -0
- nao_core/commands/sync/providers/databases/postgres.py +78 -0
- nao_core/commands/sync/providers/databases/provider.py +123 -0
- nao_core/commands/sync/providers/databases/snowflake.py +78 -0
- nao_core/commands/sync/providers/repositories/__init__.py +5 -0
- nao_core/commands/sync/{repositories.py → providers/repositories/provider.py} +43 -20
- nao_core/config/__init__.py +2 -0
- nao_core/config/base.py +23 -4
- nao_core/config/databases/__init__.py +5 -0
- nao_core/config/databases/base.py +1 -0
- nao_core/config/databases/postgres.py +78 -0
- nao_core/templates/__init__.py +12 -0
- nao_core/templates/defaults/databases/columns.md.j2 +23 -0
- nao_core/templates/defaults/databases/description.md.j2 +32 -0
- nao_core/templates/defaults/databases/preview.md.j2 +22 -0
- nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
- nao_core/templates/engine.py +133 -0
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/METADATA +6 -2
- nao_core-0.0.31.dist-info/RECORD +86 -0
- nao_core/bin/public/assets/index-ClduEZSo.css +0 -1
- nao_core/commands/sync/databases.py +0 -374
- nao_core-0.0.30.dist-info/RECORD +0 -65
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/WHEEL +0 -0
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/entry_points.txt +0 -0
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Cleanup utilities for removing stale sync files."""
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
|
|
9
|
+
console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class DatabaseSyncState:
    """Tracks the state of a database sync operation.

    Used to track which paths were synced so stale paths can be cleaned up.
    """

    # The root path for this database (e.g., databases/type=duckdb/database=mydb)
    db_path: Path

    # Set of schema names that were synced
    synced_schemas: set[str] = field(default_factory=set)

    # Dict mapping schema names to sets of table names that were synced
    synced_tables: dict[str, set[str]] = field(default_factory=dict)

    # Count of add_schema() calls (schemas explicitly recorded as synced)
    schemas_synced: int = 0

    # Count of add_table() calls (tables recorded as synced)
    tables_synced: int = 0

    def add_table(self, schema: str, table: str) -> None:
        """Record that a table was synced.

        Args:
            schema: The schema/dataset name
            table: The table name
        """
        self.synced_schemas.add(schema)
        # setdefault collapses the membership-test-then-insert dance into a
        # single lookup.
        self.synced_tables.setdefault(schema, set()).add(table)
        self.tables_synced += 1

    def add_schema(self, schema: str) -> None:
        """Record that a schema was synced (even if empty).

        Note: this increments schemas_synced on every call; callers are
        expected to invoke it once per schema.

        Args:
            schema: The schema/dataset name
        """
        self.synced_schemas.add(schema)
        self.schemas_synced += 1
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def cleanup_stale_paths(state: DatabaseSyncState, verbose: bool = False) -> int:
    """Remove directories that exist on disk but weren't synced.

    This function cleans up:
    - Table directories that no longer exist in the source
    - Schema directories that no longer exist or have no tables

    Args:
        state: The sync state tracking what was synced
        verbose: Whether to print cleanup messages

    Returns:
        Number of stale paths removed
    """
    removed_count = 0

    if not state.db_path.exists():
        return 0

    # Find all existing schema directories. removeprefix (rather than
    # str.replace) strips only the leading "schema=" marker, so a directory
    # name that happens to contain "schema=" elsewhere is not mangled.
    existing_schemas = {
        d.name.removeprefix("schema="): d
        for d in state.db_path.iterdir()
        if d.is_dir() and d.name.startswith("schema=")
    }

    # Remove schemas that weren't synced
    for schema_name, schema_path in existing_schemas.items():
        if schema_name not in state.synced_schemas:
            if verbose:
                console.print(f"  [dim red]removing stale schema:[/dim red] {schema_name}")
            shutil.rmtree(schema_path)
            removed_count += 1
            continue

        # Find existing tables in this schema (same removeprefix rationale).
        existing_tables = {
            d.name.removeprefix("table="): d
            for d in schema_path.iterdir()
            if d.is_dir() and d.name.startswith("table=")
        }

        synced_tables_for_schema = state.synced_tables.get(schema_name, set())

        # Remove tables that weren't synced
        for table_name, table_path in existing_tables.items():
            if table_name not in synced_tables_for_schema:
                if verbose:
                    console.print(f"  [dim red]removing stale table:[/dim red] {schema_name}.{table_name}")
                shutil.rmtree(table_path)
                removed_count += 1

    return removed_count
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def cleanup_stale_database_types(base_path: Path, active_db_types: set[str], verbose: bool = False) -> int:
    """Remove database type directories that are no longer configured.

    Args:
        base_path: The base databases output path
        active_db_types: Set of database type directory names that should exist
            (e.g., {'type=duckdb', 'type=postgres'})
        verbose: Whether to print cleanup messages

    Returns:
        Number of stale database type directories removed
    """
    if not base_path.exists():
        return 0

    # Collect every "type=" directory that is not in the active set, then
    # delete them. Non-directories and unrelated names are left alone.
    stale_dirs = [
        entry
        for entry in base_path.iterdir()
        if entry.is_dir() and entry.name.startswith("type=") and entry.name not in active_db_types
    ]

    for stale in stale_dirs:
        if verbose:
            console.print(f"  [dim red]removing stale database type:[/dim red] {stale.name}")
        shutil.rmtree(stale)

    return len(stale_dirs)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Sync providers for different resource types."""
|
|
2
|
+
|
|
3
|
+
from .base import SyncProvider, SyncResult
|
|
4
|
+
from .databases.provider import DatabaseSyncProvider
|
|
5
|
+
from .repositories.provider import RepositorySyncProvider
|
|
6
|
+
|
|
7
|
+
# Default providers in order of execution: repositories are synced before
# databases (list order is the execution order).
DEFAULT_PROVIDERS: list[SyncProvider] = [
    RepositorySyncProvider(),
    DatabaseSyncProvider(),
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_all_providers() -> list[SyncProvider]:
    """Get all registered sync providers.

    Returns:
        A fresh list of sync provider instances; mutating it does not
        affect the default registry.
    """
    return list(DEFAULT_PROVIDERS)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Explicit public API for this package.
__all__ = [
    "SyncProvider",
    "SyncResult",
    "DatabaseSyncProvider",
    "RepositorySyncProvider",
    "DEFAULT_PROVIDERS",
    "get_all_providers",
]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Base class for sync providers."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from nao_core.config import NaoConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class SyncResult:
|
|
13
|
+
"""Result of a sync operation."""
|
|
14
|
+
|
|
15
|
+
provider_name: str
|
|
16
|
+
items_synced: int
|
|
17
|
+
details: dict[str, Any] | None = None
|
|
18
|
+
summary: str | None = None
|
|
19
|
+
|
|
20
|
+
def get_summary(self) -> str:
|
|
21
|
+
"""Get a human-readable summary of the sync result."""
|
|
22
|
+
if self.summary:
|
|
23
|
+
return self.summary
|
|
24
|
+
return f"{self.items_synced} synced"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SyncProvider(ABC):
    """Abstract base class for sync providers.

    A sync provider is responsible for synchronizing a specific type of resource
    (e.g., repositories, databases) from the nao configuration to local files.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable name for this provider (e.g., 'Repositories', 'Databases')."""
        ...

    @property
    @abstractmethod
    def emoji(self) -> str:
        """Emoji icon for this provider."""
        ...

    @property
    @abstractmethod
    def default_output_dir(self) -> str:
        """Default output directory for this provider."""
        ...

    @abstractmethod
    def get_items(self, config: NaoConfig) -> list[Any]:
        """Extract items to sync from the configuration.

        Args:
            config: The nao configuration

        Returns:
            List of items to sync (e.g., repo configs, database configs)
        """
        ...

    @abstractmethod
    def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
        """Sync the items to the output path.

        Args:
            items: List of items to sync
            output_path: Path where synced data should be written
            project_path: Path to the nao project root (for template resolution)

        Returns:
            SyncResult with statistics about what was synced
        """
        ...

    def should_sync(self, config: NaoConfig) -> bool:
        """Check if this provider has items to sync.

        Args:
            config: The nao configuration

        Returns:
            True if there are items to sync
        """
        # An empty item list is falsy, so a plain truth test suffices.
        return bool(self.get_items(config))
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Database syncing functionality for generating markdown documentation from database schemas."""
|
|
2
|
+
|
|
3
|
+
from .bigquery import sync_bigquery
|
|
4
|
+
from .databricks import sync_databricks
|
|
5
|
+
from .duckdb import sync_duckdb
|
|
6
|
+
from .postgres import sync_postgres
|
|
7
|
+
from .provider import DatabaseSyncProvider
|
|
8
|
+
from .snowflake import sync_snowflake
|
|
9
|
+
|
|
10
|
+
# Explicit public API for the database sync providers package.
__all__ = [
    "DatabaseSyncProvider",
    "sync_bigquery",
    "sync_databricks",
    "sync_duckdb",
    "sync_postgres",
    "sync_snowflake",
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_bigquery(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync BigQuery database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_path = base_path / "type=bigquery" / f"database={db_config.project_id}"
    state = DatabaseSyncState(db_path=db_path)

    # A configured dataset_id restricts the sync to that dataset; otherwise
    # enumerate every dataset in the project.
    if db_config.dataset_id:
        datasets = [db_config.dataset_id]
    else:
        datasets = conn.list_databases()

    dataset_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(datasets),
    )

    for dataset in datasets:
        try:
            all_tables = conn.list_tables(database=dataset)
        except Exception:
            # Best-effort: skip datasets that cannot be listed (e.g. missing
            # permissions) while keeping the progress bar accurate.
            progress.update(dataset_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(dataset, t)]

        # Skip dataset if no tables match
        if not tables:
            progress.update(dataset_task, advance=1)
            continue

        dataset_path = db_path / f"schema={dataset}"
        dataset_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(dataset)

        table_task = progress.add_task(
            f"  [cyan]{dataset}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = dataset_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, dataset, table)
                output_file = table_path / accessor.filename
                # Write UTF-8 explicitly: write_text() otherwise uses the
                # platform locale encoding, which can raise on non-ASCII docs.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(dataset, table)
            progress.update(table_task, advance=1)

        progress.update(dataset_task, advance=1)

    return state
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_databricks(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync Databricks database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    # Fall back to "main" when no catalog is configured.
    catalog = db_config.catalog or "main"
    db_path = base_path / "type=databricks" / f"database={catalog}"
    state = DatabaseSyncState(db_path=db_path)

    # A configured schema restricts the sync; otherwise enumerate them all.
    if db_config.schema:
        schemas = [db_config.schema]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas that cannot be listed (e.g. missing
            # permissions) while keeping the progress bar accurate.
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f"  [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Write UTF-8 explicitly: write_text() otherwise uses the
                # platform locale encoding, which can raise on non-ASCII docs.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_duckdb(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync DuckDB database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()

    # Derive database name from path; an in-memory database has no file stem.
    if db_config.path == ":memory:":
        db_name = "memory"
    else:
        db_name = Path(db_config.path).stem

    db_path = base_path / "type=duckdb" / f"database={db_name}"
    state = DatabaseSyncState(db_path=db_path)

    # List all schemas in DuckDB
    schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas that cannot be listed while keeping
            # the progress bar accurate.
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f"  [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Write UTF-8 explicitly: write_text() otherwise uses the
                # platform locale encoding, which can raise on non-ASCII docs.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from rich.progress import Progress
|
|
4
|
+
|
|
5
|
+
from nao_core.commands.sync.accessors import DataAccessor
|
|
6
|
+
from nao_core.commands.sync.cleanup import DatabaseSyncState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sync_postgres(
    db_config,
    base_path: Path,
    progress: Progress,
    accessors: list[DataAccessor],
) -> DatabaseSyncState:
    """Sync PostgreSQL database schema to markdown files.

    Args:
        db_config: The database configuration
        base_path: Base output path
        progress: Rich progress instance
        accessors: List of data accessors to run

    Returns:
        DatabaseSyncState with sync results and tracked paths
    """
    conn = db_config.connect()
    db_path = base_path / "type=postgres" / f"database={db_config.database}"
    state = DatabaseSyncState(db_path=db_path)

    # A configured schema_name restricts the sync; otherwise enumerate all.
    if db_config.schema_name:
        schemas = [db_config.schema_name]
    else:
        schemas = conn.list_databases()

    schema_task = progress.add_task(
        f"[dim]{db_config.name}[/dim]",
        total=len(schemas),
    )

    for schema in schemas:
        try:
            all_tables = conn.list_tables(database=schema)
        except Exception:
            # Best-effort: skip schemas that cannot be listed (e.g. missing
            # permissions) while keeping the progress bar accurate.
            progress.update(schema_task, advance=1)
            continue

        # Filter tables based on include/exclude patterns
        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]

        # Skip schema if no tables match
        if not tables:
            progress.update(schema_task, advance=1)
            continue

        schema_path = db_path / f"schema={schema}"
        schema_path.mkdir(parents=True, exist_ok=True)
        state.add_schema(schema)

        table_task = progress.add_task(
            f"  [cyan]{schema}[/cyan]",
            total=len(tables),
        )

        for table in tables:
            table_path = schema_path / f"table={table}"
            table_path.mkdir(parents=True, exist_ok=True)

            for accessor in accessors:
                content = accessor.generate(conn, schema, table)
                output_file = table_path / accessor.filename
                # Write UTF-8 explicitly: write_text() otherwise uses the
                # platform locale encoding, which can raise on non-ASCII docs.
                output_file.write_text(content, encoding="utf-8")

            state.add_table(schema, table)
            progress.update(table_task, advance=1)

        progress.update(schema_task, advance=1)

    return state
|