nao-core 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +1 -1
- nao_core/bin/fastapi/main.py +15 -2
- nao_core/bin/fastapi/test_main.py +156 -0
- nao_core/bin/migrations-postgres/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-postgres/meta/0004_snapshot.json +847 -0
- nao_core/bin/migrations-postgres/meta/_journal.json +7 -0
- nao_core/bin/migrations-sqlite/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-sqlite/meta/0004_snapshot.json +819 -0
- nao_core/bin/migrations-sqlite/meta/_journal.json +7 -0
- nao_core/bin/nao-chat-server +0 -0
- nao_core/bin/public/assets/{code-block-F6WJLWQG-EQr6mTlQ.js → code-block-F6WJLWQG-z4zcca7w.js} +3 -3
- nao_core/bin/public/assets/index-ClduEZSo.css +1 -0
- nao_core/bin/public/assets/index-DhhS7iVA.js +473 -0
- nao_core/bin/public/index.html +2 -2
- nao_core/commands/debug.py +10 -6
- nao_core/commands/init.py +39 -23
- nao_core/commands/sync/accessors.py +2 -3
- nao_core/commands/sync/databases.py +243 -1
- nao_core/config/__init__.py +14 -1
- nao_core/config/databases/__init__.py +32 -11
- nao_core/config/databases/base.py +6 -0
- nao_core/config/databases/bigquery.py +29 -1
- nao_core/config/databases/databricks.py +69 -0
- nao_core/config/databases/duckdb.py +33 -0
- nao_core/config/databases/snowflake.py +115 -0
- nao_core/config/exceptions.py +7 -0
- {nao_core-0.0.28.dist-info → nao_core-0.0.30.dist-info}/METADATA +5 -4
- {nao_core-0.0.28.dist-info → nao_core-0.0.30.dist-info}/RECORD +31 -35
- nao_core/bin/public/assets/_chat-layout-BTlqRUE5.js +0 -1
- nao_core/bin/public/assets/_chat-layout.index-DOARokp1.js +0 -1
- nao_core/bin/public/assets/agentProvider-C6dGIy-H.js +0 -1
- nao_core/bin/public/assets/button-By_1dzVx.js +0 -1
- nao_core/bin/public/assets/folder-DnRS5rg3.js +0 -1
- nao_core/bin/public/assets/index-CElAN2SH.css +0 -1
- nao_core/bin/public/assets/index-ZTHASguQ.js +0 -59
- nao_core/bin/public/assets/input-CUQA5tsi.js +0 -1
- nao_core/bin/public/assets/login-BUQDum3t.js +0 -1
- nao_core/bin/public/assets/mermaid-FSSLJTFX-Dc6ZvCPw.js +0 -427
- nao_core/bin/public/assets/sidebar-bgEk7Xg8.js +0 -1
- nao_core/bin/public/assets/signinForm-CGAhnAkv.js +0 -1
- nao_core/bin/public/assets/signup-D2n11La3.js +0 -1
- nao_core/bin/public/assets/user-CYl8Tly2.js +0 -1
- nao_core/bin/public/assets/utils-DzJYey0s.js +0 -1
- {nao_core-0.0.28.dist-info → nao_core-0.0.30.dist-info}/WHEEL +0 -0
- {nao_core-0.0.28.dist-info → nao_core-0.0.30.dist-info}/entry_points.txt +0 -0
- {nao_core-0.0.28.dist-info → nao_core-0.0.30.dist-info}/licenses/LICENSE +0 -0
nao_core/bin/public/index.html
CHANGED

@@ -9,8 +9,8 @@
     <link rel="apple-touch-icon" href="/logo192.png" />
     <link rel="manifest" href="/manifest.json" />
     <title>nao — Chat with your data</title>
-    <script type="module" crossorigin src="/assets/index-ZTHASguQ.js"></script>
-    <link rel="stylesheet" crossorigin href="/assets/index-CElAN2SH.css">
+    <script type="module" crossorigin src="/assets/index-DhhS7iVA.js"></script>
+    <link rel="stylesheet" crossorigin href="/assets/index-ClduEZSo.css">
   </head>
   <body>
     <div id="app"></div>
nao_core/commands/debug.py
CHANGED

@@ -4,11 +4,12 @@ from rich.console import Console
 from rich.table import Table
 
 from nao_core.config import NaoConfig
+from nao_core.config.databases import AnyDatabaseConfig
 
 console = Console()
 
 
-def test_database_connection(db_config) -> tuple[bool, str]:
+def test_database_connection(db_config: AnyDatabaseConfig) -> tuple[bool, str]:
    """Test connectivity to a database.
 
     Returns:
@@ -17,16 +18,19 @@ def test_database_connection(db_config) -> tuple[bool, str]:
     try:
         conn = db_config.connect()
         # Run a simple query to verify the connection works
-        if db_config.dataset_id:
+        if hasattr(db_config, "dataset_id") and db_config.dataset_id:
             # If dataset is specified, list tables in that dataset
             tables = conn.list_tables()
             table_count = len(tables)
             return True, f"Connected successfully ({table_count} tables found)"
+        elif list_databases := getattr(conn, "list_databases", None):
+            # If no dataset, list schemas in the database instead
+            schemas = list_databases()
+            schema_count = len(schemas)
+            return True, f"Connected successfully ({schema_count} schemas found)"
         else:
-            # …
-            datasets …
-            dataset_count = len(datasets)
-            return True, f"Connected successfully ({dataset_count} datasets found)"
+            # Fallback for backends that don't support list_tables and list_databases
+            return True, "Connected but unable to list neither datasets nor schemas"
     except Exception as e:
         return False, str(e)
 
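The rewritten `test_database_connection` no longer assumes a BigQuery-style config: it probes the config for `dataset_id` and the connection for `list_databases` instead of requiring either. A minimal sketch of that duck-typed dispatch, using a hypothetical stand-in connection rather than a real Ibis backend:

```python
# FakeConn is a hypothetical stand-in; only the attribute-probing
# pattern from the diff is illustrated here.
class FakeConn:
    def list_databases(self) -> list[str]:
        return ["main", "analytics"]


conn = FakeConn()

# The walrus expression binds the method if the backend exposes it,
# otherwise control falls through to the final branch.
if list_databases := getattr(conn, "list_databases", None):
    print(f"Connected successfully ({len(list_databases())} schemas found)")
else:
    print("Connected but unable to list datasets or schemas")
```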
nao_core/commands/init.py
CHANGED

@@ -7,18 +7,24 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.prompt import Confirm, Prompt
 
-from nao_core.config import …
+from nao_core.config import (
+    AnyDatabaseConfig,
+    BigQueryConfig,
+    DatabaseType,
+    DatabricksConfig,
+    DuckDBConfig,
+    LLMConfig,
+    LLMProvider,
+    NaoConfig,
+    SlackConfig,
+    SnowflakeConfig,
+)
+from nao_core.config.exceptions import InitError
 from nao_core.config.repos import RepoConfig
 
 console = Console()
 
 
-class InitError(Exception):
-    """Base exception for init command errors."""
-
-    pass
-
-
 class EmptyProjectNameError(InitError):
     """Raised when project name is empty."""
 
@@ -77,27 +83,22 @@ def setup_project_name(force: bool = False) -> tuple[str, Path]:
 
 def setup_bigquery() -> BigQueryConfig:
     """Setup a BigQuery database configuration."""
-
+    return BigQueryConfig.promptConfig()
+
 
-
+def setup_duckdb() -> DuckDBConfig:
+    """Setup a DuckDB database configuration."""
+    return DuckDBConfig.promptConfig()
 
-    project_id = Prompt.ask("[bold]GCP Project ID[/bold]")
-    if not project_id:
-        raise InitError("GCP Project ID cannot be empty.")
 
-
+def setup_databricks() -> DatabricksConfig:
+    """Setup a Databricks database configuration."""
+    return DatabricksConfig.promptConfig()
 
-    credentials_path = Prompt.ask(
-        "[bold]Service account JSON path[/bold] [dim](optional, uses ADC if empty)[/dim]",
-        default="",
-    )
 
-    …
-    …
-    …
-        dataset_id=dataset_id or None,
-        credentials_path=credentials_path or None,
-    )
+def setup_snowflake() -> SnowflakeConfig:
+    """Setup a Snowflake database configuration."""
+    return SnowflakeConfig.promptConfig()
 
 
 def setup_databases() -> list[AnyDatabaseConfig]:
@@ -124,6 +125,21 @@ def setup_databases() -> list[AnyDatabaseConfig]:
             databases.append(db_config)
             console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
 
+        elif db_type == DatabaseType.DUCKDB.value:
+            db_config = setup_duckdb()
+            databases.append(db_config)
+            console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
+
+        elif db_type == DatabaseType.DATABRICKS.value:
+            db_config = setup_databricks()
+            databases.append(db_config)
+            console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
+
+        elif db_type == DatabaseType.SNOWFLAKE.value:
+            db_config = setup_snowflake()
+            databases.append(db_config)
+            console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
+
         add_another = Confirm.ask("\n[bold]Add another database?[/bold]", default=False)
         if not add_another:
             break
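Each `setup_*` helper is now a one-line wrapper that delegates prompting to the config class itself, and `setup_databases` routes on the `DatabaseType` string values. A hedged sketch of that routing, with the prompt answer replaced by a literal:

```python
# Sketch only: `choice` stands in for what Prompt.ask would return
# in the interactive loop above.
from nao_core.config import DatabaseType

choice = "duckdb"
if choice == DatabaseType.DUCKDB.value:
    # In the real flow: setup_duckdb() -> DuckDBConfig.promptConfig()
    print("would prompt for a DuckDB config")
```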
nao_core/commands/sync/accessors.py
CHANGED

@@ -31,8 +31,7 @@ class DataAccessor(ABC):
 
     def get_table(self, conn: BaseBackend, dataset: str, table: str):
         """Helper to get an Ibis table reference."""
-        …
-        return conn.table(full_table_name)
+        return conn.table(table, database=dataset)
 
 
 def truncate_middle(text: str, max_length: int) -> str:
@@ -57,7 +56,6 @@ class ColumnsAccessor(DataAccessor):
         try:
             t = self.get_table(conn, dataset, table)
             schema = t.schema()
-
             columns = list(schema.items())
 
             lines = [
@@ -79,6 +77,7 @@ class ColumnsAccessor(DataAccessor):
 
             return "\n".join(lines)
         except Exception as e:
+            print(e)
             return f"# {table}\n\nError fetching schema: {e}"
 
 
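`get_table` previously built a dotted `full_table_name` string; the fix qualifies the lookup through Ibis's `database=` keyword instead. A small sketch against an in-memory DuckDB backend (the table and column names are illustrative):

```python
import ibis

# In-memory DuckDB; its default schema is "main".
conn = ibis.duckdb.connect()
conn.create_table("users", schema=ibis.schema({"id": "int64", "email": "string"}))

# Equivalent of the fixed get_table(): qualify by schema via database=.
t = conn.table("users", database="main")
print(t.schema())
```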
nao_core/commands/sync/databases.py
CHANGED

@@ -85,6 +85,234 @@ def sync_bigquery(
     return datasets_synced, tables_synced
 
 
+def sync_duckdb(
+    db_config,
+    base_path: Path,
+    progress: Progress,
+    accessors: list[DataAccessor],
+) -> tuple[int, int]:
+    """Sync DuckDB database schema to markdown files.
+
+    Args:
+        db_config: The database configuration
+        base_path: Base output path
+        progress: Rich progress instance
+        accessors: List of data accessors to run
+
+    Returns:
+        Tuple of (schemas_synced, tables_synced)
+    """
+    conn = db_config.connect()
+
+    # Derive database name from path
+    if db_config.path == ":memory:":
+        db_name = "memory"
+    else:
+        db_name = Path(db_config.path).stem
+
+    db_path = base_path / "type=duckdb" / f"database={db_name}"
+
+    schemas_synced = 0
+    tables_synced = 0
+
+    # List all schemas in DuckDB
+    schemas = conn.list_databases()
+
+    schema_task = progress.add_task(
+        f"[dim]{db_config.name}[/dim]",
+        total=len(schemas),
+    )
+
+    for schema in schemas:
+        try:
+            all_tables = conn.list_tables(database=schema)
+        except Exception:
+            progress.update(schema_task, advance=1)
+            continue
+
+        # Filter tables based on include/exclude patterns
+        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]
+
+        # Skip schema if no tables match
+        if not tables:
+            progress.update(schema_task, advance=1)
+            continue
+
+        schema_path = db_path / f"schema={schema}"
+        schema_path.mkdir(parents=True, exist_ok=True)
+        schemas_synced += 1
+
+        table_task = progress.add_task(
+            f" [cyan]{schema}[/cyan]",
+            total=len(tables),
+        )
+
+        for table in tables:
+            table_path = schema_path / f"table={table}"
+            table_path.mkdir(parents=True, exist_ok=True)
+
+            for accessor in accessors:
+                content = accessor.generate(conn, schema, table)
+                output_file = table_path / accessor.filename
+                output_file.write_text(content)
+
+            tables_synced += 1
+            progress.update(table_task, advance=1)
+
+        progress.update(schema_task, advance=1)
+
+    return schemas_synced, tables_synced
+
+
+def sync_databricks(
+    db_config,
+    base_path: Path,
+    progress: Progress,
+    accessors: list[DataAccessor],
+) -> tuple[int, int]:
+    """Sync Databricks database schema to markdown files.
+
+    Args:
+        db_config: The database configuration
+        base_path: Base output path
+        progress: Rich progress instance
+        accessors: List of data accessors to run
+
+    Returns:
+        Tuple of (schemas_synced, tables_synced)
+    """
+    conn = db_config.connect()
+    catalog = db_config.catalog or "main"
+    db_path = base_path / "type=databricks" / f"database={catalog}"
+
+    schemas_synced = 0
+    tables_synced = 0
+
+    if db_config.schema:
+        schemas = [db_config.schema]
+    else:
+        schemas = conn.list_databases()
+
+    schema_task = progress.add_task(
+        f"[dim]{db_config.name}[/dim]",
+        total=len(schemas),
+    )
+
+    for schema in schemas:
+        try:
+            all_tables = conn.list_tables(database=schema)
+        except Exception:
+            progress.update(schema_task, advance=1)
+            continue
+
+        # Filter tables based on include/exclude patterns
+        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]
+
+        # Skip schema if no tables match
+        if not tables:
+            progress.update(schema_task, advance=1)
+            continue
+
+        schema_path = db_path / f"schema={schema}"
+        schema_path.mkdir(parents=True, exist_ok=True)
+        schemas_synced += 1
+
+        table_task = progress.add_task(
+            f" [cyan]{schema}[/cyan]",
+            total=len(tables),
+        )
+
+        for table in tables:
+            table_path = schema_path / f"table={table}"
+            table_path.mkdir(parents=True, exist_ok=True)
+
+            for accessor in accessors:
+                content = accessor.generate(conn, schema, table)
+                output_file = table_path / accessor.filename
+                output_file.write_text(content)
+
+            tables_synced += 1
+            progress.update(table_task, advance=1)
+
+        progress.update(schema_task, advance=1)
+
+    return schemas_synced, tables_synced
+
+
+def sync_snowflake(
+    db_config,
+    base_path: Path,
+    progress: Progress,
+    accessors: list[DataAccessor],
+) -> tuple[int, int]:
+    """Sync Snowflake database schema to markdown files.
+
+    Args:
+        db_config: The database configuration
+        base_path: Base output path
+        progress: Rich progress instance
+        accessors: List of data accessors to run
+
+    Returns:
+        Tuple of (schemas_synced, tables_synced)
+    """
+    conn = db_config.connect()
+    db_path = base_path / "type=snowflake" / f"database={db_config.database}"
+
+    schemas_synced = 0
+    tables_synced = 0
+
+    if db_config.schema:
+        schemas = [db_config.schema]
+    else:
+        schemas = conn.list_databases()
+
+    schema_task = progress.add_task(
+        f"[dim]{db_config.name}[/dim]",
+        total=len(schemas),
+    )
+
+    for schema in schemas:
+        try:
+            all_tables = conn.list_tables(database=schema)
+        except Exception:
+            progress.update(schema_task, advance=1)
+            continue
+
+        # Filter tables based on include/exclude patterns
+        tables = [t for t in all_tables if db_config.matches_pattern(schema, t)]
+
+        # Skip schema if no tables match
+        if not tables:
+            progress.update(schema_task, advance=1)
+            continue
+
+        schema_path = db_path / f"schema={schema}"
+        schema_path.mkdir(parents=True, exist_ok=True)
+        schemas_synced += 1
+
+        table_task = progress.add_task(
+            f" [cyan]{schema}[/cyan]",
+            total=len(tables),
+        )
+
+        for table in tables:
+            table_path = schema_path / f"table={table}"
+            table_path.mkdir(parents=True, exist_ok=True)
+
+            for accessor in accessors:
+                content = accessor.generate(conn, schema, table)
+                output_file = table_path / accessor.filename
+                output_file.write_text(content)
+
+            tables_synced += 1
+            progress.update(table_task, advance=1)
+
+        progress.update(schema_task, advance=1)
+
+    return schemas_synced, tables_synced
+
+
 def sync_databases(databases: list, base_path: Path) -> tuple[int, int]:
     """Sync all configured databases.
 
@@ -119,11 +347,25 @@ def sync_databases(databases: list, base_path: Path) -> tuple[int, int]:
         accessor_names = [a.filename.replace(".md", "") for a in db_accessors]
 
         try:
+            console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")
             if db.type == "bigquery":
-                console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")
                 datasets, tables = sync_bigquery(db, base_path, progress, db_accessors)
                 total_datasets += datasets
                 total_tables += tables
+            elif db.type == "duckdb":
+                schemas, tables = sync_duckdb(db, base_path, progress, db_accessors)
+                total_datasets += schemas
+                total_tables += tables
+            elif db.type == "databricks":
+                console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")
+                schemas, tables = sync_databricks(db, base_path, progress, db_accessors)
+                total_datasets += schemas
+                total_tables += tables
+            elif db.type == "snowflake":
+                console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")
+                schemas, tables = sync_snowflake(db, base_path, progress, db_accessors)
+                total_datasets += schemas
+                total_tables += tables
             else:
                 console.print(f"[yellow]⚠ Unsupported database type: {db.type}[/yellow]")
         except Exception as e:
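All three new sync functions share `sync_bigquery`'s shape: enumerate schemas, filter tables against include/exclude patterns, and write one file per accessor into a Hive-style partition layout. A sketch of the resulting paths (the base directory and the `columns.md` filename are assumptions for illustration):

```python
from pathlib import Path

base_path = Path("nao/databases")  # illustrative; passed in by the caller
table_dir = (
    base_path / "type=duckdb" / "database=warehouse" / "schema=main" / "table=users"
)
print(table_dir / "columns.md")
# nao/databases/type=duckdb/database=warehouse/schema=main/table=users/columns.md
```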
nao_core/config/__init__.py
CHANGED

@@ -1,5 +1,14 @@
 from .base import NaoConfig
-from .databases import …
+from .databases import (
+    AccessorType,
+    AnyDatabaseConfig,
+    BigQueryConfig,
+    DatabaseType,
+    DatabricksConfig,
+    DuckDBConfig,
+    SnowflakeConfig,
+)
+from .exceptions import InitError
 from .llm import LLMConfig, LLMProvider
 from .slack import SlackConfig
 
@@ -8,8 +17,12 @@ __all__ = [
     "AccessorType",
     "AnyDatabaseConfig",
     "BigQueryConfig",
+    "DuckDBConfig",
+    "DatabricksConfig",
+    "SnowflakeConfig",
     "DatabaseType",
     "LLMConfig",
     "LLMProvider",
     "SlackConfig",
+    "InitError",
 ]
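With the widened re-exports, every backend config (and the relocated `InitError`) is importable from the package root, which is what `init.py` above now relies on:

```python
# Grounded in the updated __all__; no new names are introduced here.
from nao_core.config import (
    BigQueryConfig,
    DatabricksConfig,
    DuckDBConfig,
    InitError,
    SnowflakeConfig,
)
```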
nao_core/config/databases/__init__.py
CHANGED

@@ -1,20 +1,26 @@
+from typing import Annotated, Union
+
+from pydantic import Discriminator, Tag
+
 from .base import AccessorType, DatabaseConfig, DatabaseType
 from .bigquery import BigQueryConfig
+from .databricks import DatabricksConfig
+from .duckdb import DuckDBConfig
+from .snowflake import SnowflakeConfig
 
 # =============================================================================
 # Database Config Registry
 # =============================================================================
 
-
-
-
-
-
-
-
-
-
-AnyDatabaseConfig = BigQueryConfig
+AnyDatabaseConfig = Annotated[
+    Union[
+        Annotated[BigQueryConfig, Tag("bigquery")],
+        Annotated[DatabricksConfig, Tag("databricks")],
+        Annotated[SnowflakeConfig, Tag("snowflake")],
+        Annotated[DuckDBConfig, Tag("duckdb")],
+    ],
+    Discriminator("type"),
+]
 
 
 def parse_database_config(data: dict) -> DatabaseConfig:
@@ -22,8 +28,23 @@ def parse_database_config(data: dict) -> DatabaseConfig:
     db_type = data.get("type")
     if db_type == "bigquery":
         return BigQueryConfig.model_validate(data)
+    elif db_type == "duckdb":
+        return DuckDBConfig.model_validate(data)
+    elif db_type == "databricks":
+        return DatabricksConfig.model_validate(data)
+    elif db_type == "snowflake":
+        return SnowflakeConfig.model_validate(data)
     else:
         raise ValueError(f"Unknown database type: {db_type}")
 
 
-__all__ = […]
+__all__ = [
+    "AccessorType",
+    "AnyDatabaseConfig",
+    "BigQueryConfig",
+    "DuckDBConfig",
+    "DatabaseConfig",
+    "DatabaseType",
+    "DatabricksConfig",
+    "SnowflakeConfig",
+]
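`AnyDatabaseConfig` is now a Pydantic discriminated union keyed on the `type` field, so a raw dict can be validated straight to the right subclass. A sketch using Pydantic's `TypeAdapter` (the DuckDB field name `path` is inferred from `sync_duckdb`'s use of `db_config.path`, not shown in this diff):

```python
from pydantic import TypeAdapter

from nao_core.config.databases import AnyDatabaseConfig

adapter = TypeAdapter(AnyDatabaseConfig)
cfg = adapter.validate_python({"type": "duckdb", "name": "local", "path": ":memory:"})
print(type(cfg).__name__)  # DuckDBConfig
```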
nao_core/config/databases/base.py
CHANGED

@@ -4,12 +4,18 @@ from enum import Enum
 
 from ibis import BaseBackend
 from pydantic import BaseModel, Field
+from rich.console import Console
+
+console = Console()
 
 
 class DatabaseType(str, Enum):
     """Supported database types."""
 
     BIGQUERY = "bigquery"
+    DUCKDB = "duckdb"
+    DATABRICKS = "databricks"
+    SNOWFLAKE = "snowflake"
 
 
 class AccessorType(str, Enum):
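Because `DatabaseType` subclasses `str`, members compare equal to their raw values, which is exactly what the `db_type == DatabaseType.DUCKDB.value` checks in `init.py` rely on:

```python
from nao_core.config.databases import DatabaseType

# str-enum members compare equal to plain strings and round-trip by value.
assert DatabaseType.DUCKDB == "duckdb"
assert DatabaseType("snowflake") is DatabaseType.SNOWFLAKE
```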
nao_core/config/databases/bigquery.py
CHANGED

@@ -3,8 +3,11 @@ from typing import Literal
 import ibis
 from ibis import BaseBackend
 from pydantic import Field
+from rich.prompt import Prompt
 
-from .base import DatabaseConfig
+from nao_core.config.exceptions import InitError
+
+from .base import DatabaseConfig, console
 
 
 class BigQueryConfig(DatabaseConfig):
@@ -20,6 +23,31 @@ class BigQueryConfig(DatabaseConfig):
     sso: bool = Field(default=False, description="Use Single Sign-On (SSO) for authentication")
     location: str | None = Field(default=None, description="BigQuery location")
 
+    @classmethod
+    def promptConfig(cls) -> "BigQueryConfig":
+        """Interactively prompt the user for BigQuery configuration."""
+        console.print("\n[bold cyan]BigQuery Configuration[/bold cyan]\n")
+
+        name = Prompt.ask("[bold]Connection name[/bold]", default="bigquery-prod")
+
+        project_id = Prompt.ask("[bold]GCP Project ID[/bold]")
+        if not project_id:
+            raise InitError("GCP Project ID cannot be empty.")
+
+        dataset_id = Prompt.ask("[bold]Default dataset[/bold] [dim](optional, press Enter to skip)[/dim]", default="")
+
+        credentials_path = Prompt.ask(
+            "[bold]Service account JSON path[/bold] [dim](optional, uses ADC if empty)[/dim]",
+            default="",
+        )
+
+        return BigQueryConfig(
+            name=name,
+            project_id=project_id,
+            dataset_id=dataset_id or None,
+            credentials_path=credentials_path or None,
+        )
+
     def connect(self) -> BaseBackend:
         """Create an Ibis BigQuery connection."""
         kwargs: dict = {"project_id": self.project_id}
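`promptConfig` only collects values interactively; constructing the model directly is equivalent. A sketch with placeholder values, using only fields visible in this diff:

```python
from nao_core.config import BigQueryConfig

cfg = BigQueryConfig(
    name="bigquery-prod",
    project_id="my-gcp-project",  # placeholder
    dataset_id=None,              # promptConfig maps "" to None
    credentials_path=None,        # falls back to Application Default Credentials
)
conn = cfg.connect()  # needs reachable GCP credentials to succeed
```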
nao_core/config/databases/databricks.py
ADDED

@@ -0,0 +1,69 @@
+from typing import Literal
+
+import ibis
+from ibis import BaseBackend
+from pydantic import Field
+from rich.prompt import Prompt
+
+from nao_core.config.exceptions import InitError
+
+from .base import DatabaseConfig, console
+
+
+class DatabricksConfig(DatabaseConfig):
+    """Databricks-specific configuration."""
+
+    type: Literal["databricks"] = "databricks"
+    server_hostname: str = Field(description="Databricks server hostname (e.g., 'adb-xxxx.azuredatabricks.net')")
+    http_path: str = Field(description="HTTP path to the SQL warehouse or cluster")
+    access_token: str = Field(description="Databricks personal access token")
+    catalog: str | None = Field(default=None, description="Unity Catalog name (optional)")
+    schema: str | None = Field(default=None, description="Default schema (optional)")
+
+    @classmethod
+    def promptConfig(cls) -> "DatabricksConfig":
+        """Interactively prompt the user for Databricks configuration."""
+        console.print("\n[bold cyan]Databricks Configuration[/bold cyan]\n")
+
+        name = Prompt.ask("[bold]Connection name[/bold]", default="databricks-prod")
+
+        server_hostname = Prompt.ask("[bold]Server hostname[/bold] [dim](e.g., adb-xxxx.azuredatabricks.net)[/dim]")
+        if not server_hostname:
+            raise InitError("Server hostname cannot be empty.")
+
+        http_path = Prompt.ask("[bold]HTTP path[/bold] [dim](e.g., /sql/1.0/warehouses/xxxx)[/dim]")
+        if not http_path:
+            raise InitError("HTTP path cannot be empty.")
+
+        access_token = Prompt.ask("[bold]Access token[/bold]", password=True)
+        if not access_token:
+            raise InitError("Access token cannot be empty.")
+
+        catalog = Prompt.ask("[bold]Catalog[/bold] [dim](optional, press Enter to skip)[/dim]", default=None)
+
+        schema = Prompt.ask("[bold]Default schema[/bold] [dim](optional, press Enter to skip)[/dim]", default=None)
+
+        return DatabricksConfig(
+            name=name,
+            server_hostname=server_hostname,
+            http_path=http_path,
+            access_token=access_token,
+            catalog=catalog,
+            schema=schema,
+        )
+
+    def connect(self) -> BaseBackend:
+        """Create an Ibis Databricks connection."""
+        kwargs: dict = {
+            "server_hostname": self.server_hostname,
+            "http_path": self.http_path,
+            "access_token": self.access_token,
+        }
+
+        if self.catalog:
+            kwargs["catalog"] = self.catalog
+
+        if self.schema:
+            kwargs["schema"] = self.schema
+
+        return ibis.databricks.connect(**kwargs)
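The same pattern applies to the new `DatabricksConfig`: a direct, non-interactive construction works too. A sketch with placeholder credentials (the `connect()` call needs a real workspace to succeed):

```python
from nao_core.config import DatabricksConfig

cfg = DatabricksConfig(
    name="databricks-prod",
    server_hostname="adb-1234567890123456.7.azuredatabricks.net",  # placeholder
    http_path="/sql/1.0/warehouses/abc123",                        # placeholder
    access_token="dapi-...",                                       # placeholder
    catalog="main",
    schema="analytics",
)
conn = cfg.connect()  # ibis.databricks.connect(**kwargs) under the hood
```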