nao-core 0.0.12__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +1 -1
- nao_core/bin/db.sqlite +0 -0
- nao_core/bin/fastapi/main.py +102 -0
- nao_core/bin/public/assets/_chatId-z5gRlor1.js +1 -0
- nao_core/bin/public/assets/chat-messages-DUR3D342.js +1 -0
- nao_core/bin/public/assets/index-BDlcD_HE.js +1 -0
- nao_core/bin/public/assets/index-Bc7icYyJ.css +1 -0
- nao_core/bin/public/assets/index-CGg3ZQH6.js +49 -0
- nao_core/bin/public/assets/{login-CGCfd7iQ.js → login-D87n9R5V.js} +1 -1
- nao_core/bin/public/assets/signinForm-9PY1Lvqj.js +1 -0
- nao_core/bin/public/assets/{signup-BGjbIX9B.js → signup-B7NC1g08.js} +1 -1
- nao_core/bin/public/favicon.ico +0 -0
- nao_core/bin/public/index.html +3 -3
- nao_core/commands/chat.py +67 -25
- nao_core/commands/debug.py +0 -4
- nao_core/commands/init.py +3 -3
- nao_core/commands/sync.py +273 -44
- nao_core/config/__init__.py +13 -0
- nao_core/{config.py → config/base.py} +4 -66
- nao_core/config/databases/__init__.py +29 -0
- nao_core/config/databases/base.py +72 -0
- nao_core/config/databases/bigquery.py +42 -0
- nao_core/config/llm/__init__.py +16 -0
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/METADATA +3 -1
- nao_core-0.0.15.dist-info/RECORD +39 -0
- nao_core/bin/public/assets/index-BUcR0FCx.css +0 -1
- nao_core/bin/public/assets/index-DDQ8i103.js +0 -14
- nao_core/bin/public/assets/index-nOBqrovO.js +0 -36
- nao_core/bin/public/assets/signinForm-BGrBZeLW.js +0 -1
- nao_core-0.0.12.dist-info/RECORD +0 -31
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/WHEEL +0 -0
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/entry_points.txt +0 -0
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/licenses/LICENSE +0 -0
nao_core/commands/sync.py
CHANGED
@@ -1,46 +1,261 @@
 import sys
+from abc import ABC, abstractmethod
 from pathlib import Path

 from ibis import BaseBackend
 from rich.console import Console
 from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn

-from nao_core.config import NaoConfig
+from nao_core.config import AccessorType, NaoConfig

 console = Console()


-
-
-
-
+# =============================================================================
+# Data Accessors
+# =============================================================================
+
+
+class DataAccessor(ABC):
+    """Base class for data accessors that generate markdown files for tables."""
+
+    @property
+    @abstractmethod
+    def filename(self) -> str:
+        """The filename this accessor writes to (e.g., 'columns.md')."""
+        ...
+
+    @abstractmethod
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        """Generate the markdown content for a table.
+
+        Args:
+            conn: The Ibis database connection
+            dataset: The dataset/schema name
+            table: The table name
+
+        Returns:
+            Markdown string content
+        """
+        ...
+
+    def get_table(self, conn: BaseBackend, dataset: str, table: str):
+        """Helper to get an Ibis table reference."""
         full_table_name = f"{dataset}.{table}"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        return conn.table(full_table_name)
+
+
+class ColumnsAccessor(DataAccessor):
+    """Generates columns.md with column names, types, and nullable info."""
+
+    @property
+    def filename(self) -> str:
+        return "columns.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            lines = [
+                f"# {table}",
+                "",
+                f"**Dataset:** `{dataset}`",
+                "",
+                "## Columns",
+                "",
+                "| Column | Type | Nullable | Description |",
+                "|--------|------|----------|-------------|",
+            ]
+
+            for name, dtype in schema.items():
+                nullable = "Yes" if dtype.nullable else "No"
+                description = ""
+                lines.append(f"| `{name}` | `{dtype}` | {nullable} | {description} |")
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table}\n\nError fetching schema: {e}"
+
+
+class PreviewAccessor(DataAccessor):
+    """Generates preview.md with the first N rows of data."""
+
+    def __init__(self, num_rows: int = 10):
+        self.num_rows = num_rows
+
+    @property
+    def filename(self) -> str:
+        return "preview.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            preview_df = t.limit(self.num_rows).execute()
+
+            lines = [
+                f"# {table} - Preview",
+                "",
+                f"**Dataset:** `{dataset}`",
+                f"**Showing:** First {len(preview_df)} rows",
+                "",
+                "## Data Preview",
+                "",
+            ]
+
+            columns = list(schema.keys())
+            header = "| " + " | ".join(f"`{col}`" for col in columns) + " |"
+            separator = "| " + " | ".join("---" for _ in columns) + " |"
+            lines.append(header)
+            lines.append(separator)
+
+            for _, row in preview_df.iterrows():
+                row_values = []
+                for col in columns:
+                    val = row[col]
+                    val_str = str(val) if val is not None else ""
+                    if len(val_str) > 50:
+                        val_str = val_str[:47] + "..."
+                    val_str = val_str.replace("|", "\\|").replace("\n", " ")
+                    row_values.append(val_str)
+                lines.append("| " + " | ".join(row_values) + " |")
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table} - Preview\n\nError fetching preview: {e}"
+
+
+class DescriptionAccessor(DataAccessor):
+    """Generates description.md with table metadata (row count, column count, etc.)."""
+
+    @property
+    def filename(self) -> str:
+        return "description.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            row_count = t.count().execute()
+            col_count = len(schema)
+
+            lines = [
+                f"# {table}",
+                "",
+                f"**Dataset:** `{dataset}`",
+                "",
+                "## Table Metadata",
+                "",
+                "| Property | Value |",
+                "|----------|-------|",
+                f"| **Row Count** | {row_count:,} |",
+                f"| **Column Count** | {col_count} |",
+                "",
+                "## Description",
+                "",
+                "_No description available._",
+                "",
+            ]
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table}\n\nError fetching description: {e}"
+
+
+class ProfilingAccessor(DataAccessor):
+    """Generates profiling.md with column statistics and data profiling."""
+
+    @property
+    def filename(self) -> str:
+        return "profiling.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            lines = [
+                f"# {table} - Profiling",
+                "",
+                f"**Dataset:** `{dataset}`",
+                "",
+                "## Column Statistics",
+                "",
+                "| Column | Type | Nulls | Unique | Min | Max |",
+                "|--------|------|-------|--------|-----|-----|",
+            ]
+
+            for name, dtype in schema.items():
+                col = t[name]
+                dtype_str = str(dtype)
+
+                try:
+                    null_count = t.filter(col.isnull()).count().execute()
+                    unique_count = col.nunique().execute()
+
+                    min_val = ""
+                    max_val = ""
+                    if dtype.is_numeric() or dtype.is_temporal():
+                        try:
+                            min_val = str(col.min().execute())
+                            max_val = str(col.max().execute())
+                            if len(min_val) > 20:
+                                min_val = min_val[:17] + "..."
+                            if len(max_val) > 20:
+                                max_val = max_val[:17] + "..."
+                        except Exception:
+                            pass
+
+                    lines.append(
+                        f"| `{name}` | `{dtype_str}` | {null_count:,} | {unique_count:,} | {min_val} | {max_val} |"
+                    )
+                except Exception as col_error:
+                    lines.append(f"| `{name}` | `{dtype_str}` | Error: {col_error} | | | |")
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table} - Profiling\n\nError fetching profiling: {e}"
+
+
+# =============================================================================
+# Accessor Registry
+# =============================================================================
+
+ACCESSOR_REGISTRY: dict[AccessorType, DataAccessor] = {
+    AccessorType.COLUMNS: ColumnsAccessor(),
+    AccessorType.PREVIEW: PreviewAccessor(num_rows=10),
+    AccessorType.DESCRIPTION: DescriptionAccessor(),
+    AccessorType.PROFILING: ProfilingAccessor(),
+}
+
+
+def get_accessors(accessor_types: list[AccessorType]) -> list[DataAccessor]:
+    """Get accessor instances for the given types."""
+    return [ACCESSOR_REGISTRY[t] for t in accessor_types if t in ACCESSOR_REGISTRY]
+
+
+# =============================================================================
+# Sync Functions
+# =============================================================================
+
+
+def sync_bigquery(
+    db_config,
+    base_path: Path,
+    progress: Progress,
+    accessors: list[DataAccessor],
+) -> tuple[int, int]:
     """Sync BigQuery database schema to markdown files.

+    Args:
+        db_config: The database configuration
+        base_path: Base output path
+        progress: Rich progress instance
+        accessors: List of data accessors to run
+
     Returns:
         Tuple of (datasets_synced, tables_synced)
     """
@@ -50,7 +265,6 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
     datasets_synced = 0
     tables_synced = 0

-    # Get datasets to sync
     if db_config.dataset_id:
         datasets = [db_config.dataset_id]
     else:
@@ -62,17 +276,24 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
     )

     for dataset in datasets:
-        dataset_path = db_path / dataset
-        dataset_path.mkdir(parents=True, exist_ok=True)
-        datasets_synced += 1
-
-        # List tables in this dataset
         try:
-
+            all_tables = conn.list_tables(database=dataset)
         except Exception:
             progress.update(dataset_task, advance=1)
             continue

+        # Filter tables based on include/exclude patterns
+        tables = [t for t in all_tables if db_config.matches_pattern(dataset, t)]
+
+        # Skip dataset if no tables match
+        if not tables:
+            progress.update(dataset_task, advance=1)
+            continue
+
+        dataset_path = db_path / dataset
+        dataset_path.mkdir(parents=True, exist_ok=True)
+        datasets_synced += 1
+
         table_task = progress.add_task(
             f"  [cyan]{dataset}[/cyan]",
             total=len(tables),
@@ -82,11 +303,12 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
             table_path = dataset_path / table
             table_path.mkdir(parents=True, exist_ok=True)

-
-
-
-
+            for accessor in accessors:
+                content = accessor.generate(conn, dataset, table)
+                output_file = table_path / accessor.filename
+                output_file.write_text(content)

+            tables_synced += 1
             progress.update(table_task, advance=1)

         progress.update(dataset_task, advance=1)
@@ -97,15 +319,17 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
 def sync(output_dir: str = "databases"):
     """Sync database schemas to local markdown files.

-    Creates a folder structure with table
+    Creates a folder structure with table metadata:
        databases/bigquery/<connection>/<dataset>/<table>/columns.md
+       databases/bigquery/<connection>/<dataset>/<table>/preview.md
+       databases/bigquery/<connection>/<dataset>/<table>/description.md
+       databases/bigquery/<connection>/<dataset>/<table>/profiling.md

     Args:
-
+        output_dir: Output directory for the database schemas (default: "databases")
     """
     console.print("\n[bold cyan]🔄 nao sync[/bold cyan]\n")

-    # Load config
     config = NaoConfig.try_load()
     if not config:
         console.print("[bold red]✗[/bold red] No nao_config.yaml found in current directory")
@@ -133,9 +357,14 @@ def sync(output_dir: str = "databases"):
         transient=False,
     ) as progress:
         for db in config.databases:
+            # Get accessors from database config
+            db_accessors = get_accessors(db.accessors)
+            accessor_names = [a.filename.replace(".md", "") for a in db_accessors]
+
             try:
                 if db.type == "bigquery":
-
+                    console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")
+                    datasets, tables = sync_bigquery(db, base_path, progress, db_accessors)
                     total_datasets += datasets
                     total_tables += tables
                 else:
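The heart of the change: `sync_bigquery` no longer hard-codes what gets written per table; it runs whatever `DataAccessor` instances it is handed, and `get_accessors` silently skips unknown types. A minimal sketch of the selection step, with import paths assumed from this diff:

```python
from nao_core.commands.sync import get_accessors
from nao_core.config import AccessorType

# Mirror what sync() does for each configured database: resolve the
# configured accessor types to instances, then derive display names
# from their output filenames.
accessors = get_accessors([AccessorType.COLUMNS, AccessorType.PREVIEW])
print([a.filename for a in accessors])                     # ['columns.md', 'preview.md']
print([a.filename.replace(".md", "") for a in accessors])  # ['columns', 'preview']
```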
nao_core/config/__init__.py
ADDED
@@ -0,0 +1,13 @@
+from .base import NaoConfig
+from .databases import AccessorType, AnyDatabaseConfig, BigQueryConfig, DatabaseType
+from .llm import LLMConfig, LLMProvider
+
+__all__ = [
+    "NaoConfig",
+    "AccessorType",
+    "AnyDatabaseConfig",
+    "BigQueryConfig",
+    "DatabaseType",
+    "LLMConfig",
+    "LLMProvider",
+]
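The new package root re-exports the public names, so the pre-split import path keeps working next to the new submodules. A quick sketch:

```python
# Both import styles resolve to the same classes after the split:
from nao_core.config import AccessorType, NaoConfig   # re-exported at the package root
from nao_core.config.databases import BigQueryConfig  # direct submodule path
from nao_core.config.llm import LLMProvider
```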
nao_core/{config.py → config/base.py}
RENAMED
@@ -1,80 +1,18 @@
-from enum import Enum
 from pathlib import Path
-from typing import Literal

-import ibis
 import yaml
 from ibis import BaseBackend
 from pydantic import BaseModel, Field, model_validator

-
-class LLMProvider(str, Enum):
-    """Supported LLM providers."""
-
-    OPENAI = "openai"
-
-
-class DatabaseType(str, Enum):
-    """Supported database types."""
-
-    BIGQUERY = "bigquery"
-
-
-class BigQueryConfig(BaseModel):
-    """BigQuery-specific configuration."""
-
-    type: Literal["bigquery"] = "bigquery"
-    name: str = Field(description="A friendly name for this connection")
-    project_id: str = Field(description="GCP project ID")
-    dataset_id: str | None = Field(default=None, description="Default BigQuery dataset")
-    credentials_path: str | None = Field(
-        default=None,
-        description="Path to service account JSON file. If not provided, uses Application Default Credentials (ADC)",
-    )
-
-    def connect(self) -> BaseBackend:
-        """Create an Ibis BigQuery connection."""
-        kwargs: dict = {"project_id": self.project_id}
-
-        if self.dataset_id:
-            kwargs["dataset_id"] = self.dataset_id
-
-        if self.credentials_path:
-            from google.oauth2 import service_account
-
-            credentials = service_account.Credentials.from_service_account_file(
-                self.credentials_path,
-                scopes=["https://www.googleapis.com/auth/bigquery"],
-            )
-            kwargs["credentials"] = credentials
-
-        return ibis.bigquery.connect(**kwargs)
-
-
-DatabaseConfig = BigQueryConfig
-
-
-def parse_database_config(data: dict) -> DatabaseConfig:
-    """Parse a database config dict into the appropriate type."""
-    db_type = data.get("type")
-    if db_type == "bigquery":
-        return BigQueryConfig.model_validate(data)
-    else:
-        raise ValueError(f"Unknown database type: {db_type}")
-
-
-class LLMConfig(BaseModel):
-    """LLM configuration."""
-
-    provider: LLMProvider = Field(description="The LLM provider to use")
-    api_key: str = Field(description="The API key to use")
+from .databases import AnyDatabaseConfig, parse_database_config
+from .llm import LLMConfig


 class NaoConfig(BaseModel):
     """nao project configuration."""

     project_name: str = Field(description="The name of the nao project")
-    databases: list[
+    databases: list[AnyDatabaseConfig] = Field(default_factory=list, description="The databases to use")
     llm: LLMConfig | None = Field(default=None, description="The LLM configuration")

     @model_validator(mode="before")
@@ -121,7 +59,7 @@ class NaoConfig(BaseModel):
         """Try to load config from path, returns None if not found or invalid.

         Args:
-
+            path: Directory containing nao_config.yaml. Defaults to current directory.
         """
         if path is None:
             path = Path.cwd()
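With `NaoConfig` now composed from the submodule models, a config document validates straight through Pydantic. A hedged sketch of building one from YAML (field names are inferred from the models in this diff; the `model_validator(mode="before")` hook, whose body is not shown here, may normalize input further):

```python
import yaml

from nao_core.config import NaoConfig

raw = yaml.safe_load("""
project_name: demo
databases:
  - type: bigquery
    name: warehouse
    project_id: my-gcp-project      # placeholder project
    accessors: [columns, preview, profiling]
""")

config = NaoConfig.model_validate(raw)
print([a.value for a in config.databases[0].accessors])
# ['columns', 'preview', 'profiling']
```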
nao_core/config/databases/__init__.py
ADDED
@@ -0,0 +1,29 @@
+from .base import AccessorType, DatabaseConfig, DatabaseType
+from .bigquery import BigQueryConfig
+
+# =============================================================================
+# Database Config Registry
+# =============================================================================
+
+# When adding more backends, convert this to a discriminated union:
+# AnyDatabaseConfig = Annotated[
+#     Union[
+#         Annotated[BigQueryConfig, Tag("bigquery")],
+#         Annotated[PostgresConfig, Tag("postgres")],
+#     ],
+#     Discriminator(lambda x: x.get("type", "bigquery")),
+# ]
+
+AnyDatabaseConfig = BigQueryConfig
+
+
+def parse_database_config(data: dict) -> DatabaseConfig:
+    """Parse a database config dict into the appropriate type."""
+    db_type = data.get("type")
+    if db_type == "bigquery":
+        return BigQueryConfig.model_validate(data)
+    else:
+        raise ValueError(f"Unknown database type: {db_type}")
+
+
+__all__ = ["AccessorType", "DatabaseConfig", "DatabaseType", "BigQueryConfig", "AnyDatabaseConfig"]
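Until more backends exist, `AnyDatabaseConfig` is a plain alias and `parse_database_config` is the seam where the discriminated union sketched in the comment would slot in. A usage sketch, with dict keys taken from the BigQuery model:

```python
from nao_core.config.databases import parse_database_config

cfg = parse_database_config(
    {"type": "bigquery", "name": "warehouse", "project_id": "my-gcp-project"}
)
print(type(cfg).__name__)  # BigQueryConfig

try:
    parse_database_config({"type": "postgres", "name": "pg"})
except ValueError as err:
    print(err)  # Unknown database type: postgres
```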
nao_core/config/databases/base.py
ADDED
@@ -0,0 +1,72 @@
+import fnmatch
+from abc import ABC, abstractmethod
+from enum import Enum
+
+from ibis import BaseBackend
+from pydantic import BaseModel, Field
+
+
+class DatabaseType(str, Enum):
+    """Supported database types."""
+
+    BIGQUERY = "bigquery"
+
+
+class AccessorType(str, Enum):
+    """Available data accessors for sync."""
+
+    COLUMNS = "columns"
+    PREVIEW = "preview"
+    DESCRIPTION = "description"
+    PROFILING = "profiling"
+
+
+class DatabaseConfig(BaseModel, ABC):
+    """Base configuration for all database backends."""
+
+    name: str = Field(description="A friendly name for this connection")
+
+    # Sync settings
+    accessors: list[AccessorType] = Field(
+        default=[AccessorType.COLUMNS, AccessorType.PREVIEW, AccessorType.DESCRIPTION],
+        description="List of accessors to run during sync (columns, preview, description, profiling)",
+    )
+    include: list[str] = Field(
+        default_factory=list,
+        description="Glob patterns for schemas/tables to include (e.g., 'prod_*.*', 'analytics.dim_*'). Empty means include all.",
+    )
+    exclude: list[str] = Field(
+        default_factory=list,
+        description="Glob patterns for schemas/tables to exclude (e.g., 'temp_*.*', '*.backup_*')",
+    )
+
+    @abstractmethod
+    def connect(self) -> BaseBackend:
+        """Create an Ibis connection for this database."""
+        ...
+
+    def matches_pattern(self, schema: str, table: str) -> bool:
+        """Check if a schema.table matches the include/exclude patterns.
+
+        Args:
+            schema: The schema/dataset name
+            table: The table name
+
+        Returns:
+            True if the table should be included, False if excluded
+        """
+        full_name = f"{schema}.{table}"
+
+        # If include patterns exist, table must match at least one
+        if self.include:
+            included = any(fnmatch.fnmatch(full_name, pattern) for pattern in self.include)
+            if not included:
+                return False
+
+        # If exclude patterns exist, table must not match any
+        if self.exclude:
+            excluded = any(fnmatch.fnmatch(full_name, pattern) for pattern in self.exclude)
+            if excluded:
+                return False
+
+        return True
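The include/exclude filtering is plain `fnmatch` globbing against the combined `schema.table` string: include patterns (if any) gate entry, then exclude patterns veto. A sketch of the semantics using the concrete `BigQueryConfig` subclass (all values are placeholders):

```python
from nao_core.config import BigQueryConfig

cfg = BigQueryConfig(
    name="warehouse",
    project_id="my-gcp-project",
    include=["prod_*.*"],    # only prod_* datasets
    exclude=["*.backup_*"],  # never backup tables
)

assert cfg.matches_pattern("prod_sales", "orders")             # passes include
assert not cfg.matches_pattern("prod_sales", "backup_orders")  # vetoed by exclude
assert not cfg.matches_pattern("staging", "orders")            # fails include gate
```

One `fnmatch` quirk worth keeping in mind: `*` also matches dots, so a bare pattern like `prod_*` matches the entire `prod_sales.orders` string, not just the schema part.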
nao_core/config/databases/bigquery.py
ADDED
@@ -0,0 +1,42 @@
+from typing import Literal
+
+import ibis
+from ibis import BaseBackend
+from pydantic import Field
+
+from .base import DatabaseConfig
+
+
+class BigQueryConfig(DatabaseConfig):
+    """BigQuery-specific configuration."""
+
+    type: Literal["bigquery"] = "bigquery"
+    project_id: str = Field(description="GCP project ID")
+    dataset_id: str | None = Field(default=None, description="Default BigQuery dataset")
+    credentials_path: str | None = Field(
+        default=None,
+        description="Path to service account JSON file. If not provided, uses Application Default Credentials (ADC)",
+    )
+    sso: bool = Field(default=False, description="Use Single Sign-On (SSO) for authentication")
+    location: str | None = Field(default=None, description="BigQuery location")
+
+    def connect(self) -> BaseBackend:
+        """Create an Ibis BigQuery connection."""
+        kwargs: dict = {"project_id": self.project_id}
+
+        if self.dataset_id:
+            kwargs["dataset_id"] = self.dataset_id
+
+        if self.sso:
+            kwargs["auth_local_webserver"] = True
+
+        if self.credentials_path:
+            from google.oauth2 import service_account
+
+            credentials = service_account.Credentials.from_service_account_file(
+                self.credentials_path,
+                scopes=["https://www.googleapis.com/auth/bigquery"],
+            )
+            kwargs["credentials"] = credentials
+
+        return ibis.bigquery.connect(**kwargs)
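Authentication now has three paths: Application Default Credentials by default, a browser-based flow when `sso: true` (forwarded to the backend as `auth_local_webserver=True`), or an explicit service-account file. A usage sketch (requires a real GCP project and the `ibis-framework[bigquery]` extra):

```python
from nao_core.config import BigQueryConfig

cfg = BigQueryConfig(
    name="warehouse",
    project_id="my-gcp-project",  # placeholder project
    dataset_id="analytics",
    sso=True,                     # opens a local browser window to sign in
)

conn = cfg.connect()
print(conn.list_tables(database="analytics"))
```

Note that the new `location` field is declared on the model but not yet forwarded to `ibis.bigquery.connect` in this version.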
nao_core/config/llm/__init__.py
ADDED
@@ -0,0 +1,16 @@
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+
+class LLMProvider(str, Enum):
+    """Supported LLM providers."""
+
+    OPENAI = "openai"
+
+
+class LLMConfig(BaseModel):
+    """LLM configuration."""
+
+    provider: LLMProvider = Field(description="The LLM provider to use")
+    api_key: str = Field(description="The API key to use")
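The LLM settings move into their own submodule unchanged; constructing one stays a two-field affair (how the key is sourced is up to the caller):

```python
import os

from nao_core.config import LLMConfig, LLMProvider

llm = LLMConfig(
    provider=LLMProvider.OPENAI,
    api_key=os.environ["OPENAI_API_KEY"],  # example sourcing; any string is accepted
)
```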
{nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nao-core
-Version: 0.0.12
+Version: 0.0.15
 Summary: nao Core is your analytics context builder with the best chat interface.
 Project-URL: Homepage, https://getnao.io
 Project-URL: Repository, https://github.com/naolabs/chat
@@ -20,11 +20,13 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Requires-Dist: cyclopts>=4.4.4
+Requires-Dist: fastapi>=0.128.0
 Requires-Dist: ibis-framework[bigquery]>=9.0.0
 Requires-Dist: openai>=1.0.0
 Requires-Dist: pydantic>=2.10.0
 Requires-Dist: pyyaml>=6.0.0
 Requires-Dist: rich>=14.0.0
+Requires-Dist: uvicorn>=0.40.0
 Description-Content-Type: text/markdown

 # nao CLI