nao-core 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +1 -1
- nao_core/bin/fastapi/main.py +6 -0
- nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
- nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
- nao_core/bin/migrations-postgres/meta/_journal.json +7 -0
- nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
- nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
- nao_core/bin/migrations-sqlite/meta/_journal.json +7 -0
- nao_core/bin/nao-chat-server +0 -0
- nao_core/bin/public/assets/{code-block-F6WJLWQG-z4zcca7w.js → code-block-F6WJLWQG-TAi8koem.js} +1 -1
- nao_core/bin/public/assets/index-BfHcd9Xz.css +1 -0
- nao_core/bin/public/assets/{index-DhhS7iVA.js → index-Mzo9bkag.js} +256 -172
- nao_core/bin/public/index.html +2 -2
- nao_core/commands/chat.py +11 -10
- nao_core/commands/init.py +27 -4
- nao_core/commands/sync/__init__.py +40 -21
- nao_core/commands/sync/accessors.py +218 -139
- nao_core/commands/sync/cleanup.py +133 -0
- nao_core/commands/sync/providers/__init__.py +30 -0
- nao_core/commands/sync/providers/base.py +87 -0
- nao_core/commands/sync/providers/databases/__init__.py +17 -0
- nao_core/commands/sync/providers/databases/bigquery.py +78 -0
- nao_core/commands/sync/providers/databases/databricks.py +79 -0
- nao_core/commands/sync/providers/databases/duckdb.py +83 -0
- nao_core/commands/sync/providers/databases/postgres.py +78 -0
- nao_core/commands/sync/providers/databases/provider.py +123 -0
- nao_core/commands/sync/providers/databases/snowflake.py +78 -0
- nao_core/commands/sync/providers/repositories/__init__.py +5 -0
- nao_core/commands/sync/{repositories.py → providers/repositories/provider.py} +43 -20
- nao_core/config/__init__.py +2 -0
- nao_core/config/base.py +23 -4
- nao_core/config/databases/__init__.py +5 -0
- nao_core/config/databases/base.py +1 -0
- nao_core/config/databases/postgres.py +78 -0
- nao_core/templates/__init__.py +12 -0
- nao_core/templates/defaults/databases/columns.md.j2 +23 -0
- nao_core/templates/defaults/databases/description.md.j2 +32 -0
- nao_core/templates/defaults/databases/preview.md.j2 +22 -0
- nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
- nao_core/templates/engine.py +133 -0
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/METADATA +6 -2
- nao_core-0.0.31.dist-info/RECORD +86 -0
- nao_core/bin/public/assets/index-ClduEZSo.css +0 -1
- nao_core/commands/sync/databases.py +0 -374
- nao_core-0.0.30.dist-info/RECORD +0 -65
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/WHEEL +0 -0
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/entry_points.txt +0 -0
- {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/licenses/LICENSE +0 -0
nao_core/bin/public/index.html
CHANGED
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
<link rel="apple-touch-icon" href="/logo192.png" />
|
|
10
10
|
<link rel="manifest" href="/manifest.json" />
|
|
11
11
|
<title>nao — Chat with your data</title>
|
|
12
|
-
<script type="module" crossorigin src="/assets/index-
|
|
13
|
-
<link rel="stylesheet" crossorigin href="/assets/index-
|
|
12
|
+
<script type="module" crossorigin src="/assets/index-Mzo9bkag.js"></script>
|
|
13
|
+
<link rel="stylesheet" crossorigin href="/assets/index-BfHcd9Xz.css">
|
|
14
14
|
</head>
|
|
15
15
|
<body>
|
|
16
16
|
<div id="app"></div>
|
nao_core/commands/chat.py
CHANGED
|
@@ -106,18 +106,21 @@ def chat():
|
|
|
106
106
|
"""
|
|
107
107
|
console.print("\n[bold cyan]💬 Starting nao chat...[/bold cyan]\n")
|
|
108
108
|
|
|
109
|
-
binary_path = get_server_binary_path()
|
|
110
|
-
bin_dir = binary_path.parent
|
|
111
|
-
|
|
112
|
-
console.print(f"[dim]Server binary: {binary_path}[/dim]")
|
|
113
|
-
console.print(f"[dim]Working directory: {bin_dir}[/dim]")
|
|
114
|
-
|
|
115
109
|
# Try to load nao config from current directory
|
|
116
110
|
config = NaoConfig.try_load()
|
|
117
111
|
if config:
|
|
118
112
|
console.print(f"[bold green]✓[/bold green] Loaded config from {Path.cwd() / 'nao_config.yaml'}")
|
|
119
113
|
else:
|
|
120
|
-
console.print(
|
|
114
|
+
console.print(
|
|
115
|
+
"[bold red]✗No nao_config.yaml found in current directory. Please move to a nao project directory.[/bold red]"
|
|
116
|
+
)
|
|
117
|
+
sys.exit(1)
|
|
118
|
+
|
|
119
|
+
binary_path = get_server_binary_path()
|
|
120
|
+
bin_dir = binary_path.parent
|
|
121
|
+
|
|
122
|
+
console.print(f"[dim]Server binary: {binary_path}[/dim]")
|
|
123
|
+
console.print(f"[dim]Working directory: {bin_dir}[/dim]")
|
|
121
124
|
|
|
122
125
|
# Start the server processes
|
|
123
126
|
chat_process = None
|
|
@@ -154,10 +157,9 @@ def chat():
|
|
|
154
157
|
if config and config.slack:
|
|
155
158
|
env["SLACK_BOT_TOKEN"] = config.slack.bot_token
|
|
156
159
|
env["SLACK_SIGNING_SECRET"] = config.slack.signing_secret
|
|
157
|
-
env["SLACK_POST_MESSAGE_URL"] = config.slack.post_message_url
|
|
158
160
|
console.print("[bold green]✓[/bold green] Set Slack environment variables from config")
|
|
159
161
|
|
|
160
|
-
env["
|
|
162
|
+
env["NAO_DEFAULT_PROJECT_PATH"] = str(Path.cwd())
|
|
161
163
|
env["FASTAPI_URL"] = f"http://localhost:{FASTAPI_PORT}"
|
|
162
164
|
|
|
163
165
|
# Start the FastAPI server first
|
|
@@ -166,7 +168,6 @@ def chat():
|
|
|
166
168
|
|
|
167
169
|
fastapi_process = subprocess.Popen(
|
|
168
170
|
[sys.executable, str(fastapi_path)],
|
|
169
|
-
cwd=str(fastapi_path.parent),
|
|
170
171
|
env=env,
|
|
171
172
|
stdout=subprocess.DEVNULL,
|
|
172
173
|
stderr=subprocess.DEVNULL,
|
nao_core/commands/init.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from dataclasses import dataclass
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Annotated
|
|
4
5
|
|
|
@@ -16,6 +17,7 @@ from nao_core.config import (
|
|
|
16
17
|
LLMConfig,
|
|
17
18
|
LLMProvider,
|
|
18
19
|
NaoConfig,
|
|
20
|
+
PostgresConfig,
|
|
19
21
|
SlackConfig,
|
|
20
22
|
SnowflakeConfig,
|
|
21
23
|
)
|
|
@@ -47,6 +49,12 @@ class EmptyApiKeyError(InitError):
|
|
|
47
49
|
super().__init__("API key cannot be empty.")
|
|
48
50
|
|
|
49
51
|
|
|
52
|
+
@dataclass
|
|
53
|
+
class CreatedFile:
|
|
54
|
+
path: Path
|
|
55
|
+
content: str | None
|
|
56
|
+
|
|
57
|
+
|
|
50
58
|
def setup_project_name(force: bool = False) -> tuple[str, Path]:
|
|
51
59
|
"""Setup the project name."""
|
|
52
60
|
# Check if we're in a directory with an existing nao_config.yaml
|
|
@@ -101,6 +109,11 @@ def setup_snowflake() -> SnowflakeConfig:
|
|
|
101
109
|
return SnowflakeConfig.promptConfig()
|
|
102
110
|
|
|
103
111
|
|
|
112
|
+
def setup_postgres() -> PostgresConfig:
|
|
113
|
+
"""Setup a PostgreSQL database configuration."""
|
|
114
|
+
return PostgresConfig.promptConfig()
|
|
115
|
+
|
|
116
|
+
|
|
104
117
|
def setup_databases() -> list[AnyDatabaseConfig]:
|
|
105
118
|
"""Setup database configurations."""
|
|
106
119
|
databases: list[AnyDatabaseConfig] = []
|
|
@@ -124,6 +137,10 @@ def setup_databases() -> list[AnyDatabaseConfig]:
|
|
|
124
137
|
db_config = setup_bigquery()
|
|
125
138
|
databases.append(db_config)
|
|
126
139
|
console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
|
|
140
|
+
elif db_type == DatabaseType.POSTGRES.value:
|
|
141
|
+
db_config = setup_postgres()
|
|
142
|
+
databases.append(db_config)
|
|
143
|
+
console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
|
|
127
144
|
|
|
128
145
|
elif db_type == DatabaseType.DUCKDB.value:
|
|
129
146
|
db_config = setup_duckdb()
|
|
@@ -233,7 +250,7 @@ def setup_slack() -> SlackConfig | None:
|
|
|
233
250
|
return slack_config
|
|
234
251
|
|
|
235
252
|
|
|
236
|
-
def create_empty_structure(project_path: Path) -> tuple[list[str], list[
|
|
253
|
+
def create_empty_structure(project_path: Path) -> tuple[list[str], list[CreatedFile]]:
|
|
237
254
|
"""Create project folder structure to guide users.
|
|
238
255
|
|
|
239
256
|
To add new folders, simply append them to the FOLDERS list below.
|
|
@@ -249,7 +266,10 @@ def create_empty_structure(project_path: Path) -> tuple[list[str], list[str]]:
|
|
|
249
266
|
"agent/mcps",
|
|
250
267
|
]
|
|
251
268
|
|
|
252
|
-
FILES = [
|
|
269
|
+
FILES = [
|
|
270
|
+
CreatedFile(path=Path("RULES.md"), content=None),
|
|
271
|
+
CreatedFile(path=Path(".naoignore"), content="templates/\n"),
|
|
272
|
+
]
|
|
253
273
|
|
|
254
274
|
created_folders = []
|
|
255
275
|
for folder in FOLDERS:
|
|
@@ -259,8 +279,11 @@ def create_empty_structure(project_path: Path) -> tuple[list[str], list[str]]:
|
|
|
259
279
|
|
|
260
280
|
created_files = []
|
|
261
281
|
for file in FILES:
|
|
262
|
-
file_path = project_path / file
|
|
263
|
-
|
|
282
|
+
file_path = project_path / file.path
|
|
283
|
+
if file.content:
|
|
284
|
+
file_path.write_text(file.content)
|
|
285
|
+
else:
|
|
286
|
+
file_path.touch()
|
|
264
287
|
created_files.append(file)
|
|
265
288
|
|
|
266
289
|
return created_folders, created_files
|
|
@@ -7,50 +7,69 @@ from rich.console import Console
|
|
|
7
7
|
|
|
8
8
|
from nao_core.config import NaoConfig
|
|
9
9
|
|
|
10
|
-
from .
|
|
11
|
-
from .repositories import sync_repositories
|
|
10
|
+
from .providers import SyncProvider, SyncResult, get_all_providers
|
|
12
11
|
|
|
13
12
|
console = Console()
|
|
14
13
|
|
|
15
14
|
|
|
16
|
-
def sync(
|
|
17
|
-
|
|
15
|
+
def sync(
|
|
16
|
+
output_dirs: dict[str, str] | None = None,
|
|
17
|
+
providers: list[SyncProvider] | None = None,
|
|
18
|
+
):
|
|
19
|
+
"""Sync resources using configured providers.
|
|
18
20
|
|
|
19
|
-
Creates folder structures:
|
|
21
|
+
Creates folder structures based on each provider's default output directory:
|
|
20
22
|
- repos/<repo_name>/ (git repositories)
|
|
21
|
-
- databases
|
|
23
|
+
- databases/<type>/<connection>/<dataset>/<table>/*.md (database schemas)
|
|
22
24
|
|
|
23
25
|
Args:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
+
output_dirs: Optional dict mapping provider names to custom output directories.
|
|
27
|
+
If not specified, uses each provider's default_output_dir.
|
|
28
|
+
providers: Optional list of providers to use. If not specified, uses all
|
|
29
|
+
registered providers.
|
|
26
30
|
"""
|
|
27
31
|
console.print("\n[bold cyan]🔄 nao sync[/bold cyan]\n")
|
|
28
32
|
|
|
29
33
|
config = NaoConfig.try_load()
|
|
30
|
-
if
|
|
34
|
+
if config is None:
|
|
31
35
|
console.print("[bold red]✗[/bold red] No nao_config.yaml found in current directory")
|
|
32
36
|
console.print("[dim]Run 'nao init' to create a configuration file[/dim]")
|
|
33
37
|
sys.exit(1)
|
|
34
38
|
|
|
39
|
+
# Get project path (current working directory after NaoConfig.try_load)
|
|
40
|
+
project_path = Path.cwd()
|
|
41
|
+
|
|
35
42
|
console.print(f"[dim]Project:[/dim] {config.project_name}")
|
|
36
43
|
|
|
37
|
-
|
|
38
|
-
if
|
|
39
|
-
|
|
40
|
-
repos_synced = sync_repositories(config.repos, repos_path)
|
|
44
|
+
# Use provided providers or default to all registered providers
|
|
45
|
+
active_providers = providers if providers is not None else get_all_providers()
|
|
46
|
+
output_dirs = output_dirs or {}
|
|
41
47
|
|
|
42
|
-
|
|
43
|
-
|
|
48
|
+
# Run each provider
|
|
49
|
+
results: list[SyncResult] = []
|
|
50
|
+
for provider in active_providers:
|
|
51
|
+
if config is None or not provider.should_sync(config):
|
|
52
|
+
continue
|
|
44
53
|
|
|
45
|
-
|
|
54
|
+
# Get output directory (custom or default)
|
|
55
|
+
output_dir = output_dirs.get(provider.name, provider.default_output_dir)
|
|
56
|
+
output_path = Path(output_dir)
|
|
46
57
|
|
|
47
|
-
|
|
48
|
-
|
|
58
|
+
# Get items and sync
|
|
59
|
+
items = provider.get_items(config)
|
|
60
|
+
result = provider.sync(items, output_path, project_path=project_path)
|
|
61
|
+
results.append(result)
|
|
62
|
+
|
|
63
|
+
# Print summary
|
|
64
|
+
console.print("\n[bold green]✓ Sync Complete[/bold green]\n")
|
|
49
65
|
|
|
50
|
-
|
|
51
|
-
|
|
66
|
+
has_results = False
|
|
67
|
+
for result in results:
|
|
68
|
+
if result.items_synced > 0:
|
|
69
|
+
has_results = True
|
|
70
|
+
console.print(f" [dim]{result.provider_name}:[/dim] {result.get_summary()}")
|
|
52
71
|
|
|
53
|
-
if
|
|
72
|
+
if not has_results:
|
|
54
73
|
console.print(" [dim]Nothing to sync[/dim]")
|
|
55
74
|
|
|
56
75
|
console.print()
|
|
@@ -1,13 +1,28 @@
|
|
|
1
1
|
"""Data accessor classes for generating markdown documentation from database tables."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
6
7
|
from ibis import BaseBackend
|
|
7
8
|
|
|
9
|
+
from nao_core.templates import get_template_engine
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
class DataAccessor(ABC):
|
|
10
|
-
"""Base class for data accessors that generate markdown files for tables.
|
|
13
|
+
"""Base class for data accessors that generate markdown files for tables.
|
|
14
|
+
|
|
15
|
+
Accessors use Jinja2 templates for generating output. Default templates
|
|
16
|
+
are shipped with nao and can be overridden by users by placing templates
|
|
17
|
+
with the same name in their project's `templates/` directory.
|
|
18
|
+
|
|
19
|
+
Example:
|
|
20
|
+
To override the preview template, create:
|
|
21
|
+
`<project_root>/templates/databases/preview.md.j2`
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
# Path to the nao project root (set by sync provider)
|
|
25
|
+
_project_path: Path | None = None
|
|
11
26
|
|
|
12
27
|
@property
|
|
13
28
|
@abstractmethod
|
|
@@ -15,24 +30,57 @@ class DataAccessor(ABC):
|
|
|
15
30
|
"""The filename this accessor writes to (e.g., 'columns.md')."""
|
|
16
31
|
...
|
|
17
32
|
|
|
33
|
+
@property
|
|
18
34
|
@abstractmethod
|
|
19
|
-
def
|
|
20
|
-
"""
|
|
35
|
+
def template_name(self) -> str:
|
|
36
|
+
"""The template file to use (e.g., 'databases/columns.md.j2')."""
|
|
37
|
+
...
|
|
38
|
+
|
|
39
|
+
@abstractmethod
|
|
40
|
+
def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
|
|
41
|
+
"""Get the template context for rendering.
|
|
21
42
|
|
|
22
43
|
Args:
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
44
|
+
conn: The Ibis database connection
|
|
45
|
+
dataset: The dataset/schema name
|
|
46
|
+
table: The table name
|
|
26
47
|
|
|
27
48
|
Returns:
|
|
28
|
-
|
|
49
|
+
Dictionary of variables to pass to the template
|
|
29
50
|
"""
|
|
30
51
|
...
|
|
31
52
|
|
|
53
|
+
def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
|
|
54
|
+
"""Generate the markdown content for a table using templates.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
conn: The Ibis database connection
|
|
58
|
+
dataset: The dataset/schema name
|
|
59
|
+
table: The table name
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Markdown string content
|
|
63
|
+
"""
|
|
64
|
+
try:
|
|
65
|
+
context = self.get_context(conn, dataset, table)
|
|
66
|
+
engine = get_template_engine(self._project_path)
|
|
67
|
+
return engine.render(self.template_name, **context)
|
|
68
|
+
except Exception as e:
|
|
69
|
+
return f"# {table}\n\nError generating content: {e}"
|
|
70
|
+
|
|
32
71
|
def get_table(self, conn: BaseBackend, dataset: str, table: str):
|
|
33
72
|
"""Helper to get an Ibis table reference."""
|
|
34
73
|
return conn.table(table, database=dataset)
|
|
35
74
|
|
|
75
|
+
@classmethod
|
|
76
|
+
def set_project_path(cls, path: Path | None) -> None:
|
|
77
|
+
"""Set the project path for template resolution.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
path: Path to the nao project root
|
|
81
|
+
"""
|
|
82
|
+
cls._project_path = path
|
|
83
|
+
|
|
36
84
|
|
|
37
85
|
def truncate_middle(text: str, max_length: int) -> str:
|
|
38
86
|
"""Truncate text in the middle if it exceeds max_length."""
|
|
@@ -43,7 +91,14 @@ def truncate_middle(text: str, max_length: int) -> str:
|
|
|
43
91
|
|
|
44
92
|
|
|
45
93
|
class ColumnsAccessor(DataAccessor):
|
|
46
|
-
"""Generates columns.md with column names, types, and nullable info.
|
|
94
|
+
"""Generates columns.md with column names, types, and nullable info.
|
|
95
|
+
|
|
96
|
+
Template variables:
|
|
97
|
+
- table_name: Name of the table
|
|
98
|
+
- dataset: Schema/dataset name
|
|
99
|
+
- columns: List of dicts with 'name', 'type', 'nullable', 'description'
|
|
100
|
+
- column_count: Total number of columns
|
|
101
|
+
"""
|
|
47
102
|
|
|
48
103
|
def __init__(self, max_description_length: int = 256):
|
|
49
104
|
self.max_description_length = max_description_length
|
|
@@ -52,37 +107,43 @@ class ColumnsAccessor(DataAccessor):
|
|
|
52
107
|
def filename(self) -> str:
|
|
53
108
|
return "columns.md"
|
|
54
109
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
print(e)
|
|
81
|
-
return f"# {table}\n\nError fetching schema: {e}"
|
|
110
|
+
@property
|
|
111
|
+
def template_name(self) -> str:
|
|
112
|
+
return "databases/columns.md.j2"
|
|
113
|
+
|
|
114
|
+
def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
|
|
115
|
+
t = self.get_table(conn, dataset, table)
|
|
116
|
+
schema = t.schema()
|
|
117
|
+
|
|
118
|
+
columns = []
|
|
119
|
+
for name, dtype in schema.items():
|
|
120
|
+
columns.append(
|
|
121
|
+
{
|
|
122
|
+
"name": name,
|
|
123
|
+
"type": str(dtype),
|
|
124
|
+
"nullable": dtype.nullable if hasattr(dtype, "nullable") else True,
|
|
125
|
+
"description": None, # Could be populated from metadata
|
|
126
|
+
}
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
return {
|
|
130
|
+
"table_name": table,
|
|
131
|
+
"dataset": dataset,
|
|
132
|
+
"columns": columns,
|
|
133
|
+
"column_count": len(columns),
|
|
134
|
+
}
|
|
82
135
|
|
|
83
136
|
|
|
84
137
|
class PreviewAccessor(DataAccessor):
|
|
85
|
-
"""Generates preview.md with the first N rows of data as JSONL.
|
|
138
|
+
"""Generates preview.md with the first N rows of data as JSONL.
|
|
139
|
+
|
|
140
|
+
Template variables:
|
|
141
|
+
- table_name: Name of the table
|
|
142
|
+
- dataset: Schema/dataset name
|
|
143
|
+
- rows: List of row dictionaries
|
|
144
|
+
- row_count: Number of preview rows
|
|
145
|
+
- columns: List of column info dicts
|
|
146
|
+
"""
|
|
86
147
|
|
|
87
148
|
def __init__(self, num_rows: int = 10):
|
|
88
149
|
self.num_rows = num_rows
|
|
@@ -91,121 +152,139 @@ class PreviewAccessor(DataAccessor):
|
|
|
91
152
|
def filename(self) -> str:
|
|
92
153
|
return "preview.md"
|
|
93
154
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
155
|
+
@property
|
|
156
|
+
def template_name(self) -> str:
|
|
157
|
+
return "databases/preview.md.j2"
|
|
158
|
+
|
|
159
|
+
def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
|
|
160
|
+
t = self.get_table(conn, dataset, table)
|
|
161
|
+
schema = t.schema()
|
|
162
|
+
preview_df = t.limit(self.num_rows).execute()
|
|
163
|
+
|
|
164
|
+
rows = []
|
|
165
|
+
for _, row in preview_df.iterrows():
|
|
166
|
+
row_dict = row.to_dict()
|
|
167
|
+
# Convert non-serializable types to strings
|
|
168
|
+
for key, val in row_dict.items():
|
|
169
|
+
if val is not None and not isinstance(val, (str, int, float, bool, list, dict)):
|
|
170
|
+
row_dict[key] = str(val)
|
|
171
|
+
rows.append(row_dict)
|
|
172
|
+
|
|
173
|
+
columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
|
|
174
|
+
|
|
175
|
+
return {
|
|
176
|
+
"table_name": table,
|
|
177
|
+
"dataset": dataset,
|
|
178
|
+
"rows": rows,
|
|
179
|
+
"row_count": len(rows),
|
|
180
|
+
"columns": columns,
|
|
181
|
+
}
|
|
119
182
|
|
|
120
183
|
|
|
121
184
|
class DescriptionAccessor(DataAccessor):
|
|
122
|
-
"""Generates description.md with table metadata (row count, column count, etc.).
|
|
185
|
+
"""Generates description.md with table metadata (row count, column count, etc.).
|
|
186
|
+
|
|
187
|
+
Template variables:
|
|
188
|
+
- table_name: Name of the table
|
|
189
|
+
- dataset: Schema/dataset name
|
|
190
|
+
- row_count: Total rows in the table
|
|
191
|
+
- column_count: Number of columns
|
|
192
|
+
- description: Table description (if available)
|
|
193
|
+
- columns: List of column info dicts
|
|
194
|
+
"""
|
|
123
195
|
|
|
124
196
|
@property
|
|
125
197
|
def filename(self) -> str:
|
|
126
198
|
return "description.md"
|
|
127
199
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
"",
|
|
148
|
-
"## Description",
|
|
149
|
-
"",
|
|
150
|
-
"_No description available._",
|
|
151
|
-
"",
|
|
152
|
-
]
|
|
153
|
-
|
|
154
|
-
return "\n".join(lines)
|
|
155
|
-
except Exception as e:
|
|
156
|
-
return f"# {table}\n\nError fetching description: {e}"
|
|
200
|
+
@property
|
|
201
|
+
def template_name(self) -> str:
|
|
202
|
+
return "databases/description.md.j2"
|
|
203
|
+
|
|
204
|
+
def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
|
|
205
|
+
t = self.get_table(conn, dataset, table)
|
|
206
|
+
schema = t.schema()
|
|
207
|
+
|
|
208
|
+
row_count = t.count().execute()
|
|
209
|
+
columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
|
|
210
|
+
|
|
211
|
+
return {
|
|
212
|
+
"table_name": table,
|
|
213
|
+
"dataset": dataset,
|
|
214
|
+
"row_count": row_count,
|
|
215
|
+
"column_count": len(schema),
|
|
216
|
+
"description": None, # Could be populated from metadata
|
|
217
|
+
"columns": columns,
|
|
218
|
+
}
|
|
157
219
|
|
|
158
220
|
|
|
159
221
|
class ProfilingAccessor(DataAccessor):
|
|
160
|
-
"""Generates profiling.md with column statistics and data profiling.
|
|
222
|
+
"""Generates profiling.md with column statistics and data profiling.
|
|
223
|
+
|
|
224
|
+
Template variables:
|
|
225
|
+
- table_name: Name of the table
|
|
226
|
+
- dataset: Schema/dataset name
|
|
227
|
+
- column_stats: List of dicts with stats for each column:
|
|
228
|
+
- name: Column name
|
|
229
|
+
- type: Data type
|
|
230
|
+
- null_count: Number of nulls
|
|
231
|
+
- unique_count: Number of unique values
|
|
232
|
+
- min_value: Min value (numeric/temporal)
|
|
233
|
+
- max_value: Max value (numeric/temporal)
|
|
234
|
+
- error: Error message if stats couldn't be computed
|
|
235
|
+
- columns: List of column info dicts
|
|
236
|
+
"""
|
|
161
237
|
|
|
162
238
|
@property
|
|
163
239
|
def filename(self) -> str:
|
|
164
240
|
return "profiling.md"
|
|
165
241
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
dtype_str
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
242
|
+
@property
|
|
243
|
+
def template_name(self) -> str:
|
|
244
|
+
return "databases/profiling.md.j2"
|
|
245
|
+
|
|
246
|
+
def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
|
|
247
|
+
t = self.get_table(conn, dataset, table)
|
|
248
|
+
schema = t.schema()
|
|
249
|
+
|
|
250
|
+
column_stats = []
|
|
251
|
+
columns = []
|
|
252
|
+
|
|
253
|
+
for name, dtype in schema.items():
|
|
254
|
+
columns.append({"name": name, "type": str(dtype)})
|
|
255
|
+
col = t[name]
|
|
256
|
+
dtype_str = str(dtype)
|
|
257
|
+
|
|
258
|
+
stat = {
|
|
259
|
+
"name": name,
|
|
260
|
+
"type": dtype_str,
|
|
261
|
+
"null_count": 0,
|
|
262
|
+
"unique_count": 0,
|
|
263
|
+
"min_value": None,
|
|
264
|
+
"max_value": None,
|
|
265
|
+
"error": None,
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
try:
|
|
269
|
+
stat["null_count"] = t.filter(col.isnull()).count().execute()
|
|
270
|
+
stat["unique_count"] = col.nunique().execute()
|
|
271
|
+
|
|
272
|
+
if dtype.is_numeric() or dtype.is_temporal():
|
|
273
|
+
try:
|
|
274
|
+
min_val = str(col.min().execute())
|
|
275
|
+
max_val = str(col.max().execute())
|
|
276
|
+
stat["min_value"] = truncate_middle(min_val, 20)
|
|
277
|
+
stat["max_value"] = truncate_middle(max_val, 20)
|
|
278
|
+
except Exception:
|
|
279
|
+
pass
|
|
280
|
+
except Exception as col_error:
|
|
281
|
+
stat["error"] = str(col_error)
|
|
282
|
+
|
|
283
|
+
column_stats.append(stat)
|
|
284
|
+
|
|
285
|
+
return {
|
|
286
|
+
"table_name": table,
|
|
287
|
+
"dataset": dataset,
|
|
288
|
+
"column_stats": column_stats,
|
|
289
|
+
"columns": columns,
|
|
290
|
+
}
|