nao-core 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nao_core/__init__.py +1 -1
  2. nao_core/bin/fastapi/main.py +6 -0
  3. nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
  4. nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
  5. nao_core/bin/migrations-postgres/meta/_journal.json +7 -0
  6. nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
  7. nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
  8. nao_core/bin/migrations-sqlite/meta/_journal.json +7 -0
  9. nao_core/bin/nao-chat-server +0 -0
  10. nao_core/bin/public/assets/{code-block-F6WJLWQG-z4zcca7w.js → code-block-F6WJLWQG-TAi8koem.js} +1 -1
  11. nao_core/bin/public/assets/index-BfHcd9Xz.css +1 -0
  12. nao_core/bin/public/assets/{index-DhhS7iVA.js → index-Mzo9bkag.js} +256 -172
  13. nao_core/bin/public/index.html +2 -2
  14. nao_core/commands/chat.py +11 -10
  15. nao_core/commands/init.py +27 -4
  16. nao_core/commands/sync/__init__.py +40 -21
  17. nao_core/commands/sync/accessors.py +218 -139
  18. nao_core/commands/sync/cleanup.py +133 -0
  19. nao_core/commands/sync/providers/__init__.py +30 -0
  20. nao_core/commands/sync/providers/base.py +87 -0
  21. nao_core/commands/sync/providers/databases/__init__.py +17 -0
  22. nao_core/commands/sync/providers/databases/bigquery.py +78 -0
  23. nao_core/commands/sync/providers/databases/databricks.py +79 -0
  24. nao_core/commands/sync/providers/databases/duckdb.py +83 -0
  25. nao_core/commands/sync/providers/databases/postgres.py +78 -0
  26. nao_core/commands/sync/providers/databases/provider.py +123 -0
  27. nao_core/commands/sync/providers/databases/snowflake.py +78 -0
  28. nao_core/commands/sync/providers/repositories/__init__.py +5 -0
  29. nao_core/commands/sync/{repositories.py → providers/repositories/provider.py} +43 -20
  30. nao_core/config/__init__.py +2 -0
  31. nao_core/config/base.py +23 -4
  32. nao_core/config/databases/__init__.py +5 -0
  33. nao_core/config/databases/base.py +1 -0
  34. nao_core/config/databases/postgres.py +78 -0
  35. nao_core/templates/__init__.py +12 -0
  36. nao_core/templates/defaults/databases/columns.md.j2 +23 -0
  37. nao_core/templates/defaults/databases/description.md.j2 +32 -0
  38. nao_core/templates/defaults/databases/preview.md.j2 +22 -0
  39. nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
  40. nao_core/templates/engine.py +133 -0
  41. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/METADATA +6 -2
  42. nao_core-0.0.31.dist-info/RECORD +86 -0
  43. nao_core/bin/public/assets/index-ClduEZSo.css +0 -1
  44. nao_core/commands/sync/databases.py +0 -374
  45. nao_core-0.0.30.dist-info/RECORD +0 -65
  46. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/WHEEL +0 -0
  47. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/entry_points.txt +0 -0
  48. {nao_core-0.0.30.dist-info → nao_core-0.0.31.dist-info}/licenses/LICENSE +0 -0
nao_core/bin/public/index.html CHANGED
@@ -9,8 +9,8 @@
  <link rel="apple-touch-icon" href="/logo192.png" />
  <link rel="manifest" href="/manifest.json" />
  <title>nao — Chat with your data</title>
- <script type="module" crossorigin src="/assets/index-DhhS7iVA.js"></script>
- <link rel="stylesheet" crossorigin href="/assets/index-ClduEZSo.css">
+ <script type="module" crossorigin src="/assets/index-Mzo9bkag.js"></script>
+ <link rel="stylesheet" crossorigin href="/assets/index-BfHcd9Xz.css">
  </head>
  <body>
  <div id="app"></div>
nao_core/commands/chat.py CHANGED
@@ -106,18 +106,21 @@ def chat():
  """
  console.print("\n[bold cyan]💬 Starting nao chat...[/bold cyan]\n")
 
- binary_path = get_server_binary_path()
- bin_dir = binary_path.parent
-
- console.print(f"[dim]Server binary: {binary_path}[/dim]")
- console.print(f"[dim]Working directory: {bin_dir}[/dim]")
-
  # Try to load nao config from current directory
  config = NaoConfig.try_load()
  if config:
  console.print(f"[bold green]✓[/bold green] Loaded config from {Path.cwd() / 'nao_config.yaml'}")
  else:
- console.print("[dim]No nao_config.yaml found in current directory[/dim]")
+ console.print(
+ "[bold red]✗No nao_config.yaml found in current directory. Please move to a nao project directory.[/bold red]"
+ )
+ sys.exit(1)
+
+ binary_path = get_server_binary_path()
+ bin_dir = binary_path.parent
+
+ console.print(f"[dim]Server binary: {binary_path}[/dim]")
+ console.print(f"[dim]Working directory: {bin_dir}[/dim]")
 
  # Start the server processes
  chat_process = None
@@ -154,10 +157,9 @@ def chat():
  if config and config.slack:
  env["SLACK_BOT_TOKEN"] = config.slack.bot_token
  env["SLACK_SIGNING_SECRET"] = config.slack.signing_secret
- env["SLACK_POST_MESSAGE_URL"] = config.slack.post_message_url
  console.print("[bold green]✓[/bold green] Set Slack environment variables from config")
 
- env["NAO_PROJECT_FOLDER"] = str(Path.cwd())
+ env["NAO_DEFAULT_PROJECT_PATH"] = str(Path.cwd())
  env["FASTAPI_URL"] = f"http://localhost:{FASTAPI_PORT}"
 
  # Start the FastAPI server first
@@ -166,7 +168,6 @@ def chat():
 
  fastapi_process = subprocess.Popen(
  [sys.executable, str(fastapi_path)],
- cwd=str(fastapi_path.parent),
  env=env,
  stdout=subprocess.DEVNULL,
  stderr=subprocess.DEVNULL,
nao_core/commands/init.py CHANGED
@@ -1,4 +1,5 @@
  import os
+ from dataclasses import dataclass
  from pathlib import Path
  from typing import Annotated
 
@@ -16,6 +17,7 @@ from nao_core.config import (
  LLMConfig,
  LLMProvider,
  NaoConfig,
+ PostgresConfig,
  SlackConfig,
  SnowflakeConfig,
  )
@@ -47,6 +49,12 @@ class EmptyApiKeyError(InitError):
  super().__init__("API key cannot be empty.")
 
 
+ @dataclass
+ class CreatedFile:
+ path: Path
+ content: str | None
+
+
  def setup_project_name(force: bool = False) -> tuple[str, Path]:
  """Setup the project name."""
  # Check if we're in a directory with an existing nao_config.yaml
@@ -101,6 +109,11 @@ def setup_snowflake() -> SnowflakeConfig:
  return SnowflakeConfig.promptConfig()
 
 
+ def setup_postgres() -> PostgresConfig:
+ """Setup a PostgreSQL database configuration."""
+ return PostgresConfig.promptConfig()
+
+
  def setup_databases() -> list[AnyDatabaseConfig]:
  """Setup database configurations."""
  databases: list[AnyDatabaseConfig] = []
@@ -124,6 +137,10 @@ def setup_databases() -> list[AnyDatabaseConfig]:
  db_config = setup_bigquery()
  databases.append(db_config)
  console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
+ elif db_type == DatabaseType.POSTGRES.value:
+ db_config = setup_postgres()
+ databases.append(db_config)
+ console.print(f"\n[bold green]✓[/bold green] Added database [cyan]{db_config.name}[/cyan]")
 
  elif db_type == DatabaseType.DUCKDB.value:
  db_config = setup_duckdb()
@@ -233,7 +250,7 @@ def setup_slack() -> SlackConfig | None:
  return slack_config
 
 
- def create_empty_structure(project_path: Path) -> tuple[list[str], list[str]]:
+ def create_empty_structure(project_path: Path) -> tuple[list[str], list[CreatedFile]]:
  """Create project folder structure to guide users.
 
  To add new folders, simply append them to the FOLDERS list below.
@@ -249,7 +266,10 @@ def create_empty_structure(project_path: Path) -> tuple[list[str], list[str]]:
  "agent/mcps",
  ]
 
- FILES = ["RULES.md"]
+ FILES = [
+ CreatedFile(path=Path("RULES.md"), content=None),
+ CreatedFile(path=Path(".naoignore"), content="templates/\n"),
+ ]
 
  created_folders = []
  for folder in FOLDERS:
@@ -259,8 +279,11 @@ def create_empty_structure(project_path: Path) -> tuple[list[str], list[str]]:
 
  created_files = []
  for file in FILES:
- file_path = project_path / file
- file_path.touch()
+ file_path = project_path / file.path
+ if file.content:
+ file_path.write_text(file.content)
+ else:
+ file_path.touch()
  created_files.append(file)
 
  return created_folders, created_files
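
The scaffolding change above means created files are now described by CreatedFile objects instead of bare strings, and .naoignore is seeded with "templates/". A small hypothetical usage sketch (not code from the package; only the file names, the CreatedFile handling, and the ".naoignore" content come from the diff above):

# Hypothetical usage of the scaffolding helper changed above.
from pathlib import Path

from nao_core.commands.init import create_empty_structure

project_path = Path("demo_project")
project_path.mkdir(exist_ok=True)

created_folders, created_files = create_empty_structure(project_path)

# RULES.md is still created empty via touch(); .naoignore now gets seeded content.
print((project_path / ".naoignore").read_text())  # -> "templates/\n"
print([f.path for f in created_files])            # CreatedFile entries, not bare strings
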
nao_core/commands/sync/__init__.py CHANGED
@@ -7,50 +7,69 @@ from rich.console import Console
 
  from nao_core.config import NaoConfig
 
- from .databases import sync_databases
- from .repositories import sync_repositories
+ from .providers import SyncProvider, SyncResult, get_all_providers
 
  console = Console()
 
 
- def sync(output_dir: str = "databases", repos_dir: str = "repos"):
- """Sync repositories and database schemas to local files.
+ def sync(
+ output_dirs: dict[str, str] | None = None,
+ providers: list[SyncProvider] | None = None,
+ ):
+ """Sync resources using configured providers.
 
- Creates folder structures:
+ Creates folder structures based on each provider's default output directory:
  - repos/<repo_name>/ (git repositories)
- - databases/bigquery/<connection>/<dataset>/<table>/*.md (database schemas)
+ - databases/<type>/<connection>/<dataset>/<table>/*.md (database schemas)
 
  Args:
- output_dir: Output directory for database schemas (default: "databases")
- repos_dir: Output directory for repositories (default: "repos")
+ output_dirs: Optional dict mapping provider names to custom output directories.
+ If not specified, uses each provider's default_output_dir.
+ providers: Optional list of providers to use. If not specified, uses all
+ registered providers.
  """
  console.print("\n[bold cyan]🔄 nao sync[/bold cyan]\n")
 
  config = NaoConfig.try_load()
- if not config:
+ if config is None:
  console.print("[bold red]✗[/bold red] No nao_config.yaml found in current directory")
  console.print("[dim]Run 'nao init' to create a configuration file[/dim]")
  sys.exit(1)
 
+ # Get project path (current working directory after NaoConfig.try_load)
+ project_path = Path.cwd()
+
  console.print(f"[dim]Project:[/dim] {config.project_name}")
 
- repos_synced = 0
- if config.repos:
- repos_path = Path(repos_dir)
- repos_synced = sync_repositories(config.repos, repos_path)
+ # Use provided providers or default to all registered providers
+ active_providers = providers if providers is not None else get_all_providers()
+ output_dirs = output_dirs or {}
 
- db_path = Path(output_dir)
- datasets_synced, tables_synced = sync_databases(config.databases, db_path)
+ # Run each provider
+ results: list[SyncResult] = []
+ for provider in active_providers:
+ if config is None or not provider.should_sync(config):
+ continue
 
- console.print("\n[bold green]✓ Sync Complete[/bold green]\n")
+ # Get output directory (custom or default)
+ output_dir = output_dirs.get(provider.name, provider.default_output_dir)
+ output_path = Path(output_dir)
 
- if repos_synced > 0:
- console.print(f" [dim]Repositories:[/dim] {repos_synced} synced")
+ # Get items and sync
+ items = provider.get_items(config)
+ result = provider.sync(items, output_path, project_path=project_path)
+ results.append(result)
+
+ # Print summary
+ console.print("\n[bold green]✓ Sync Complete[/bold green]\n")
 
- if tables_synced > 0:
- console.print(f" [dim]Databases:[/dim] {tables_synced} tables across {datasets_synced} datasets")
+ has_results = False
+ for result in results:
+ if result.items_synced > 0:
+ has_results = True
+ console.print(f" [dim]{result.provider_name}:[/dim] {result.get_summary()}")
 
- if repos_synced == 0 and tables_synced == 0:
+ if not has_results:
  console.print(" [dim]Nothing to sync[/dim]")
 
  console.print()
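
The provider interface itself lives in the new nao_core/commands/sync/providers/base.py, which this diff does not display. Based only on how sync() calls it above, a custom provider would look roughly like the sketch below; everything beyond the members used in the loop (name, default_output_dir, should_sync, get_items, sync, and SyncResult's provider_name/items_synced/get_summary) is an assumption, and the provider itself is a made-up example:

# Hedged sketch of a custom provider, inferred from the sync() loop above.
# SyncProvider and SyncResult are real imports per the diff; field names on
# SyncResult's constructor and the config attribute used here are assumptions.
from pathlib import Path

from nao_core.commands.sync.providers import SyncProvider, SyncResult
from nao_core.config import NaoConfig


class NotebookProvider(SyncProvider):  # hypothetical example provider
    name = "notebooks"
    default_output_dir = "notebooks"

    def should_sync(self, config: NaoConfig) -> bool:
        # Only run when the config declares something for this provider.
        return bool(getattr(config, "notebooks", None))

    def get_items(self, config: NaoConfig) -> list:
        return list(config.notebooks)

    def sync(self, items: list, output_path: Path, project_path: Path) -> SyncResult:
        output_path.mkdir(parents=True, exist_ok=True)
        for item in items:
            ...  # write one file per item under output_path
        return SyncResult(provider_name=self.name, items_synced=len(items))

How such a provider would be registered so that get_all_providers() picks it up goes through the providers package (providers/__init__.py, +30 lines), which is not visible in this diff.
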
nao_core/commands/sync/accessors.py CHANGED
@@ -1,13 +1,28 @@
  """Data accessor classes for generating markdown documentation from database tables."""
 
- import json
  from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Any
 
  from ibis import BaseBackend
 
+ from nao_core.templates import get_template_engine
+
 
  class DataAccessor(ABC):
- """Base class for data accessors that generate markdown files for tables."""
+ """Base class for data accessors that generate markdown files for tables.
+
+ Accessors use Jinja2 templates for generating output. Default templates
+ are shipped with nao and can be overridden by users by placing templates
+ with the same name in their project's `templates/` directory.
+
+ Example:
+ To override the preview template, create:
+ `<project_root>/templates/databases/preview.md.j2`
+ """
+
+ # Path to the nao project root (set by sync provider)
+ _project_path: Path | None = None
 
  @property
  @abstractmethod
@@ -15,24 +30,57 @@ class DataAccessor(ABC):
  """The filename this accessor writes to (e.g., 'columns.md')."""
  ...
 
+ @property
  @abstractmethod
- def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
- """Generate the markdown content for a table.
+ def template_name(self) -> str:
+ """The template file to use (e.g., 'databases/columns.md.j2')."""
+ ...
+
+ @abstractmethod
+ def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+ """Get the template context for rendering.
 
  Args:
- conn: The Ibis database connection
- dataset: The dataset/schema name
- table: The table name
+ conn: The Ibis database connection
+ dataset: The dataset/schema name
+ table: The table name
 
  Returns:
- Markdown string content
+ Dictionary of variables to pass to the template
  """
  ...
 
+ def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+ """Generate the markdown content for a table using templates.
+
+ Args:
+ conn: The Ibis database connection
+ dataset: The dataset/schema name
+ table: The table name
+
+ Returns:
+ Markdown string content
+ """
+ try:
+ context = self.get_context(conn, dataset, table)
+ engine = get_template_engine(self._project_path)
+ return engine.render(self.template_name, **context)
+ except Exception as e:
+ return f"# {table}\n\nError generating content: {e}"
+
  def get_table(self, conn: BaseBackend, dataset: str, table: str):
  """Helper to get an Ibis table reference."""
  return conn.table(table, database=dataset)
 
+ @classmethod
+ def set_project_path(cls, path: Path | None) -> None:
+ """Set the project path for template resolution.
+
+ Args:
+ path: Path to the nao project root
+ """
+ cls._project_path = path
+
 
  def truncate_middle(text: str, max_length: int) -> str:
  """Truncate text in the middle if it exceeds max_length."""
@@ -43,7 +91,14 @@ def truncate_middle(text: str, max_length: int) -> str:
 
 
  class ColumnsAccessor(DataAccessor):
- """Generates columns.md with column names, types, and nullable info."""
+ """Generates columns.md with column names, types, and nullable info.
+
+ Template variables:
+ - table_name: Name of the table
+ - dataset: Schema/dataset name
+ - columns: List of dicts with 'name', 'type', 'nullable', 'description'
+ - column_count: Total number of columns
+ """
 
  def __init__(self, max_description_length: int = 256):
  self.max_description_length = max_description_length
@@ -52,37 +107,43 @@ class ColumnsAccessor(DataAccessor):
  def filename(self) -> str:
  return "columns.md"
 
- def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
- try:
- t = self.get_table(conn, dataset, table)
- schema = t.schema()
- columns = list(schema.items())
-
- lines = [
- f"# {table}",
- "",
- f"**Dataset:** `{dataset}`",
- "",
- f"## Columns ({len(columns)})",
- "",
- ]
-
- for name, dtype in columns:
- description = None
- parts = [str(dtype)]
- if description:
- truncated = truncate_middle(description, self.max_description_length)
- parts.append(f'"{truncated}"')
- lines.append(f"- {name} ({', '.join(parts)})")
-
- return "\n".join(lines)
- except Exception as e:
- print(e)
- return f"# {table}\n\nError fetching schema: {e}"
+ @property
+ def template_name(self) -> str:
+ return "databases/columns.md.j2"
+
+ def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+ t = self.get_table(conn, dataset, table)
+ schema = t.schema()
+
+ columns = []
+ for name, dtype in schema.items():
+ columns.append(
+ {
+ "name": name,
+ "type": str(dtype),
+ "nullable": dtype.nullable if hasattr(dtype, "nullable") else True,
+ "description": None, # Could be populated from metadata
+ }
+ )
+
+ return {
+ "table_name": table,
+ "dataset": dataset,
+ "columns": columns,
+ "column_count": len(columns),
+ }
 
 
  class PreviewAccessor(DataAccessor):
- """Generates preview.md with the first N rows of data as JSONL."""
+ """Generates preview.md with the first N rows of data as JSONL.
+
+ Template variables:
+ - table_name: Name of the table
+ - dataset: Schema/dataset name
+ - rows: List of row dictionaries
+ - row_count: Number of preview rows
+ - columns: List of column info dicts
+ """
 
  def __init__(self, num_rows: int = 10):
  self.num_rows = num_rows
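
The packaged default templates (e.g. nao_core/templates/defaults/databases/columns.md.j2) are new files in this release but their contents are not included in this diff. As an illustration of the override mechanism described above, a hypothetical project-level templates/databases/columns.md.j2 could consume the documented context variables like this; the template body and the sample data are examples, not the packaged default:

# Hypothetical override template, rendered with plain Jinja2 to show the
# documented context variables (table_name, dataset, column_count, columns).
from jinja2 import Template

columns_override = Template(
    "# {{ table_name }}\n"
    "\n"
    "Dataset: {{ dataset }} ({{ column_count }} columns)\n"
    "\n"
    "{% for col in columns -%}\n"
    "- {{ col.name }}: {{ col.type }}{% if not col.nullable %} (not null){% endif %}\n"
    "{% endfor %}"
)

print(
    columns_override.render(
        table_name="orders",
        dataset="analytics",
        column_count=2,
        columns=[
            {"name": "id", "type": "int64", "nullable": False, "description": None},
            {"name": "amount", "type": "float64", "nullable": True, "description": None},
        ],
    )
)
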
@@ -91,121 +152,139 @@ class PreviewAccessor(DataAccessor):
  def filename(self) -> str:
  return "preview.md"
 
- def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
- try:
- t = self.get_table(conn, dataset, table)
- preview_df = t.limit(self.num_rows).execute()
-
- lines = [
- f"# {table} - Preview",
- "",
- f"**Dataset:** `{dataset}`",
- "",
- f"## Rows ({len(preview_df)})",
- "",
- ]
-
- for _, row in preview_df.iterrows():
- row_dict = row.to_dict()
- # Convert non-serializable types to strings
- for key, val in row_dict.items():
- if val is not None and not isinstance(val, (str, int, float, bool, list, dict)):
- row_dict[key] = str(val)
- lines.append(f"- {json.dumps(row_dict)}")
-
- return "\n".join(lines)
- except Exception as e:
- return f"# {table} - Preview\n\nError fetching preview: {e}"
+ @property
+ def template_name(self) -> str:
+ return "databases/preview.md.j2"
+
+ def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+ t = self.get_table(conn, dataset, table)
+ schema = t.schema()
+ preview_df = t.limit(self.num_rows).execute()
+
+ rows = []
+ for _, row in preview_df.iterrows():
+ row_dict = row.to_dict()
+ # Convert non-serializable types to strings
+ for key, val in row_dict.items():
+ if val is not None and not isinstance(val, (str, int, float, bool, list, dict)):
+ row_dict[key] = str(val)
+ rows.append(row_dict)
+
+ columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+ return {
+ "table_name": table,
+ "dataset": dataset,
+ "rows": rows,
+ "row_count": len(rows),
+ "columns": columns,
+ }
 
 
  class DescriptionAccessor(DataAccessor):
- """Generates description.md with table metadata (row count, column count, etc.)."""
+ """Generates description.md with table metadata (row count, column count, etc.).
+
+ Template variables:
+ - table_name: Name of the table
+ - dataset: Schema/dataset name
+ - row_count: Total rows in the table
+ - column_count: Number of columns
+ - description: Table description (if available)
+ - columns: List of column info dicts
+ """
 
  @property
  def filename(self) -> str:
  return "description.md"
 
- def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
- try:
- t = self.get_table(conn, dataset, table)
- schema = t.schema()
-
- row_count = t.count().execute()
- col_count = len(schema)
-
- lines = [
- f"# {table}",
- "",
- f"**Dataset:** `{dataset}`",
- "",
- "## Table Metadata",
- "",
- "| Property | Value |",
- "|----------|-------|",
- f"| **Row Count** | {row_count:,} |",
- f"| **Column Count** | {col_count} |",
- "",
- "## Description",
- "",
- "_No description available._",
- "",
- ]
-
- return "\n".join(lines)
- except Exception as e:
- return f"# {table}\n\nError fetching description: {e}"
+ @property
+ def template_name(self) -> str:
+ return "databases/description.md.j2"
+
+ def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+ t = self.get_table(conn, dataset, table)
+ schema = t.schema()
+
+ row_count = t.count().execute()
+ columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+ return {
+ "table_name": table,
+ "dataset": dataset,
+ "row_count": row_count,
+ "column_count": len(schema),
+ "description": None, # Could be populated from metadata
+ "columns": columns,
+ }
 
 
  class ProfilingAccessor(DataAccessor):
- """Generates profiling.md with column statistics and data profiling."""
+ """Generates profiling.md with column statistics and data profiling.
+
+ Template variables:
+ - table_name: Name of the table
+ - dataset: Schema/dataset name
+ - column_stats: List of dicts with stats for each column:
+ - name: Column name
+ - type: Data type
+ - null_count: Number of nulls
+ - unique_count: Number of unique values
+ - min_value: Min value (numeric/temporal)
+ - max_value: Max value (numeric/temporal)
+ - error: Error message if stats couldn't be computed
+ - columns: List of column info dicts
+ """
 
  @property
  def filename(self) -> str:
  return "profiling.md"
 
- def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
- try:
- t = self.get_table(conn, dataset, table)
- schema = t.schema()
-
- lines = [
- f"# {table} - Profiling",
- "",
- f"**Dataset:** `{dataset}`",
- "",
- "## Column Statistics",
- "",
- "| Column | Type | Nulls | Unique | Min | Max |",
- "|--------|------|-------|--------|-----|-----|",
- ]
-
- for name, dtype in schema.items():
- col = t[name]
- dtype_str = str(dtype)
-
- try:
- null_count = t.filter(col.isnull()).count().execute()
- unique_count = col.nunique().execute()
-
- min_val = ""
- max_val = ""
- if dtype.is_numeric() or dtype.is_temporal():
- try:
- min_val = str(col.min().execute())
- max_val = str(col.max().execute())
- if len(min_val) > 20:
- min_val = min_val[:17] + "..."
- if len(max_val) > 20:
- max_val = max_val[:17] + "..."
- except Exception:
- pass
-
- lines.append(
- f"| `{name}` | `{dtype_str}` | {null_count:,} | {unique_count:,} | {min_val} | {max_val} |"
- )
- except Exception as col_error:
- lines.append(f"| `{name}` | `{dtype_str}` | Error: {col_error} | | | |")
-
- return "\n".join(lines)
- except Exception as e:
- return f"# {table} - Profiling\n\nError fetching profiling: {e}"
+ @property
+ def template_name(self) -> str:
+ return "databases/profiling.md.j2"
+
+ def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+ t = self.get_table(conn, dataset, table)
+ schema = t.schema()
+
+ column_stats = []
+ columns = []
+
+ for name, dtype in schema.items():
+ columns.append({"name": name, "type": str(dtype)})
+ col = t[name]
+ dtype_str = str(dtype)
+
+ stat = {
+ "name": name,
+ "type": dtype_str,
+ "null_count": 0,
+ "unique_count": 0,
+ "min_value": None,
+ "max_value": None,
+ "error": None,
+ }
+
+ try:
+ stat["null_count"] = t.filter(col.isnull()).count().execute()
+ stat["unique_count"] = col.nunique().execute()
+
+ if dtype.is_numeric() or dtype.is_temporal():
+ try:
+ min_val = str(col.min().execute())
+ max_val = str(col.max().execute())
+ stat["min_value"] = truncate_middle(min_val, 20)
+ stat["max_value"] = truncate_middle(max_val, 20)
+ except Exception:
+ pass
+ except Exception as col_error:
+ stat["error"] = str(col_error)
+
+ column_stats.append(stat)
+
+ return {
+ "table_name": table,
+ "dataset": dataset,
+ "column_stats": column_stats,
+ "columns": columns,
+ }
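
How these accessors get driven is part of the new providers/databases/provider.py, which this diff does not display. Pieced together from what is visible here (DataAccessor.set_project_path, filename, generate, and the databases/<type>/<connection>/<dataset>/<table>/*.md layout documented in sync()), the write loop plausibly looks like the sketch below; the helper name, parameters, and accessor selection are assumptions:

# Hedged sketch of how a database sync provider might drive the accessors above.
# Only set_project_path/generate/filename and the documented output layout come
# from this diff; the wiring itself is illustrative.
from pathlib import Path

from ibis import BaseBackend

from nao_core.commands.sync.accessors import (
    ColumnsAccessor,
    DataAccessor,
    DescriptionAccessor,
    PreviewAccessor,
    ProfilingAccessor,
)


def write_table_docs(
    conn: BaseBackend,
    db_type: str,
    connection_name: str,
    dataset: str,
    table: str,
    output_path: Path,
    project_path: Path,
) -> None:
    # Make user template overrides under <project>/templates/ visible to accessors.
    DataAccessor.set_project_path(project_path)

    accessors: list[DataAccessor] = [
        ColumnsAccessor(),
        PreviewAccessor(num_rows=10),
        DescriptionAccessor(),
        ProfilingAccessor(),
    ]

    # databases/<type>/<connection>/<dataset>/<table>/*.md layout from sync()
    table_dir = output_path / db_type / connection_name / dataset / table
    table_dir.mkdir(parents=True, exist_ok=True)

    for accessor in accessors:
        content = accessor.generate(conn, dataset, table)
        (table_dir / accessor.filename).write_text(content)
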