nao_core-0.0.38-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. nao_core/__init__.py +2 -0
  2. nao_core/__init__.py.bak +2 -0
  3. nao_core/bin/build-info.json +5 -0
  4. nao_core/bin/fastapi/main.py +268 -0
  5. nao_core/bin/fastapi/test_main.py +156 -0
  6. nao_core/bin/migrations-postgres/0000_user_auth_and_chat_tables.sql +98 -0
  7. nao_core/bin/migrations-postgres/0001_message_feedback.sql +9 -0
  8. nao_core/bin/migrations-postgres/0002_chat_message_stop_reason_and_error_message.sql +2 -0
  9. nao_core/bin/migrations-postgres/0003_handle_slack_with_thread.sql +2 -0
  10. nao_core/bin/migrations-postgres/0004_input_and_output_tokens.sql +8 -0
  11. nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
  12. nao_core/bin/migrations-postgres/0006_llm_model_ids.sql +4 -0
  13. nao_core/bin/migrations-postgres/0007_chat_message_llm_info.sql +2 -0
  14. nao_core/bin/migrations-postgres/meta/0000_snapshot.json +707 -0
  15. nao_core/bin/migrations-postgres/meta/0001_snapshot.json +766 -0
  16. nao_core/bin/migrations-postgres/meta/0002_snapshot.json +778 -0
  17. nao_core/bin/migrations-postgres/meta/0003_snapshot.json +799 -0
  18. nao_core/bin/migrations-postgres/meta/0004_snapshot.json +847 -0
  19. nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
  20. nao_core/bin/migrations-postgres/meta/0006_snapshot.json +1141 -0
  21. nao_core/bin/migrations-postgres/meta/_journal.json +62 -0
  22. nao_core/bin/migrations-sqlite/0000_user_auth_and_chat_tables.sql +98 -0
  23. nao_core/bin/migrations-sqlite/0001_message_feedback.sql +8 -0
  24. nao_core/bin/migrations-sqlite/0002_chat_message_stop_reason_and_error_message.sql +2 -0
  25. nao_core/bin/migrations-sqlite/0003_handle_slack_with_thread.sql +2 -0
  26. nao_core/bin/migrations-sqlite/0004_input_and_output_tokens.sql +8 -0
  27. nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
  28. nao_core/bin/migrations-sqlite/0006_llm_model_ids.sql +4 -0
  29. nao_core/bin/migrations-sqlite/0007_chat_message_llm_info.sql +2 -0
  30. nao_core/bin/migrations-sqlite/meta/0000_snapshot.json +674 -0
  31. nao_core/bin/migrations-sqlite/meta/0001_snapshot.json +735 -0
  32. nao_core/bin/migrations-sqlite/meta/0002_snapshot.json +749 -0
  33. nao_core/bin/migrations-sqlite/meta/0003_snapshot.json +763 -0
  34. nao_core/bin/migrations-sqlite/meta/0004_snapshot.json +819 -0
  35. nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
  36. nao_core/bin/migrations-sqlite/meta/0006_snapshot.json +1100 -0
  37. nao_core/bin/migrations-sqlite/meta/_journal.json +62 -0
  38. nao_core/bin/nao-chat-server +0 -0
  39. nao_core/bin/public/assets/code-block-F6WJLWQG-CV0uOmNJ.js +153 -0
  40. nao_core/bin/public/assets/index-DcbndLHo.css +1 -0
  41. nao_core/bin/public/assets/index-t1hZI3nl.js +560 -0
  42. nao_core/bin/public/favicon.ico +0 -0
  43. nao_core/bin/public/index.html +18 -0
  44. nao_core/bin/rg +0 -0
  45. nao_core/commands/__init__.py +6 -0
  46. nao_core/commands/chat.py +225 -0
  47. nao_core/commands/debug.py +158 -0
  48. nao_core/commands/init.py +358 -0
  49. nao_core/commands/sync/__init__.py +124 -0
  50. nao_core/commands/sync/accessors.py +290 -0
  51. nao_core/commands/sync/cleanup.py +156 -0
  52. nao_core/commands/sync/providers/__init__.py +32 -0
  53. nao_core/commands/sync/providers/base.py +113 -0
  54. nao_core/commands/sync/providers/databases/__init__.py +17 -0
  55. nao_core/commands/sync/providers/databases/bigquery.py +79 -0
  56. nao_core/commands/sync/providers/databases/databricks.py +79 -0
  57. nao_core/commands/sync/providers/databases/duckdb.py +78 -0
  58. nao_core/commands/sync/providers/databases/postgres.py +79 -0
  59. nao_core/commands/sync/providers/databases/provider.py +129 -0
  60. nao_core/commands/sync/providers/databases/snowflake.py +79 -0
  61. nao_core/commands/sync/providers/notion/__init__.py +5 -0
  62. nao_core/commands/sync/providers/notion/provider.py +205 -0
  63. nao_core/commands/sync/providers/repositories/__init__.py +5 -0
  64. nao_core/commands/sync/providers/repositories/provider.py +134 -0
  65. nao_core/commands/sync/registry.py +23 -0
  66. nao_core/config/__init__.py +30 -0
  67. nao_core/config/base.py +100 -0
  68. nao_core/config/databases/__init__.py +55 -0
  69. nao_core/config/databases/base.py +85 -0
  70. nao_core/config/databases/bigquery.py +99 -0
  71. nao_core/config/databases/databricks.py +79 -0
  72. nao_core/config/databases/duckdb.py +41 -0
  73. nao_core/config/databases/postgres.py +83 -0
  74. nao_core/config/databases/snowflake.py +125 -0
  75. nao_core/config/exceptions.py +7 -0
  76. nao_core/config/llm/__init__.py +19 -0
  77. nao_core/config/notion/__init__.py +8 -0
  78. nao_core/config/repos/__init__.py +3 -0
  79. nao_core/config/repos/base.py +11 -0
  80. nao_core/config/slack/__init__.py +12 -0
  81. nao_core/context/__init__.py +54 -0
  82. nao_core/context/base.py +57 -0
  83. nao_core/context/git.py +177 -0
  84. nao_core/context/local.py +59 -0
  85. nao_core/main.py +13 -0
  86. nao_core/templates/__init__.py +41 -0
  87. nao_core/templates/context.py +193 -0
  88. nao_core/templates/defaults/databases/columns.md.j2 +23 -0
  89. nao_core/templates/defaults/databases/description.md.j2 +32 -0
  90. nao_core/templates/defaults/databases/preview.md.j2 +22 -0
  91. nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
  92. nao_core/templates/engine.py +133 -0
  93. nao_core/templates/render.py +196 -0
  94. nao_core-0.0.38.dist-info/METADATA +150 -0
  95. nao_core-0.0.38.dist-info/RECORD +98 -0
  96. nao_core-0.0.38.dist-info/WHEEL +4 -0
  97. nao_core-0.0.38.dist-info/entry_points.txt +2 -0
  98. nao_core-0.0.38.dist-info/licenses/LICENSE +22 -0
nao_core/commands/sync/accessors.py
@@ -0,0 +1,290 @@
+ """Data accessor classes for generating markdown documentation from database tables."""
+
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Any
+
+ from ibis import BaseBackend
+
+ from nao_core.templates import get_template_engine
+
+
+ class DataAccessor(ABC):
+     """Base class for data accessors that generate markdown files for tables.
+
+     Accessors use Jinja2 templates for generating output. Default templates
+     are shipped with nao and can be overridden by users by placing templates
+     with the same name in their project's `templates/` directory.
+
+     Example:
+         To override the preview template, create:
+         `<project_root>/templates/databases/preview.md.j2`
+     """
+
+     # Path to the nao project root (set by sync provider)
+     _project_path: Path | None = None
+
+     @property
+     @abstractmethod
+     def filename(self) -> str:
+         """The filename this accessor writes to (e.g., 'columns.md')."""
+         ...
+
+     @property
+     @abstractmethod
+     def template_name(self) -> str:
+         """The template file to use (e.g., 'databases/columns.md.j2')."""
+         ...
+
+     @abstractmethod
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         """Get the template context for rendering.
+
+         Args:
+             conn: The Ibis database connection
+             dataset: The dataset/schema name
+             table: The table name
+
+         Returns:
+             Dictionary of variables to pass to the template
+         """
+         ...
+
+     def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+         """Generate the markdown content for a table using templates.
+
+         Args:
+             conn: The Ibis database connection
+             dataset: The dataset/schema name
+             table: The table name
+
+         Returns:
+             Markdown string content
+         """
+         try:
+             context = self.get_context(conn, dataset, table)
+             engine = get_template_engine(self._project_path)
+             return engine.render(self.template_name, **context)
+         except Exception as e:
+             return f"# {table}\n\nError generating content: {e}"
+
+     def get_table(self, conn: BaseBackend, dataset: str, table: str):
+         """Helper to get an Ibis table reference."""
+         return conn.table(table, database=dataset)
+
+     @classmethod
+     def set_project_path(cls, path: Path | None) -> None:
+         """Set the project path for template resolution.
+
+         Args:
+             path: Path to the nao project root
+         """
+         cls._project_path = path
+
+
+ def truncate_middle(text: str, max_length: int) -> str:
+     """Truncate text in the middle if it exceeds max_length."""
+     if len(text) <= max_length:
+         return text
+     half = (max_length - 3) // 2
+     return text[:half] + "..." + text[-half:]
+
+
+ class ColumnsAccessor(DataAccessor):
+     """Generates columns.md with column names, types, and nullable info.
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - columns: List of dicts with 'name', 'type', 'nullable', 'description'
+         - column_count: Total number of columns
+     """
+
+     def __init__(self, max_description_length: int = 256):
+         self.max_description_length = max_description_length
+
+     @property
+     def filename(self) -> str:
+         return "columns.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/columns.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+
+         columns = []
+         for name, dtype in schema.items():
+             columns.append(
+                 {
+                     "name": name,
+                     "type": str(dtype),
+                     "nullable": dtype.nullable if hasattr(dtype, "nullable") else True,
+                     "description": None,  # Could be populated from metadata
+                 }
+             )
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "columns": columns,
+             "column_count": len(columns),
+         }
+
+
+ class PreviewAccessor(DataAccessor):
+     """Generates preview.md with the first N rows of data as JSONL.
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - rows: List of row dictionaries
+         - row_count: Number of preview rows
+         - columns: List of column info dicts
+     """
+
+     def __init__(self, num_rows: int = 10):
+         self.num_rows = num_rows
+
+     @property
+     def filename(self) -> str:
+         return "preview.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/preview.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+         preview_df = t.limit(self.num_rows).execute()
+
+         rows = []
+         for _, row in preview_df.iterrows():
+             row_dict = row.to_dict()
+             # Convert non-serializable types to strings
+             for key, val in row_dict.items():
+                 if val is not None and not isinstance(val, (str, int, float, bool, list, dict)):
+                     row_dict[key] = str(val)
+             rows.append(row_dict)
+
+         columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "rows": rows,
+             "row_count": len(rows),
+             "columns": columns,
+         }
+
+
+ class DescriptionAccessor(DataAccessor):
+     """Generates description.md with table metadata (row count, column count, etc.).
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - row_count: Total rows in the table
+         - column_count: Number of columns
+         - description: Table description (if available)
+         - columns: List of column info dicts
+     """
+
+     @property
+     def filename(self) -> str:
+         return "description.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/description.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+
+         row_count = t.count().execute()
+         columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "row_count": row_count,
+             "column_count": len(schema),
+             "description": None,  # Could be populated from metadata
+             "columns": columns,
+         }
+
+
+ class ProfilingAccessor(DataAccessor):
+     """Generates profiling.md with column statistics and data profiling.
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - column_stats: List of dicts with stats for each column:
+             - name: Column name
+             - type: Data type
+             - null_count: Number of nulls
+             - unique_count: Number of unique values
+             - min_value: Min value (numeric/temporal)
+             - max_value: Max value (numeric/temporal)
+             - error: Error message if stats couldn't be computed
+         - columns: List of column info dicts
+     """
+
+     @property
+     def filename(self) -> str:
+         return "profiling.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/profiling.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+
+         column_stats = []
+         columns = []
+
+         for name, dtype in schema.items():
+             columns.append({"name": name, "type": str(dtype)})
+             col = t[name]
+             dtype_str = str(dtype)
+
+             stat = {
+                 "name": name,
+                 "type": dtype_str,
+                 "null_count": 0,
+                 "unique_count": 0,
+                 "min_value": None,
+                 "max_value": None,
+                 "error": None,
+             }
+
+             try:
+                 stat["null_count"] = t.filter(col.isnull()).count().execute()
+                 stat["unique_count"] = col.nunique().execute()
+
+                 if dtype.is_numeric() or dtype.is_temporal():
+                     try:
+                         min_val = str(col.min().execute())
+                         max_val = str(col.max().execute())
+                         stat["min_value"] = truncate_middle(min_val, 20)
+                         stat["max_value"] = truncate_middle(max_val, 20)
+                     except Exception:
+                         pass
+             except Exception as col_error:
+                 stat["error"] = str(col_error)
+
+             column_stats.append(stat)
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "column_stats": column_stats,
+             "columns": columns,
+         }
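
As a reading aid, here is a minimal sketch of a project-specific accessor built on the DataAccessor base class above. The IndexesAccessor name and the databases/indexes.md.j2 template are hypothetical and not shipped in this wheel; only the base-class hooks (filename, template_name, get_context, get_table) come from the package.

from typing import Any

from ibis import BaseBackend

from nao_core.commands.sync.accessors import DataAccessor


class IndexesAccessor(DataAccessor):
    """Hypothetical accessor that would write indexes.md per table."""

    @property
    def filename(self) -> str:
        return "indexes.md"

    @property
    def template_name(self) -> str:
        # Assumed template name; a user would place it under
        # <project_root>/templates/ as described in the base class docstring.
        return "databases/indexes.md.j2"

    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
        t = self.get_table(conn, dataset, table)
        return {
            "table_name": table,
            "dataset": dataset,
            "columns": list(t.schema().names),
        }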
nao_core/commands/sync/cleanup.py
@@ -0,0 +1,156 @@
+ """Cleanup utilities for removing stale sync files."""
+
+ import shutil
+ from collections import defaultdict
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Dict, List
+
+ from rich.console import Console
+
+ console = Console()
+
+
+ @dataclass
+ class DatabaseSyncState:
+     """Tracks the state of a database sync operation.
+
+     Used to track which paths were synced so stale paths can be cleaned up.
+     """
+
+     db_path: Path
+     """The root path for this database (e.g., databases/type=duckdb/database=mydb)"""
+
+     synced_schemas: set[str] = field(default_factory=set)
+     """Set of schema names that were synced"""
+
+     synced_tables: dict[str, set[str]] = field(default_factory=dict)
+     """Dict mapping schema names to sets of table names that were synced"""
+
+     schemas_synced: int = 0
+     """Count of schemas synced"""
+
+     tables_synced: int = 0
+     """Count of tables synced"""
+
+     def add_table(self, schema: str, table: str) -> None:
+         """Record that a table was synced.
+
+         Args:
+             schema: The schema/dataset name
+             table: The table name
+         """
+         self.synced_schemas.add(schema)
+         if schema not in self.synced_tables:
+             self.synced_tables[schema] = set()
+         self.synced_tables[schema].add(table)
+         self.tables_synced += 1
+
+     def add_schema(self, schema: str) -> None:
+         """Record that a schema was synced (even if empty).
+
+         Args:
+             schema: The schema/dataset name
+         """
+         self.synced_schemas.add(schema)
+         self.schemas_synced += 1
+
+
+ def cleanup_stale_paths(state: DatabaseSyncState, verbose: bool = False) -> int:
+     """Remove directories that exist on disk but weren't synced.
+
+     This function cleans up:
+     - Table directories that no longer exist in the source
+     - Schema directories that no longer exist or have no tables
+
+     Args:
+         state: The sync state tracking what was synced
+         verbose: Whether to print cleanup messages
+
+     Returns:
+         Number of stale paths removed
+     """
+     removed_count = 0
+
+     if not state.db_path.exists():
+         return 0
+
+     # Find all existing schema directories
+     existing_schemas = {
+         d.name.replace("schema=", ""): d for d in state.db_path.iterdir() if d.is_dir() and d.name.startswith("schema=")
+     }
+
+     # Remove schemas that weren't synced
+     for schema_name, schema_path in existing_schemas.items():
+         if schema_name not in state.synced_schemas:
+             if verbose:
+                 console.print(f" [dim red]removing stale schema:[/dim red] {schema_name}")
+             shutil.rmtree(schema_path)
+             removed_count += 1
+             continue
+
+         # Find existing tables in this schema
+         existing_tables = {
+             d.name.replace("table=", ""): d for d in schema_path.iterdir() if d.is_dir() and d.name.startswith("table=")
+         }
+
+         synced_tables_for_schema = state.synced_tables.get(schema_name, set())
+
+         # Remove tables that weren't synced
+         for table_name, table_path in existing_tables.items():
+             if table_name not in synced_tables_for_schema:
+                 if verbose:
+                     console.print(f" [dim red]removing stale table:[/dim red] {schema_name}.{table_name}")
+                 shutil.rmtree(table_path)
+                 removed_count += 1
+
+     return removed_count
+
+
+ def cleanup_stale_databases(active_databases: List, base_path: Path, verbose: bool = False):
+     """Remove databases that are not present in the config file."""
+
+     valid_db_folders_by_type: Dict[str, set] = defaultdict(set)
+
+     for db in active_databases:
+         type_folder = f"type={db.type}"
+         db_identifier = db.get_database_name()
+         db_folder = f"database={db_identifier}"
+
+         valid_db_folders_by_type[type_folder].add(db_folder)
+
+     for type_dir in base_path.iterdir():
+         if not type_dir.is_dir():
+             continue
+
+         type_folder_name = type_dir.name
+
+         # Remove entire type directory if it doesn't exist in nao_config
+         if type_folder_name not in valid_db_folders_by_type:
+             shutil.rmtree(type_dir)
+             if verbose:
+                 console.print(f"\n[yellow] Removed unused database type:[/yellow] {type_dir}")
+             continue
+
+         valid_db_folders = valid_db_folders_by_type[type_folder_name]
+
+         # Remove unused database folders if they don't exist in nao_config
+         for db_dir in type_dir.iterdir():
+             if not db_dir.is_dir():
+                 continue
+
+             if db_dir.name not in valid_db_folders:
+                 shutil.rmtree(db_dir)
+                 if verbose:
+                     console.print(f"\n[yellow] Removed unused database:[/yellow] {type_folder_name}/{db_dir.name}")
+
+
+ def cleanup_stale_repos(config_repos: list, base_path: Path, verbose: bool = False) -> None:
+     """Remove repositories that are not present in the config file."""
+
+     repo_names = {repo.name for repo in config_repos}
+     for repo_dir in base_path.iterdir():
+         if repo_dir.is_dir() and repo_dir.name not in repo_names:
+             shutil.rmtree(repo_dir)
+             if verbose:
+                 console.print(f"\n[yellow] Removed unused repo:[/yellow] {repo_dir.name}")
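
A short usage sketch of the state-tracking and cleanup pair above, assuming the duckdb layout named in the DatabaseSyncState docstring; the schema and table names are illustrative only.

from pathlib import Path

from nao_core.commands.sync.cleanup import DatabaseSyncState, cleanup_stale_paths

# Record what this sync run produced...
state = DatabaseSyncState(db_path=Path("databases/type=duckdb/database=mydb"))
state.add_schema("main")
state.add_table("main", "orders")

# ...then remove any schema=*/table=* directory under db_path that was
# not recorded above.
removed = cleanup_stale_paths(state, verbose=True)
print(f"{removed} stale path(s) removed")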
nao_core/commands/sync/providers/__init__.py
@@ -0,0 +1,32 @@
+ """Sync providers for different resource types."""
+
+ from .base import SyncProvider, SyncResult
+ from .databases.provider import DatabaseSyncProvider
+ from .notion.provider import NotionSyncProvider
+ from .repositories.provider import RepositorySyncProvider
+
+ # Default providers in order of execution
+ DEFAULT_PROVIDERS: list[SyncProvider] = [
+     NotionSyncProvider(),
+     RepositorySyncProvider(),
+     DatabaseSyncProvider(),
+ ]
+
+
+ def get_all_providers() -> list[SyncProvider]:
+     """Get all registered sync providers.
+
+     Returns:
+         List of sync provider instances
+     """
+     return DEFAULT_PROVIDERS.copy()
+
+
+ __all__ = [
+     "SyncProvider",
+     "SyncResult",
+     "DatabaseSyncProvider",
+     "RepositorySyncProvider",
+     "DEFAULT_PROVIDERS",
+     "get_all_providers",
+ ]
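
The registry above suggests a driver loop like the following sketch, using the SyncProvider interface shown in the next hunk. run_sync is invented for illustration; the diff does not show how the nao CLI actually wires providers to a loaded NaoConfig.

from pathlib import Path

from nao_core.commands.sync.providers import get_all_providers
from nao_core.config import NaoConfig


def run_sync(config: NaoConfig, base: Path) -> None:
    """Hypothetical driver: run each registered provider that has work to do."""
    for provider in get_all_providers():
        if not provider.should_sync(config):
            continue
        items = provider.get_items(config)
        output_path = base / provider.default_output_dir
        provider.pre_sync(config, output_path)
        result = provider.sync(items, output_path, project_path=base)
        print(f"{provider.emoji} {provider.name}: {result.get_summary()}")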
nao_core/commands/sync/providers/base.py
@@ -0,0 +1,113 @@
+ """Base class for sync providers."""
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ from nao_core.config import NaoConfig
+
+
+ @dataclass
+ class SyncResult:
+     """Result of a sync operation."""
+
+     provider_name: str
+     items_synced: int
+     details: dict[str, Any] | None = None
+     summary: str | None = None
+     error: str | None = None
+
+     @property
+     def success(self) -> bool:
+         """Check if the sync was successful."""
+         return self.error is None
+
+     def get_summary(self) -> str:
+         """Get a human-readable summary of the sync result."""
+         if self.error:
+             return f"failed: {self.error}"
+         if self.summary:
+             return self.summary
+         return f"{self.items_synced} synced"
+
+     @classmethod
+     def from_error(cls, provider_name: str, error: Exception) -> "SyncResult":
+         """Create a SyncResult from an exception."""
+         return cls(
+             provider_name=provider_name,
+             items_synced=0,
+             error=str(error),
+         )
+
+
+ class SyncProvider(ABC):
+     """Abstract base class for sync providers.
+
+     A sync provider is responsible for synchronizing a specific type of resource
+     (e.g., repositories, databases) from the nao configuration to local files.
+     """
+
+     @property
+     @abstractmethod
+     def name(self) -> str:
+         """Human-readable name for this provider (e.g., 'Repositories', 'Databases')."""
+         ...
+
+     @property
+     @abstractmethod
+     def emoji(self) -> str:
+         """Emoji icon for this provider."""
+         ...
+
+     @property
+     @abstractmethod
+     def default_output_dir(self) -> str:
+         """Default output directory for this provider."""
+         ...
+
+     @abstractmethod
+     def get_items(self, config: NaoConfig) -> list[Any]:
+         """Extract items to sync from the configuration.
+
+         Args:
+             config: The nao configuration
+
+         Returns:
+             List of items to sync (e.g., repo configs, database configs)
+         """
+         ...
+
+     @abstractmethod
+     def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
+         """Sync the items to the output path.
+
+         Args:
+             items: List of items to sync
+             output_path: Path where synced data should be written
+             project_path: Path to the nao project root (for template resolution)
+
+         Returns:
+             SyncResult with statistics about what was synced
+         """
+         ...
+
+     def should_sync(self, config: NaoConfig) -> bool:
+         """Check if this provider has items to sync.
+
+         Args:
+             config: The nao configuration
+
+         Returns:
+             True if there are items to sync
+         """
+         return len(self.get_items(config)) > 0
+
+     def pre_sync(self, config: NaoConfig, output_path: Path) -> None:
+         """Hook for preparation before sync.
+
+         Args:
+             config: The loaded nao configuration.
+             output_path: Base directory where the preparation should be applied.
+         """
+         pass
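
To make the contract concrete, a minimal provider might look like the sketch below. NotesSyncProvider and the config.notes attribute are invented; only SyncProvider, SyncResult, and NaoConfig come from the package, and the real NaoConfig fields are not shown in this diff.

from pathlib import Path
from typing import Any

from nao_core.commands.sync.providers.base import SyncProvider, SyncResult
from nao_core.config import NaoConfig


class NotesSyncProvider(SyncProvider):
    """Hypothetical provider that writes one markdown file per configured note."""

    @property
    def name(self) -> str:
        return "Notes"

    @property
    def emoji(self) -> str:
        return "📝"

    @property
    def default_output_dir(self) -> str:
        return "notes"

    def get_items(self, config: NaoConfig) -> list[Any]:
        # Assumed config attribute, for illustration only.
        return list(getattr(config, "notes", []) or [])

    def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
        try:
            output_path.mkdir(parents=True, exist_ok=True)
            for i, item in enumerate(items):
                (output_path / f"note-{i}.md").write_text(str(item))
            return SyncResult(provider_name=self.name, items_synced=len(items))
        except Exception as e:
            return SyncResult.from_error(self.name, e)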
nao_core/commands/sync/providers/databases/__init__.py
@@ -0,0 +1,17 @@
+ """Database syncing functionality for generating markdown documentation from database schemas."""
+
+ from .bigquery import sync_bigquery
+ from .databricks import sync_databricks
+ from .duckdb import sync_duckdb
+ from .postgres import sync_postgres
+ from .provider import DatabaseSyncProvider
+ from .snowflake import sync_snowflake
+
+ __all__ = [
+     "DatabaseSyncProvider",
+     "sync_bigquery",
+     "sync_databricks",
+     "sync_duckdb",
+     "sync_postgres",
+     "sync_snowflake",
+ ]
nao_core/commands/sync/providers/databases/bigquery.py
@@ -0,0 +1,79 @@
+ from pathlib import Path
+
+ from rich.progress import Progress
+
+ from nao_core.commands.sync.accessors import DataAccessor
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
+
+
+ def sync_bigquery(
+     db_config,
+     base_path: Path,
+     progress: Progress,
+     accessors: list[DataAccessor],
+ ) -> DatabaseSyncState:
+     """Sync BigQuery database schema to markdown files.
+
+     Args:
+         db_config: The database configuration
+         base_path: Base output path
+         progress: Rich progress instance
+         accessors: List of data accessors to run
+
+     Returns:
+         DatabaseSyncState with sync results and tracked paths
+     """
+     conn = db_config.connect()
+     db_name = db_config.get_database_name()
+     db_path = base_path / "type=bigquery" / f"database={db_name}"
+     state = DatabaseSyncState(db_path=db_path)
+
+     if db_config.dataset_id:
+         datasets = [db_config.dataset_id]
+     else:
+         datasets = conn.list_databases()
+
+     dataset_task = progress.add_task(
+         f"[dim]{db_config.name}[/dim]",
+         total=len(datasets),
+     )
+
+     for dataset in datasets:
+         try:
+             all_tables = conn.list_tables(database=dataset)
+         except Exception:
+             progress.update(dataset_task, advance=1)
+             continue
+
+         # Filter tables based on include/exclude patterns
+         tables = [t for t in all_tables if db_config.matches_pattern(dataset, t)]
+
+         # Skip dataset if no tables match
+         if not tables:
+             progress.update(dataset_task, advance=1)
+             continue
+
+         dataset_path = db_path / f"schema={dataset}"
+         dataset_path.mkdir(parents=True, exist_ok=True)
+         state.add_schema(dataset)
+
+         table_task = progress.add_task(
+             f" [cyan]{dataset}[/cyan]",
+             total=len(tables),
+         )
+
+         for table in tables:
+             table_path = dataset_path / f"table={table}"
+             table_path.mkdir(parents=True, exist_ok=True)
+
+             for accessor in accessors:
+                 content = accessor.generate(conn, dataset, table)
+                 output_file = table_path / accessor.filename
+                 output_file.write_text(content)
+
+             state.add_table(dataset, table)
+             progress.update(table_task, advance=1)
+
+         progress.update(dataset_task, advance=1)
+
+     return state
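
Finally, a sketch of how sync_bigquery could be driven, wrapped in a function because db_config is not constructed here; it stands in for a BigQuery entry from nao's config, which (per the code above) must expose connect(), get_database_name(), dataset_id, name, and matches_pattern().

from pathlib import Path

from rich.progress import Progress

from nao_core.commands.sync.accessors import ColumnsAccessor, PreviewAccessor
from nao_core.commands.sync.providers.databases import sync_bigquery


def sync_one_bigquery(db_config, base_path: Path = Path("databases")) -> None:
    """Sketch: sync a single configured BigQuery database and report totals."""
    with Progress() as progress:
        state = sync_bigquery(
            db_config,
            base_path=base_path,
            progress=progress,
            accessors=[ColumnsAccessor(), PreviewAccessor(num_rows=5)],
        )
    print(f"synced {state.tables_synced} table(s) across {len(state.synced_schemas)} schema(s)")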