nao-core 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +1 -1
- nao_core/bin/fastapi/main.py +21 -2
- nao_core/bin/fastapi/test_main.py +156 -0
- nao_core/bin/migrations-postgres/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
- nao_core/bin/migrations-postgres/meta/0004_snapshot.json +847 -0
- nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
- nao_core/bin/migrations-postgres/meta/_journal.json +14 -0
- nao_core/bin/migrations-sqlite/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
- nao_core/bin/migrations-sqlite/meta/0004_snapshot.json +819 -0
- nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
- nao_core/bin/migrations-sqlite/meta/_journal.json +14 -0
- nao_core/bin/nao-chat-server +0 -0
- nao_core/bin/public/assets/{code-block-F6WJLWQG-EQr6mTlQ.js → code-block-F6WJLWQG-TAi8koem.js} +3 -3
- nao_core/bin/public/assets/index-BfHcd9Xz.css +1 -0
- nao_core/bin/public/assets/index-Mzo9bkag.js +557 -0
- nao_core/bin/public/index.html +2 -2
- nao_core/commands/chat.py +11 -10
- nao_core/commands/debug.py +10 -6
- nao_core/commands/init.py +66 -27
- nao_core/commands/sync/__init__.py +40 -21
- nao_core/commands/sync/accessors.py +219 -141
- nao_core/commands/sync/cleanup.py +133 -0
- nao_core/commands/sync/providers/__init__.py +30 -0
- nao_core/commands/sync/providers/base.py +87 -0
- nao_core/commands/sync/providers/databases/__init__.py +17 -0
- nao_core/commands/sync/providers/databases/bigquery.py +78 -0
- nao_core/commands/sync/providers/databases/databricks.py +79 -0
- nao_core/commands/sync/providers/databases/duckdb.py +83 -0
- nao_core/commands/sync/providers/databases/postgres.py +78 -0
- nao_core/commands/sync/providers/databases/provider.py +123 -0
- nao_core/commands/sync/providers/databases/snowflake.py +78 -0
- nao_core/commands/sync/providers/repositories/__init__.py +5 -0
- nao_core/commands/sync/{repositories.py → providers/repositories/provider.py} +43 -20
- nao_core/config/__init__.py +16 -1
- nao_core/config/base.py +23 -4
- nao_core/config/databases/__init__.py +37 -11
- nao_core/config/databases/base.py +7 -0
- nao_core/config/databases/bigquery.py +29 -1
- nao_core/config/databases/databricks.py +69 -0
- nao_core/config/databases/duckdb.py +33 -0
- nao_core/config/databases/postgres.py +78 -0
- nao_core/config/databases/snowflake.py +115 -0
- nao_core/config/exceptions.py +7 -0
- nao_core/templates/__init__.py +12 -0
- nao_core/templates/defaults/databases/columns.md.j2 +23 -0
- nao_core/templates/defaults/databases/description.md.j2 +32 -0
- nao_core/templates/defaults/databases/preview.md.j2 +22 -0
- nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
- nao_core/templates/engine.py +133 -0
- {nao_core-0.0.29.dist-info → nao_core-0.0.31.dist-info}/METADATA +9 -4
- nao_core-0.0.31.dist-info/RECORD +86 -0
- nao_core/bin/public/assets/_chat-layout-BTlqRUE5.js +0 -1
- nao_core/bin/public/assets/_chat-layout.index-DOARokp1.js +0 -1
- nao_core/bin/public/assets/agentProvider-C6dGIy-H.js +0 -1
- nao_core/bin/public/assets/button-By_1dzVx.js +0 -1
- nao_core/bin/public/assets/folder-DnRS5rg3.js +0 -1
- nao_core/bin/public/assets/index-CElAN2SH.css +0 -1
- nao_core/bin/public/assets/index-ZTHASguQ.js +0 -59
- nao_core/bin/public/assets/input-CUQA5tsi.js +0 -1
- nao_core/bin/public/assets/login-BUQDum3t.js +0 -1
- nao_core/bin/public/assets/mermaid-FSSLJTFX-Dc6ZvCPw.js +0 -427
- nao_core/bin/public/assets/sidebar-bgEk7Xg8.js +0 -1
- nao_core/bin/public/assets/signinForm-CGAhnAkv.js +0 -1
- nao_core/bin/public/assets/signup-D2n11La3.js +0 -1
- nao_core/bin/public/assets/user-CYl8Tly2.js +0 -1
- nao_core/bin/public/assets/utils-DzJYey0s.js +0 -1
- nao_core/commands/sync/databases.py +0 -132
- nao_core-0.0.29.dist-info/RECORD +0 -69
- {nao_core-0.0.29.dist-info → nao_core-0.0.31.dist-info}/WHEEL +0 -0
- {nao_core-0.0.29.dist-info → nao_core-0.0.31.dist-info}/entry_points.txt +0 -0
- {nao_core-0.0.29.dist-info → nao_core-0.0.31.dist-info}/licenses/LICENSE +0 -0

```diff
--- a/nao_core/commands/sync/accessors.py
+++ b/nao_core/commands/sync/accessors.py
@@ -1,13 +1,28 @@
 """Data accessor classes for generating markdown documentation from database tables."""
 
-import json
 from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any
 
 from ibis import BaseBackend
 
+from nao_core.templates import get_template_engine
+
 
 class DataAccessor(ABC):
-    """Base class for data accessors that generate markdown files for tables.
+    """Base class for data accessors that generate markdown files for tables.
+
+    Accessors use Jinja2 templates for generating output. Default templates
+    are shipped with nao and can be overridden by users by placing templates
+    with the same name in their project's `templates/` directory.
+
+    Example:
+        To override the preview template, create:
+        `<project_root>/templates/databases/preview.md.j2`
+    """
+
+    # Path to the nao project root (set by sync provider)
+    _project_path: Path | None = None
 
     @property
     @abstractmethod
@@ -15,24 +30,56 @@ class DataAccessor(ABC):
         """The filename this accessor writes to (e.g., 'columns.md')."""
         ...
 
+    @property
     @abstractmethod
-    def
-        """
+    def template_name(self) -> str:
+        """The template file to use (e.g., 'databases/columns.md.j2')."""
+        ...
+
+    @abstractmethod
+    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+        """Get the template context for rendering.
 
         Args:
-    … (3 removed lines not rendered) …
+            conn: The Ibis database connection
+            dataset: The dataset/schema name
+            table: The table name
 
         Returns:
-    … (1 removed line not rendered) …
+            Dictionary of variables to pass to the template
         """
         ...
 
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        """Generate the markdown content for a table using templates.
+
+        Args:
+            conn: The Ibis database connection
+            dataset: The dataset/schema name
+            table: The table name
+
+        Returns:
+            Markdown string content
+        """
+        try:
+            context = self.get_context(conn, dataset, table)
+            engine = get_template_engine(self._project_path)
+            return engine.render(self.template_name, **context)
+        except Exception as e:
+            return f"# {table}\n\nError generating content: {e}"
+
     def get_table(self, conn: BaseBackend, dataset: str, table: str):
         """Helper to get an Ibis table reference."""
-    … (2 removed lines not rendered) …
+        return conn.table(table, database=dataset)
+
+    @classmethod
+    def set_project_path(cls, path: Path | None) -> None:
+        """Set the project path for template resolution.
+
+        Args:
+            path: Path to the nao project root
+        """
+        cls._project_path = path
 
 
 def truncate_middle(text: str, max_length: int) -> str:
@@ -44,7 +91,14 @@ def truncate_middle(text: str, max_length: int) -> str:
 
 
 class ColumnsAccessor(DataAccessor):
-    """Generates columns.md with column names, types, and nullable info.
+    """Generates columns.md with column names, types, and nullable info.
+
+    Template variables:
+        - table_name: Name of the table
+        - dataset: Schema/dataset name
+        - columns: List of dicts with 'name', 'type', 'nullable', 'description'
+        - column_count: Total number of columns
+    """
 
     def __init__(self, max_description_length: int = 256):
         self.max_description_length = max_description_length
@@ -53,37 +107,43 @@ class ColumnsAccessor(DataAccessor):
     def filename(self) -> str:
         return "columns.md"
 
-    … (25 removed lines not rendered) …
-        except Exception as e:
-            return f"# {table}\n\nError fetching schema: {e}"
+    @property
+    def template_name(self) -> str:
+        return "databases/columns.md.j2"
+
+    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+        t = self.get_table(conn, dataset, table)
+        schema = t.schema()
+
+        columns = []
+        for name, dtype in schema.items():
+            columns.append(
+                {
+                    "name": name,
+                    "type": str(dtype),
+                    "nullable": dtype.nullable if hasattr(dtype, "nullable") else True,
+                    "description": None,  # Could be populated from metadata
+                }
+            )
+
+        return {
+            "table_name": table,
+            "dataset": dataset,
+            "columns": columns,
+            "column_count": len(columns),
+        }
 
 
 class PreviewAccessor(DataAccessor):
-    """Generates preview.md with the first N rows of data as JSONL.
+    """Generates preview.md with the first N rows of data as JSONL.
+
+    Template variables:
+        - table_name: Name of the table
+        - dataset: Schema/dataset name
+        - rows: List of row dictionaries
+        - row_count: Number of preview rows
+        - columns: List of column info dicts
+    """
 
     def __init__(self, num_rows: int = 10):
         self.num_rows = num_rows
@@ -92,121 +152,139 @@ class PreviewAccessor(DataAccessor):
     def filename(self) -> str:
         return "preview.md"
 
-    … (25 removed lines not rendered) …
+    @property
+    def template_name(self) -> str:
+        return "databases/preview.md.j2"
+
+    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+        t = self.get_table(conn, dataset, table)
+        schema = t.schema()
+        preview_df = t.limit(self.num_rows).execute()
+
+        rows = []
+        for _, row in preview_df.iterrows():
+            row_dict = row.to_dict()
+            # Convert non-serializable types to strings
+            for key, val in row_dict.items():
+                if val is not None and not isinstance(val, (str, int, float, bool, list, dict)):
+                    row_dict[key] = str(val)
+            rows.append(row_dict)
+
+        columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+        return {
+            "table_name": table,
+            "dataset": dataset,
+            "rows": rows,
+            "row_count": len(rows),
+            "columns": columns,
+        }
 
 
 class DescriptionAccessor(DataAccessor):
-    """Generates description.md with table metadata (row count, column count, etc.).
+    """Generates description.md with table metadata (row count, column count, etc.).
+
+    Template variables:
+        - table_name: Name of the table
+        - dataset: Schema/dataset name
+        - row_count: Total rows in the table
+        - column_count: Number of columns
+        - description: Table description (if available)
+        - columns: List of column info dicts
+    """
 
     @property
     def filename(self) -> str:
         return "description.md"
 
-    … (19 removed lines not rendered) …
-                "",
-                "## Description",
-                "",
-                "_No description available._",
-                "",
-            ]
-
-            return "\n".join(lines)
-        except Exception as e:
-            return f"# {table}\n\nError fetching description: {e}"
+    @property
+    def template_name(self) -> str:
+        return "databases/description.md.j2"
+
+    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+        t = self.get_table(conn, dataset, table)
+        schema = t.schema()
+
+        row_count = t.count().execute()
+        columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+        return {
+            "table_name": table,
+            "dataset": dataset,
+            "row_count": row_count,
+            "column_count": len(schema),
+            "description": None,  # Could be populated from metadata
+            "columns": columns,
+        }
 
 
 class ProfilingAccessor(DataAccessor):
-    """Generates profiling.md with column statistics and data profiling.
+    """Generates profiling.md with column statistics and data profiling.
+
+    Template variables:
+        - table_name: Name of the table
+        - dataset: Schema/dataset name
+        - column_stats: List of dicts with stats for each column:
+            - name: Column name
+            - type: Data type
+            - null_count: Number of nulls
+            - unique_count: Number of unique values
+            - min_value: Min value (numeric/temporal)
+            - max_value: Max value (numeric/temporal)
+            - error: Error message if stats couldn't be computed
+        - columns: List of column info dicts
+    """
 
     @property
     def filename(self) -> str:
         return "profiling.md"
 
-    … (18 removed lines not rendered) …
-            dtype_str
-    … (27 removed lines not rendered) …
+    @property
+    def template_name(self) -> str:
+        return "databases/profiling.md.j2"
+
+    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+        t = self.get_table(conn, dataset, table)
+        schema = t.schema()
+
+        column_stats = []
+        columns = []
+
+        for name, dtype in schema.items():
+            columns.append({"name": name, "type": str(dtype)})
+            col = t[name]
+            dtype_str = str(dtype)
+
+            stat = {
+                "name": name,
+                "type": dtype_str,
+                "null_count": 0,
+                "unique_count": 0,
+                "min_value": None,
+                "max_value": None,
+                "error": None,
+            }
+
+            try:
+                stat["null_count"] = t.filter(col.isnull()).count().execute()
+                stat["unique_count"] = col.nunique().execute()
+
+                if dtype.is_numeric() or dtype.is_temporal():
+                    try:
+                        min_val = str(col.min().execute())
+                        max_val = str(col.max().execute())
+                        stat["min_value"] = truncate_middle(min_val, 20)
+                        stat["max_value"] = truncate_middle(max_val, 20)
+                    except Exception:
+                        pass
+            except Exception as col_error:
+                stat["error"] = str(col_error)
+
+            column_stats.append(stat)
+
+        return {
+            "table_name": table,
+            "dataset": dataset,
+            "column_stats": column_stats,
+            "columns": columns,
+        }
```
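
The accessor refactor above replaces per-accessor string building with a shared flow: each accessor now declares a `template_name` and returns a plain `get_context()` dict, and the base class's `generate()` renders it through `get_template_engine()`, falling back to an error stub on failure. As a rough illustration of that contract, here is a minimal sketch of a custom accessor; the `RowCountAccessor` name, its output file, and its template path are hypothetical and not part of this package.

```python
# Minimal sketch of a custom accessor built on the DataAccessor contract above.
# RowCountAccessor, row_count.md, and databases/row_count.md.j2 are hypothetical.
from typing import Any

from ibis import BaseBackend

from nao_core.commands.sync.accessors import DataAccessor


class RowCountAccessor(DataAccessor):
    @property
    def filename(self) -> str:
        # File written alongside columns.md, preview.md, etc.
        return "row_count.md"

    @property
    def template_name(self) -> str:
        # Resolved by the template engine; a project-local copy under
        # <project_root>/templates/ would take precedence over a shipped default.
        return "databases/row_count.md.j2"

    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
        t = self.get_table(conn, dataset, table)
        return {
            "table_name": table,
            "dataset": dataset,
            "row_count": t.count().execute(),
        }
```

The inherited `generate()` then handles template resolution and error wrapping, so a subclass only describes what data goes into the template context.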

```diff
--- /dev/null
+++ b/nao_core/commands/sync/cleanup.py
@@ -0,0 +1,133 @@
+"""Cleanup utilities for removing stale sync files."""
+
+import shutil
+from dataclasses import dataclass, field
+from pathlib import Path
+
+from rich.console import Console
+
+console = Console()
+
+
+@dataclass
+class DatabaseSyncState:
+    """Tracks the state of a database sync operation.
+
+    Used to track which paths were synced so stale paths can be cleaned up.
+    """
+
+    db_path: Path
+    """The root path for this database (e.g., databases/type=duckdb/database=mydb)"""
+
+    synced_schemas: set[str] = field(default_factory=set)
+    """Set of schema names that were synced"""
+
+    synced_tables: dict[str, set[str]] = field(default_factory=dict)
+    """Dict mapping schema names to sets of table names that were synced"""
+
+    schemas_synced: int = 0
+    """Count of schemas synced"""
+
+    tables_synced: int = 0
+    """Count of tables synced"""
+
+    def add_table(self, schema: str, table: str) -> None:
+        """Record that a table was synced.
+
+        Args:
+            schema: The schema/dataset name
+            table: The table name
+        """
+        self.synced_schemas.add(schema)
+        if schema not in self.synced_tables:
+            self.synced_tables[schema] = set()
+        self.synced_tables[schema].add(table)
+        self.tables_synced += 1
+
+    def add_schema(self, schema: str) -> None:
+        """Record that a schema was synced (even if empty).
+
+        Args:
+            schema: The schema/dataset name
+        """
+        self.synced_schemas.add(schema)
+        self.schemas_synced += 1
+
+
+def cleanup_stale_paths(state: DatabaseSyncState, verbose: bool = False) -> int:
+    """Remove directories that exist on disk but weren't synced.
+
+    This function cleans up:
+    - Table directories that no longer exist in the source
+    - Schema directories that no longer exist or have no tables
+
+    Args:
+        state: The sync state tracking what was synced
+        verbose: Whether to print cleanup messages
+
+    Returns:
+        Number of stale paths removed
+    """
+    removed_count = 0
+
+    if not state.db_path.exists():
+        return 0
+
+    # Find all existing schema directories
+    existing_schemas = {
+        d.name.replace("schema=", ""): d for d in state.db_path.iterdir() if d.is_dir() and d.name.startswith("schema=")
+    }
+
+    # Remove schemas that weren't synced
+    for schema_name, schema_path in existing_schemas.items():
+        if schema_name not in state.synced_schemas:
+            if verbose:
+                console.print(f" [dim red]removing stale schema:[/dim red] {schema_name}")
+            shutil.rmtree(schema_path)
+            removed_count += 1
+            continue
+
+        # Find existing tables in this schema
+        existing_tables = {
+            d.name.replace("table=", ""): d for d in schema_path.iterdir() if d.is_dir() and d.name.startswith("table=")
+        }
+
+        synced_tables_for_schema = state.synced_tables.get(schema_name, set())
+
+        # Remove tables that weren't synced
+        for table_name, table_path in existing_tables.items():
+            if table_name not in synced_tables_for_schema:
+                if verbose:
+                    console.print(f" [dim red]removing stale table:[/dim red] {schema_name}.{table_name}")
+                shutil.rmtree(table_path)
+                removed_count += 1
+
+    return removed_count
+
+
+def cleanup_stale_database_types(base_path: Path, active_db_types: set[str], verbose: bool = False) -> int:
+    """Remove database type directories that are no longer configured.
+
+    Args:
+        base_path: The base databases output path
+        active_db_types: Set of database type directory names that should exist
+            (e.g., {'type=duckdb', 'type=postgres'})
+        verbose: Whether to print cleanup messages
+
+    Returns:
+        Number of stale database type directories removed
+    """
+    removed_count = 0
+
+    if not base_path.exists():
+        return 0
+
+    for db_type_dir in base_path.iterdir():
+        if db_type_dir.is_dir() and db_type_dir.name.startswith("type="):
+            if db_type_dir.name not in active_db_types:
+                if verbose:
+                    console.print(f" [dim red]removing stale database type:[/dim red] {db_type_dir.name}")
+                shutil.rmtree(db_type_dir)
+                removed_count += 1
+
+    return removed_count
```
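
The new cleanup module separates bookkeeping (`DatabaseSyncState`) from deletion (`cleanup_stale_paths` and `cleanup_stale_database_types`): a sync run records every schema and table it wrote, then anything under the database root that was not recorded is removed. A minimal usage sketch, with made-up paths and table names:

```python
# Sketch of how a database sync run might drive the cleanup helpers above.
# The db_path layout and the schema/table names here are illustrative only.
from pathlib import Path

from nao_core.commands.sync.cleanup import DatabaseSyncState, cleanup_stale_paths

state = DatabaseSyncState(db_path=Path("databases/type=duckdb/database=mydb"))

# Record what this run actually synced ...
state.add_schema("main")
state.add_table("main", "orders")
state.add_table("main", "customers")

# ... then remove any schema=*/table=* directories that were not touched.
removed = cleanup_stale_paths(state, verbose=True)
print(f"removed {removed} stale paths")
```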

```diff
--- /dev/null
+++ b/nao_core/commands/sync/providers/__init__.py
@@ -0,0 +1,30 @@
+"""Sync providers for different resource types."""
+
+from .base import SyncProvider, SyncResult
+from .databases.provider import DatabaseSyncProvider
+from .repositories.provider import RepositorySyncProvider
+
+# Default providers in order of execution
+DEFAULT_PROVIDERS: list[SyncProvider] = [
+    RepositorySyncProvider(),
+    DatabaseSyncProvider(),
+]
+
+
+def get_all_providers() -> list[SyncProvider]:
+    """Get all registered sync providers.
+
+    Returns:
+        List of sync provider instances
+    """
+    return DEFAULT_PROVIDERS.copy()
+
+
+__all__ = [
+    "SyncProvider",
+    "SyncResult",
+    "DatabaseSyncProvider",
+    "RepositorySyncProvider",
+    "DEFAULT_PROVIDERS",
+    "get_all_providers",
+]
```
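
With this registry in place, the sync command can stay generic: it iterates the registered providers rather than hard-coding repositories and databases. A rough sketch of such a loop follows; how the `NaoConfig` is loaded and where each provider's output lands are assumptions here, not something this diff shows.

```python
# Hypothetical driver loop over the provider registry; config loading and the
# exact output layout are assumptions, not taken from this diff.
from pathlib import Path

from nao_core.commands.sync.providers import get_all_providers
from nao_core.config import NaoConfig


def run_sync(config: NaoConfig, project_path: Path) -> None:
    for provider in get_all_providers():
        if not provider.should_sync(config):
            continue
        items = provider.get_items(config)
        output_path = project_path / provider.default_output_dir
        result = provider.sync(items, output_path, project_path=project_path)
        print(f"{provider.emoji} {provider.name}: {result.get_summary()}")
```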

```diff
--- /dev/null
+++ b/nao_core/commands/sync/providers/base.py
@@ -0,0 +1,87 @@
+"""Base class for sync providers."""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from nao_core.config import NaoConfig
+
+
+@dataclass
+class SyncResult:
+    """Result of a sync operation."""
+
+    provider_name: str
+    items_synced: int
+    details: dict[str, Any] | None = None
+    summary: str | None = None
+
+    def get_summary(self) -> str:
+        """Get a human-readable summary of the sync result."""
+        if self.summary:
+            return self.summary
+        return f"{self.items_synced} synced"
+
+
+class SyncProvider(ABC):
+    """Abstract base class for sync providers.
+
+    A sync provider is responsible for synchronizing a specific type of resource
+    (e.g., repositories, databases) from the nao configuration to local files.
+    """
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Human-readable name for this provider (e.g., 'Repositories', 'Databases')."""
+        ...
+
+    @property
+    @abstractmethod
+    def emoji(self) -> str:
+        """Emoji icon for this provider."""
+        ...
+
+    @property
+    @abstractmethod
+    def default_output_dir(self) -> str:
+        """Default output directory for this provider."""
+        ...
+
+    @abstractmethod
+    def get_items(self, config: NaoConfig) -> list[Any]:
+        """Extract items to sync from the configuration.
+
+        Args:
+            config: The nao configuration
+
+        Returns:
+            List of items to sync (e.g., repo configs, database configs)
+        """
+        ...
+
+    @abstractmethod
+    def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
+        """Sync the items to the output path.
+
+        Args:
+            items: List of items to sync
+            output_path: Path where synced data should be written
+            project_path: Path to the nao project root (for template resolution)
+
+        Returns:
+            SyncResult with statistics about what was synced
+        """
+        ...
+
+    def should_sync(self, config: NaoConfig) -> bool:
+        """Check if this provider has items to sync.
+
+        Args:
+            config: The nao configuration
+
+        Returns:
+            True if there are items to sync
+        """
+        return len(self.get_items(config)) > 0
```