datalex-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalex_cli/__init__.py +1 -0
- datalex_cli/datalex_cli.py +658 -0
- datalex_cli/main.py +2925 -0
- datalex_cli-0.1.1.dist-info/METADATA +228 -0
- datalex_cli-0.1.1.dist-info/RECORD +64 -0
- datalex_cli-0.1.1.dist-info/WHEEL +5 -0
- datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
- datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
- datalex_core/__init__.py +94 -0
- datalex_core/_schemas/datalex/common.schema.json +127 -0
- datalex_core/_schemas/datalex/domain.schema.json +24 -0
- datalex_core/_schemas/datalex/entity.schema.json +158 -0
- datalex_core/_schemas/datalex/model.schema.json +141 -0
- datalex_core/_schemas/datalex/policy.schema.json +70 -0
- datalex_core/_schemas/datalex/project.schema.json +82 -0
- datalex_core/_schemas/datalex/snippet.schema.json +24 -0
- datalex_core/_schemas/datalex/source.schema.json +104 -0
- datalex_core/_schemas/datalex/term.schema.json +30 -0
- datalex_core/canonical.py +166 -0
- datalex_core/completion.py +204 -0
- datalex_core/connectors/__init__.py +39 -0
- datalex_core/connectors/base.py +417 -0
- datalex_core/connectors/bigquery.py +229 -0
- datalex_core/connectors/databricks.py +262 -0
- datalex_core/connectors/mysql.py +266 -0
- datalex_core/connectors/postgres.py +309 -0
- datalex_core/connectors/redshift.py +298 -0
- datalex_core/connectors/snowflake.py +336 -0
- datalex_core/connectors/sqlserver.py +425 -0
- datalex_core/datalex/__init__.py +26 -0
- datalex_core/datalex/diff.py +188 -0
- datalex_core/datalex/errors.py +85 -0
- datalex_core/datalex/loader.py +512 -0
- datalex_core/datalex/migrate_layout.py +382 -0
- datalex_core/datalex/parse_cache.py +102 -0
- datalex_core/datalex/project.py +214 -0
- datalex_core/datalex/types.py +224 -0
- datalex_core/dbt/__init__.py +18 -0
- datalex_core/dbt/emit.py +344 -0
- datalex_core/dbt/manifest.py +329 -0
- datalex_core/dbt/profiles.py +185 -0
- datalex_core/dbt/sync.py +279 -0
- datalex_core/dbt/warehouse.py +215 -0
- datalex_core/dialects/__init__.py +15 -0
- datalex_core/dialects/_common.py +48 -0
- datalex_core/dialects/base.py +47 -0
- datalex_core/dialects/postgres.py +164 -0
- datalex_core/dialects/registry.py +36 -0
- datalex_core/dialects/snowflake.py +129 -0
- datalex_core/diffing.py +358 -0
- datalex_core/docs_generator.py +797 -0
- datalex_core/doctor.py +181 -0
- datalex_core/generators.py +478 -0
- datalex_core/importers.py +1176 -0
- datalex_core/issues.py +23 -0
- datalex_core/loader.py +21 -0
- datalex_core/migrate.py +316 -0
- datalex_core/modeling.py +679 -0
- datalex_core/packages.py +430 -0
- datalex_core/policy.py +1037 -0
- datalex_core/resolver.py +456 -0
- datalex_core/schema.py +54 -0
- datalex_core/semantic.py +1561 -0
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
"""SQL Server-family connectors (SQL Server, Azure SQL, Microsoft Fabric Warehouse)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import date
|
|
6
|
+
from typing import Any, Dict, List, Tuple
|
|
7
|
+
|
|
8
|
+
from datalex_core.connectors.base import (
|
|
9
|
+
BaseConnector,
|
|
10
|
+
ConnectorConfig,
|
|
11
|
+
ConnectorResult,
|
|
12
|
+
infer_primary_keys,
|
|
13
|
+
infer_relationships,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
_SQLSERVER_TYPE_MAP = {
|
|
18
|
+
"int": "integer",
|
|
19
|
+
"bigint": "bigint",
|
|
20
|
+
"smallint": "smallint",
|
|
21
|
+
"tinyint": "tinyint",
|
|
22
|
+
"bit": "boolean",
|
|
23
|
+
"decimal": "decimal",
|
|
24
|
+
"numeric": "decimal",
|
|
25
|
+
"money": "decimal",
|
|
26
|
+
"smallmoney": "decimal",
|
|
27
|
+
"float": "float",
|
|
28
|
+
"real": "float",
|
|
29
|
+
"char": "string",
|
|
30
|
+
"nchar": "string",
|
|
31
|
+
"varchar": "string",
|
|
32
|
+
"nvarchar": "string",
|
|
33
|
+
"text": "text",
|
|
34
|
+
"ntext": "text",
|
|
35
|
+
"date": "date",
|
|
36
|
+
"datetime": "timestamp",
|
|
37
|
+
"datetime2": "timestamp",
|
|
38
|
+
"smalldatetime": "timestamp",
|
|
39
|
+
"time": "time",
|
|
40
|
+
"datetimeoffset": "timestamp",
|
|
41
|
+
"uniqueidentifier": "uuid",
|
|
42
|
+
"binary": "binary",
|
|
43
|
+
"varbinary": "binary",
|
|
44
|
+
"image": "binary",
|
|
45
|
+
"xml": "string",
|
|
46
|
+
"sql_variant": "string",
|
|
47
|
+
"geography": "string",
|
|
48
|
+
"geometry": "string",
|
|
49
|
+
"hierarchyid": "string",
|
|
50
|
+
"json": "json",
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class _SqlServerBaseConnector(BaseConnector):
    """Shared pyodbc implementation behind the SQL Server-family connectors.

    Concrete subclasses only set ``connector_type`` and ``display_name``;
    connection handling and catalog introspection are implemented here.
    """

    required_package = "pyodbc"
    default_port = 1433
    default_schema = "dbo"

    @staticmethod
    def _odbc_value(value: Any) -> str:
        """Return *value* in a form that is safe inside an ODBC connection string.

        Values containing ``;``, ``{`` or ``}``, or carrying leading/trailing
        whitespace, are wrapped in braces with every ``}`` doubled so they can
        neither end the attribute early nor smuggle in extra keywords. Any
        other value is returned unchanged, so ordinary credentials produce the
        same connection string as before.
        """
        text = str(value)
        if text != text.strip() or any(ch in text for ch in ";{}"):
            return "{" + text.replace("}", "}}") + "}"
        return text

    @staticmethod
    def _database_name(config: ConnectorConfig) -> str:
        """Return the configured database, falling back to ``master``."""
        return config.database or "master"

    def _build_conn_string(self, config: ConnectorConfig) -> str:
        """Assemble the ODBC connection string for *config*.

        Uses SQL authentication when ``config.user`` is set and
        ``Trusted_Connection=yes`` otherwise. The driver name and the
        ``Encrypt`` / ``TrustServerCertificate`` settings are read from
        ``config.extra`` (keys ``odbc_driver``, ``encrypt``,
        ``trust_server_certificate``).
        """
        server = config.host or "localhost"
        port = config.port or self.default_port
        if port:
            # The SQL Server ODBC syntax is "host,port".
            server = f"{server},{port}"

        driver = config.extra.get("odbc_driver", "ODBC Driver 18 for SQL Server")
        encrypt = str(config.extra.get("encrypt", "yes"))
        # NOTE(review): defaulting TrustServerCertificate to "yes" skips server
        # certificate validation unless the caller overrides it — confirm this
        # default is intended.
        trust = str(config.extra.get("trust_server_certificate", "yes"))

        parts = [
            f"DRIVER={{{driver}}}",
            f"SERVER={server}",
            f"DATABASE={self._odbc_value(self._database_name(config))}",
            f"Encrypt={encrypt}",
            f"TrustServerCertificate={trust}",
            "Connection Timeout=10",
        ]

        if config.user:
            # Credentials are escaped: a raw ';' or '}' in a password would
            # otherwise truncate the value or inject further attributes.
            parts.append(f"UID={self._odbc_value(config.user)}")
            parts.append(f"PWD={self._odbc_value(config.password or '')}")
        else:
            parts.append("Trusted_Connection=yes")

        return ";".join(parts)

    def _map_type(self, data_type: str, char_max_len: Any, num_prec: Any, num_scale: Any) -> str:
        """Translate a SQL Server column type into the model's type string.

        * ``decimal`` / ``numeric`` with a precision become ``decimal(p,s)``.
        * Character types with a positive length become ``<type>(<length>)``;
          a negative length maps to ``text``.
        * Everything else goes through ``_SQLSERVER_TYPE_MAP``, with
          ``"string"`` as the fallback for unknown types.
        """
        base = (data_type or "").lower()

        if base in ("decimal", "numeric") and num_prec:
            return f"decimal({int(num_prec)},{int(num_scale or 0)})"

        if base in ("varchar", "nvarchar", "char", "nchar"):
            if char_max_len in (None, 0):
                return _SQLSERVER_TYPE_MAP.get(base, "string")
            try:
                length = int(char_max_len)
            except (TypeError, ValueError, OverflowError):
                # Unusable length metadata: fall back to the unsized mapping.
                return _SQLSERVER_TYPE_MAP.get(base, "string")
            if length < 0:
                return "text"
            return f"{base}({length})"

        return _SQLSERVER_TYPE_MAP.get(base, "string")

    def _connect(self, config: ConnectorConfig):
        """Open an autocommit pyodbc connection for *config*.

        pyodbc is imported lazily so that a missing driver package surfaces as
        ``ImportError`` only when a connection is actually attempted.
        """
        import pyodbc

        return pyodbc.connect(self._build_conn_string(config), autocommit=True)

    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
        """Try to connect and return ``(ok, message)`` instead of raising."""
        try:
            conn = self._connect(config)
            conn.close()
            return True, "Connection successful"
        except ImportError:
            return False, "pyodbc not installed. Run: pip install pyodbc"
        except Exception as e:
            # Boundary method: every failure is reported to the caller as text.
            return False, f"Connection failed: {e}"

    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """Return ``{"name", "table_count"}`` for each schema except ``sys`` and ``INFORMATION_SCHEMA``."""
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            try:
                cur.execute(
                    """
                    SELECT s.name AS schema_name,
                           (
                               SELECT COUNT(*)
                               FROM information_schema.tables t
                               WHERE t.table_schema = s.name
                                 AND t.table_type IN ('BASE TABLE', 'VIEW')
                           ) AS table_count
                    FROM sys.schemas s
                    WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA')
                    ORDER BY s.name
                    """
                )
                return [{"name": row[0], "table_count": int(row[1] or 0)} for row in cur.fetchall()]
            finally:
                cur.close()
        finally:
            conn.close()

    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """Return name, type, and column count for each table/view in the target schema.

        The schema is ``config.schema`` or ``default_schema``. ``row_count`` is
        always ``None``; no row counting is performed.
        """
        schema = config.schema or self.default_schema
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            try:
                cur.execute(
                    """
                    SELECT t.table_name, t.table_type,
                           (
                               SELECT COUNT(*)
                               FROM information_schema.columns c
                               WHERE c.table_schema = t.table_schema
                                 AND c.table_name = t.table_name
                           ) AS col_count
                    FROM information_schema.tables t
                    WHERE t.table_schema = ?
                      AND t.table_type IN ('BASE TABLE', 'VIEW')
                    ORDER BY t.table_name
                    """,
                    (schema,),
                )
                return [
                    {
                        "name": row[0],
                        "type": "view" if "VIEW" in str(row[1]).upper() else "table",
                        "column_count": int(row[2] or 0),
                        "row_count": None,
                    }
                    for row in cur.fetchall()
                ]
            finally:
                cur.close()
        finally:
            conn.close()

    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
        """Connect, introspect the target schema, and always close the connection."""
        conn = self._connect(config)
        try:
            return self._pull(conn, config)
        finally:
            conn.close()

    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
        """Build a model from the catalog of the target schema.

        Reads tables/views, columns, primary keys, unique constraints, foreign
        keys, and indexes. When the catalog yields no primary key at all, or no
        foreign key at all, the corresponding ``infer_*`` helper is used and
        its messages are added to the result's warnings.
        """
        model = self._build_model(config)
        schema_filter = config.schema or self.default_schema
        warnings: List[str] = []

        cur = conn.cursor()
        try:
            table_entities = self._read_tables(cur, config, schema_filter)
            field_index = self._read_columns(cur, schema_filter, table_entities)
            self._flag_primary_keys(cur, schema_filter, field_index)
            self._flag_unique_columns(cur, schema_filter, field_index)
            relationships = self._read_foreign_keys(cur, schema_filter, field_index)
            indexes = self._read_indexes(cur, schema_filter, table_entities, warnings)
        finally:
            # Release the cursor even when one of the catalog queries fails.
            cur.close()

        total_columns = sum(len(entity["fields"]) for entity in table_entities.values())

        entities_list = list(table_entities.values())
        has_any_pk = any(
            f.get("primary_key") for ent in entities_list for f in ent.get("fields", [])
        )
        if not has_any_pk:
            entities_list, pk_msgs = infer_primary_keys(entities_list)
            warnings.extend(pk_msgs)

        if not relationships:
            inferred_rels, fk_msgs = infer_relationships(entities_list, relationships)
            relationships.extend(inferred_rels)
            warnings.extend(fk_msgs)
            if inferred_rels:
                warnings.insert(
                    0,
                    f"No FK constraints found — inferred {len(inferred_rels)} relationships from column naming patterns.",
                )

        model["entities"] = entities_list
        model["relationships"] = relationships
        model["indexes"] = indexes

        return ConnectorResult(
            model=model,
            tables_found=len(table_entities),
            columns_found=total_columns,
            relationships_found=len(relationships),
            indexes_found=len(indexes),
            warnings=warnings,
        )

    def _read_tables(self, cur: Any, config: ConnectorConfig, schema_filter: str) -> Dict[str, Dict[str, Any]]:
        """Return an entity skeleton per included table/view, keyed by physical table name."""
        cur.execute(
            """
            SELECT table_name, table_type
            FROM information_schema.tables
            WHERE table_schema = ?
              AND table_type IN ('BASE TABLE', 'VIEW')
            ORDER BY table_name
            """,
            (schema_filter,),
        )
        # Use the database actually connected to, so the description never
        # reads "None.<schema>.<table>" when config.database is unset.
        database = self._database_name(config)
        pulled_on = date.today().isoformat()

        table_entities: Dict[str, Dict[str, Any]] = {}
        for table_name, table_type in cur.fetchall():
            if not self._should_include_table(table_name, config):
                continue
            entity: Dict[str, Any] = {
                "name": self._entity_name(table_name),
                "physical_name": table_name,
                "type": "view" if str(table_type).upper() == "VIEW" else "table",
                "description": f"Pulled from {self.display_name} {database}.{schema_filter}.{table_name} on {pulled_on}",
                "fields": [],
            }
            if schema_filter != self.default_schema:
                entity["schema"] = schema_filter
            table_entities[table_name] = entity
        return table_entities

    def _read_columns(
        self,
        cur: Any,
        schema_filter: str,
        table_entities: Dict[str, Dict[str, Any]],
    ) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """Append a field dict per column to its entity; return a table -> column -> field index."""
        cur.execute(
            """
            SELECT table_name, column_name, data_type, is_nullable,
                   column_default, character_maximum_length,
                   numeric_precision, numeric_scale
            FROM information_schema.columns
            WHERE table_schema = ?
            ORDER BY table_name, ordinal_position
            """,
            (schema_filter,),
        )
        field_index: Dict[str, Dict[str, Dict[str, Any]]] = {}
        for row in cur.fetchall():
            tname, col_name, data_type, is_nullable, col_default, char_max_len, num_prec, num_scale = row
            if tname not in table_entities:
                continue

            field: Dict[str, Any] = {
                "name": col_name,
                "type": self._map_type(data_type, char_max_len, num_prec, num_scale),
                "nullable": str(is_nullable).upper() == "YES",
            }
            if col_default is not None:
                cleaned = str(col_default).strip()
                if cleaned:
                    field["default"] = cleaned

            table_entities[tname]["fields"].append(field)
            field_index.setdefault(tname, {})[col_name] = field
        return field_index

    def _flag_primary_keys(
        self,
        cur: Any,
        schema_filter: str,
        field_index: Dict[str, Dict[str, Dict[str, Any]]],
    ) -> None:
        """Mark every primary-key column as ``primary_key`` and non-nullable."""
        cur.execute(
            """
            SELECT tc.table_name, kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
             AND tc.table_schema = kcu.table_schema
            WHERE tc.constraint_type = 'PRIMARY KEY'
              AND tc.table_schema = ?
            """,
            (schema_filter,),
        )
        for tname, col_name in cur.fetchall():
            field = field_index.get(tname, {}).get(col_name)
            if field is not None:
                field["primary_key"] = True
                field["nullable"] = False

    def _flag_unique_columns(
        self,
        cur: Any,
        schema_filter: str,
        field_index: Dict[str, Dict[str, Dict[str, Any]]],
    ) -> None:
        """Mark columns covered by a single-column UNIQUE constraint as ``unique``.

        A composite constraint such as UNIQUE(a, b) does not make ``a`` or
        ``b`` unique on its own, so its columns are deliberately not flagged.
        """
        cur.execute(
            """
            SELECT tc.constraint_name, tc.table_name, kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
             AND tc.table_schema = kcu.table_schema
            WHERE tc.constraint_type = 'UNIQUE'
              AND tc.table_schema = ?
            """,
            (schema_filter,),
        )
        members: Dict[Tuple[str, str], List[str]] = {}
        for constraint_name, tname, col_name in cur.fetchall():
            members.setdefault((tname, constraint_name), []).append(col_name)

        for (tname, _constraint_name), cols in members.items():
            if len(cols) != 1:
                continue
            field = field_index.get(tname, {}).get(cols[0])
            if field is not None:
                field["unique"] = True

    def _read_foreign_keys(
        self,
        cur: Any,
        schema_filter: str,
        field_index: Dict[str, Dict[str, Dict[str, Any]]],
    ) -> List[Dict[str, Any]]:
        """Flag foreign-key columns and return one relationship per FK column pair."""
        cur.execute(
            """
            SELECT
                fk.name AS constraint_name,
                tr.name AS child_table,
                cr.name AS child_column,
                tp.name AS parent_table,
                cp.name AS parent_column
            FROM sys.foreign_keys fk
            JOIN sys.foreign_key_columns fkc
              ON fk.object_id = fkc.constraint_object_id
            JOIN sys.tables tr
              ON fkc.parent_object_id = tr.object_id
            JOIN sys.schemas sr
              ON tr.schema_id = sr.schema_id
            JOIN sys.columns cr
              ON tr.object_id = cr.object_id
             AND fkc.parent_column_id = cr.column_id
            JOIN sys.tables tp
              ON fkc.referenced_object_id = tp.object_id
            JOIN sys.columns cp
              ON tp.object_id = cp.object_id
             AND fkc.referenced_column_id = cp.column_id
            WHERE sr.name = ?
            """,
            (schema_filter,),
        )
        relationships: List[Dict[str, Any]] = []
        for constraint_name, child_table, child_col, parent_table, parent_col in cur.fetchall():
            field = field_index.get(child_table, {}).get(child_col)
            if field is not None:
                field["foreign_key"] = True

            # NOTE(review): a relationship is recorded even when the child or
            # parent table is not among the pulled entities (excluded by the
            # table filter, or a parent in another schema) — confirm whether
            # such dangling relationships are wanted.
            parent_entity = self._entity_name(parent_table)
            child_entity = self._entity_name(child_table)
            relationships.append(
                {
                    "name": constraint_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_col}_fk",
                    "from": f"{parent_entity}.{parent_col}",
                    "to": f"{child_entity}.{child_col}",
                    "cardinality": "one_to_many",
                }
            )
        return relationships

    def _read_indexes(
        self,
        cur: Any,
        schema_filter: str,
        table_entities: Dict[str, Dict[str, Any]],
        warnings: List[str],
    ) -> List[Dict[str, Any]]:
        """Return the non-primary-key indexes of the pulled tables.

        Index metadata is best-effort: a failure is appended to *warnings*
        and whatever was collected so far is returned.
        """
        indexes: List[Dict[str, Any]] = []
        try:
            cur.execute(
                """
                SELECT
                    i.name AS index_name,
                    t.name AS table_name,
                    i.is_unique,
                    STRING_AGG(c.name, ',') WITHIN GROUP (ORDER BY ic.key_ordinal) AS columns_csv
                FROM sys.indexes i
                JOIN sys.tables t
                  ON i.object_id = t.object_id
                JOIN sys.schemas s
                  ON t.schema_id = s.schema_id
                JOIN sys.index_columns ic
                  ON i.object_id = ic.object_id
                 AND i.index_id = ic.index_id
                JOIN sys.columns c
                  ON ic.object_id = c.object_id
                 AND ic.column_id = c.column_id
                WHERE s.name = ?
                  AND i.is_primary_key = 0
                  AND i.is_hypothetical = 0
                  AND i.index_id > 0
                GROUP BY i.name, t.name, i.is_unique
                ORDER BY t.name, i.name
                """,
                (schema_filter,),
            )
            for idx_name, tname, is_unique, columns_csv in cur.fetchall():
                if tname not in table_entities:
                    continue
                cols = [c.strip() for c in str(columns_csv or "").split(",") if c.strip()]
                indexes.append(
                    {
                        "name": idx_name,
                        "entity": self._entity_name(tname),
                        "fields": cols,
                        "unique": bool(is_unique),
                    }
                )
        except Exception as e:
            # Deliberately broad: missing index metadata must not abort the
            # pull (presumably e.g. servers without STRING_AGG — unverified).
            warnings.append(f"Could not fetch index metadata: {e}")
        return indexes
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
class SQLServerConnector(_SqlServerBaseConnector):
    """Connector registered as ``sqlserver``; all behavior comes from the shared base."""

    display_name = "SQL Server"
    connector_type = "sqlserver"
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
class AzureSQLConnector(_SqlServerBaseConnector):
    """Connector registered as ``azure_sql``; all behavior comes from the shared base."""

    display_name = "Azure SQL"
    connector_type = "azure_sql"
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
class AzureFabricConnector(_SqlServerBaseConnector):
    """Connector registered as ``azure_fabric``; all behavior comes from the shared base."""

    display_name = "Microsoft Fabric Warehouse"
    connector_type = "azure_fabric"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""DataLex — file-per-entity, kind-dispatched YAML data modeling layer.
|
|
2
|
+
|
|
3
|
+
This package implements the DataLex specification (see
|
|
4
|
+
`skills/datalex-builder/` under the DataLex project root) on top of
|
|
5
|
+
the DataLex core engine.
|
|
6
|
+
|
|
7
|
+
Public surface:
|
|
8
|
+
types — logical type parser (primitives + array/map/struct)
|
|
9
|
+
loader — kind-dispatched streaming loader with source-located errors
|
|
10
|
+
project — DataLexProject: the loaded, validated, resolved project graph
|
|
11
|
+
errors — DataLexError and friends
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from datalex_core.datalex.errors import DataLexError, SourceLocation
|
|
15
|
+
from datalex_core.datalex.types import LogicalType, parse_type
|
|
16
|
+
from datalex_core.datalex.loader import load_project
|
|
17
|
+
from datalex_core.datalex.project import DataLexProject
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"DataLexError",
|
|
21
|
+
"SourceLocation",
|
|
22
|
+
"LogicalType",
|
|
23
|
+
"parse_type",
|
|
24
|
+
"load_project",
|
|
25
|
+
"DataLexProject",
|
|
26
|
+
]
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""DataLex semantic diff with explicit rename tracking via `previous_name:`.
|
|
2
|
+
|
|
3
|
+
The existing `datalex_core/diffing.py` module diffs v3 monolithic models. This module
|
|
4
|
+
operates on DataLexProject entities (layer-scoped) and produces a structured diff
|
|
5
|
+
dict of added / removed / renamed / changed objects.
|
|
6
|
+
|
|
7
|
+
Rename detection is explicit: if entity B in `new` has `previous_name: A` and no
|
|
8
|
+
entity named A exists in `new` but does in `old`, the diff records (A -> B) as a
|
|
9
|
+
rename, not a drop+add. Same rule applies to columns and indexes.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def diff_entities(
    old: Dict[str, Dict[str, Any]],
    new: Dict[str, Dict[str, Any]],
) -> Dict[str, Any]:
    """Diff two entity mappings keyed by ``'<layer>:<name>'``.

    Returns a dict with the keys ``added``, ``removed``, ``renamed``
    (``(old_key, new_key)`` tuples), ``changed`` (per-entity change dicts) and
    ``breaking`` (human-readable messages).

    An entity in *new* counts as a rename when its ``previous_name`` resolves,
    within the same layer, to a key that exists in *old* but not in *new*.
    """
    # Pass 1: explicit renames, recorded as new key -> old key.
    rename_map: Dict[str, str] = {}
    for new_key, entity in new.items():
        previous = entity.get("previous_name")
        if previous:
            # The layer defaults to the key prefix, or "physical" when the key
            # carries no "<layer>:" prefix.
            fallback_layer = new_key.partition(":")[0] if ":" in new_key else "physical"
            candidate = f"{entity.get('layer', fallback_layer)}:{previous}"
            if candidate in old and candidate not in new:
                rename_map[new_key] = candidate

    old_keys, new_keys = set(old), set(new)

    # Renamed entities are reported once, as renames — not as a drop plus an add.
    added: List[str] = sorted(new_keys - old_keys - set(rename_map))
    removed: List[str] = sorted(old_keys - new_keys - set(rename_map.values()))
    renamed: List[Tuple[str, str]] = [
        (source, target) for target, source in sorted(rename_map.items())
    ]

    changed: List[Dict[str, Any]] = []

    # Entities that kept their key.
    for key in sorted(old_keys & new_keys):
        delta = _entity_diff(old[key], new[key])
        if delta:
            changed.append({"entity": key, **delta})

    # Renamed entities: body changes are reported under the new key.
    for target, source in rename_map.items():
        delta = _entity_diff(old[source], new[target])
        if delta:
            changed.append({"entity": target, "renamed_from": source, **delta})

    return {
        "added": added,
        "removed": removed,
        "renamed": renamed,
        "changed": changed,
        "breaking": _breaking_from_diff(removed, changed),
    }
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _entity_diff(old_ent: Dict[str, Any], new_ent: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Return the differences between two versions of one entity, or ``None``.

    The result may contain ``scalar`` (changed entity-level attributes as
    ``{"from", "to"}`` pairs), ``columns`` and ``indexes``.
    """
    scalar_changes = {
        attr: {"from": old_ent.get(attr), "to": new_ent.get(attr)}
        for attr in ("description", "owner", "domain", "subject_area", "schema", "database", "physical_name")
        if old_ent.get(attr) != new_ent.get(attr)
    }

    changes: Dict[str, Any] = {}
    if scalar_changes:
        changes["scalar"] = scalar_changes

    # A missing or null collection is treated as empty.
    column_changes = _columns_diff(old_ent.get("columns") or [], new_ent.get("columns") or [])
    if column_changes:
        changes["columns"] = column_changes

    index_changes = _indexes_diff(old_ent.get("indexes") or [], new_ent.get("indexes") or [])
    if index_changes:
        changes["indexes"] = index_changes

    return changes if changes else None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _columns_diff(old_cols: List[Dict[str, Any]], new_cols: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
|
|
97
|
+
old_by_name = {c["name"]: c for c in old_cols if c.get("name")}
|
|
98
|
+
new_by_name = {c["name"]: c for c in new_cols if c.get("name")}
|
|
99
|
+
|
|
100
|
+
rename_pairs: List[Tuple[str, str]] = []
|
|
101
|
+
for name, c in new_by_name.items():
|
|
102
|
+
prev = c.get("previous_name")
|
|
103
|
+
if prev and prev in old_by_name and prev not in new_by_name:
|
|
104
|
+
rename_pairs.append((prev, name))
|
|
105
|
+
renamed_old = {p[0] for p in rename_pairs}
|
|
106
|
+
renamed_new = {p[1] for p in rename_pairs}
|
|
107
|
+
|
|
108
|
+
added = sorted(set(new_by_name) - set(old_by_name) - renamed_new)
|
|
109
|
+
removed = sorted(set(old_by_name) - set(new_by_name) - renamed_old)
|
|
110
|
+
|
|
111
|
+
changed: List[Dict[str, Any]] = []
|
|
112
|
+
for name in sorted(set(old_by_name) & set(new_by_name)):
|
|
113
|
+
ch = _column_scalar_diff(old_by_name[name], new_by_name[name])
|
|
114
|
+
if ch:
|
|
115
|
+
changed.append({"name": name, **ch})
|
|
116
|
+
|
|
117
|
+
for old_name, new_name in rename_pairs:
|
|
118
|
+
ch = _column_scalar_diff(old_by_name[old_name], new_by_name[new_name]) or {}
|
|
119
|
+
changed.append({"name": new_name, "renamed_from": old_name, **ch})
|
|
120
|
+
|
|
121
|
+
out: Dict[str, Any] = {}
|
|
122
|
+
if added:
|
|
123
|
+
out["added"] = added
|
|
124
|
+
if removed:
|
|
125
|
+
out["removed"] = removed
|
|
126
|
+
if rename_pairs:
|
|
127
|
+
out["renamed"] = [{"from": a, "to": b} for a, b in rename_pairs]
|
|
128
|
+
if changed:
|
|
129
|
+
out["changed"] = changed
|
|
130
|
+
return out or None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _column_scalar_diff(old: Dict[str, Any], new: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
|
134
|
+
out: Dict[str, Any] = {}
|
|
135
|
+
for field in ("type", "nullable", "primary_key", "unique", "default", "sensitivity", "description"):
|
|
136
|
+
if old.get(field) != new.get(field):
|
|
137
|
+
out[field] = {"from": old.get(field), "to": new.get(field)}
|
|
138
|
+
if (old.get("references") or None) != (new.get("references") or None):
|
|
139
|
+
out["references"] = {"from": old.get("references"), "to": new.get("references")}
|
|
140
|
+
return out or None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _indexes_diff(old_idx: List[Dict[str, Any]], new_idx: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
|
|
144
|
+
old_by_name = {i["name"]: i for i in old_idx if i.get("name")}
|
|
145
|
+
new_by_name = {i["name"]: i for i in new_idx if i.get("name")}
|
|
146
|
+
|
|
147
|
+
rename_pairs: List[Tuple[str, str]] = []
|
|
148
|
+
for name, i in new_by_name.items():
|
|
149
|
+
prev = i.get("previous_name")
|
|
150
|
+
if prev and prev in old_by_name and prev not in new_by_name:
|
|
151
|
+
rename_pairs.append((prev, name))
|
|
152
|
+
renamed_old = {p[0] for p in rename_pairs}
|
|
153
|
+
renamed_new = {p[1] for p in rename_pairs}
|
|
154
|
+
|
|
155
|
+
added = sorted(set(new_by_name) - set(old_by_name) - renamed_new)
|
|
156
|
+
removed = sorted(set(old_by_name) - set(new_by_name) - renamed_old)
|
|
157
|
+
|
|
158
|
+
out: Dict[str, Any] = {}
|
|
159
|
+
if added:
|
|
160
|
+
out["added"] = added
|
|
161
|
+
if removed:
|
|
162
|
+
out["removed"] = removed
|
|
163
|
+
if rename_pairs:
|
|
164
|
+
out["renamed"] = [{"from": a, "to": b} for a, b in rename_pairs]
|
|
165
|
+
return out or None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _breaking_from_diff(removed: List[str], changed: List[Dict[str, Any]]) -> List[str]:
|
|
169
|
+
"""Flag changes that break consumers. First pass heuristics — extended in Phase B."""
|
|
170
|
+
breaking: List[str] = []
|
|
171
|
+
for key in removed:
|
|
172
|
+
breaking.append(f"Entity removed: {key}")
|
|
173
|
+
for ch in changed:
|
|
174
|
+
ent = ch.get("entity")
|
|
175
|
+
cols = ch.get("columns") or {}
|
|
176
|
+
for rem in cols.get("removed", []):
|
|
177
|
+
breaking.append(f"Column removed: {ent}.{rem}")
|
|
178
|
+
for c in cols.get("changed", []):
|
|
179
|
+
t = c.get("type")
|
|
180
|
+
if t and t.get("from") and t.get("to") and t["from"] != t["to"]:
|
|
181
|
+
breaking.append(f"Column type changed: {ent}.{c['name']} ({t['from']} -> {t['to']})")
|
|
182
|
+
nn = c.get("nullable")
|
|
183
|
+
if nn and nn.get("from") is True and nn.get("to") is False:
|
|
184
|
+
breaking.append(f"Column became NOT NULL without a migration: {ent}.{c['name']}")
|
|
185
|
+
idx = ch.get("indexes") or {}
|
|
186
|
+
for rem in idx.get("removed", []):
|
|
187
|
+
breaking.append(f"Index removed: {ent}.{rem}")
|
|
188
|
+
return breaking
|