datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,425 @@
1
+ """SQL Server-family connectors (SQL Server, Azure SQL, Microsoft Fabric Warehouse)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import date
6
+ from typing import Any, Dict, List, Tuple
7
+
8
+ from datalex_core.connectors.base import (
9
+ BaseConnector,
10
+ ConnectorConfig,
11
+ ConnectorResult,
12
+ infer_primary_keys,
13
+ infer_relationships,
14
+ )
15
+
16
+
17
+ _SQLSERVER_TYPE_MAP = {
18
+ "int": "integer",
19
+ "bigint": "bigint",
20
+ "smallint": "smallint",
21
+ "tinyint": "tinyint",
22
+ "bit": "boolean",
23
+ "decimal": "decimal",
24
+ "numeric": "decimal",
25
+ "money": "decimal",
26
+ "smallmoney": "decimal",
27
+ "float": "float",
28
+ "real": "float",
29
+ "char": "string",
30
+ "nchar": "string",
31
+ "varchar": "string",
32
+ "nvarchar": "string",
33
+ "text": "text",
34
+ "ntext": "text",
35
+ "date": "date",
36
+ "datetime": "timestamp",
37
+ "datetime2": "timestamp",
38
+ "smalldatetime": "timestamp",
39
+ "time": "time",
40
+ "datetimeoffset": "timestamp",
41
+ "uniqueidentifier": "uuid",
42
+ "binary": "binary",
43
+ "varbinary": "binary",
44
+ "image": "binary",
45
+ "xml": "string",
46
+ "sql_variant": "string",
47
+ "geography": "string",
48
+ "geometry": "string",
49
+ "hierarchyid": "string",
50
+ "json": "json",
51
+ }
52
+
53
+
54
class _SqlServerBaseConnector(BaseConnector):
    """Shared pyodbc-based implementation for the SQL Server connector family.

    Subclasses only need to set ``connector_type`` and ``display_name``; the
    connection handling and catalog introspection are implemented here.
    """

    required_package = "pyodbc"
    default_port = 1433
    default_schema = "dbo"

    @staticmethod
    def _odbc_value(value: Any) -> str:
        """Format *value* as an ODBC connection-string attribute value.

        Plain values are returned unchanged. Values containing ``;``, ``{`` or
        ``}``, or with leading/trailing whitespace, are wrapped in braces with
        every ``}`` doubled so the string is not split or truncated at those
        characters.
        """
        text = "" if value is None else str(value)
        needs_braces = text != text.strip() or any(ch in text for ch in ";{}")
        if not needs_braces:
            return text
        return "{" + text.replace("}", "}}") + "}"

    def _build_conn_string(self, config: ConnectorConfig) -> str:
        """Build the ODBC connection string for *config*.

        Uses SQL authentication (``UID``/``PWD``) when ``config.user`` is set and
        ``Trusted_Connection=yes`` otherwise. The ``odbc_driver``, ``encrypt`` and
        ``trust_server_certificate`` keys of ``config.extra`` override the
        defaults.
        """
        extra = config.extra or {}

        server = config.host or "localhost"
        port = config.port or self.default_port
        if port:
            server = f"{server},{port}"

        driver = extra.get("odbc_driver", "ODBC Driver 18 for SQL Server")
        database = config.database or "master"
        encrypt = str(extra.get("encrypt", "yes"))
        # NOTE(review): the "yes" default makes the driver accept the server
        # certificate without validating it; confirm this is the intended
        # default for production use.
        trust = str(extra.get("trust_server_certificate", "yes"))

        parts = [
            f"DRIVER={{{driver}}}",
            f"SERVER={server}",
            f"DATABASE={self._odbc_value(database)}",
            f"Encrypt={encrypt}",
            f"TrustServerCertificate={trust}",
            "Connection Timeout=10",
        ]

        if config.user:
            # Credentials may contain ';' or braces, so they must be quoted.
            parts.append(f"UID={self._odbc_value(config.user)}")
            parts.append(f"PWD={self._odbc_value(config.password or '')}")
        else:
            parts.append("Trusted_Connection=yes")

        return ";".join(parts)

    def _map_type(self, data_type: str, char_max_len: Any, num_prec: Any, num_scale: Any) -> str:
        """Translate a SQL Server column type into a DataLex type string.

        * ``decimal``/``numeric`` with a precision become ``decimal(p,s)``.
        * Character types with a positive length keep it, e.g. ``varchar(50)``;
          a negative length (as reported for ``MAX`` columns) becomes ``text``.
        * Everything else goes through ``_SQLSERVER_TYPE_MAP`` and defaults to
          ``string``.
        """
        base = (data_type or "").lower()

        if base in ("decimal", "numeric") and num_prec:
            return f"decimal({int(num_prec)},{int(num_scale or 0)})"

        if base in ("varchar", "nvarchar", "char", "nchar"):
            if char_max_len in (None, 0):
                return _SQLSERVER_TYPE_MAP.get(base, "string")
            try:
                length = int(char_max_len)
            except (TypeError, ValueError, OverflowError):
                # Unparseable length metadata: fall back to the unsized type.
                return _SQLSERVER_TYPE_MAP.get(base, "string")
            if length < 0:
                return "text"
            return f"{base}({length})"

        return _SQLSERVER_TYPE_MAP.get(base, "string")

    def _connect(self, config: ConnectorConfig):
        """Open an autocommit pyodbc connection for *config*.

        ``pyodbc`` is imported here so that a missing driver package only
        surfaces as ``ImportError`` when a connection is actually attempted.
        """
        import pyodbc

        return pyodbc.connect(self._build_conn_string(config), autocommit=True)

    def test_connection(self, config: ConnectorConfig) -> Tuple[bool, str]:
        """Try to connect and return ``(ok, message)`` instead of raising."""
        try:
            conn = self._connect(config)
            conn.close()
            return True, "Connection successful"
        except ImportError:
            return False, "pyodbc not installed. Run: pip install pyodbc"
        except Exception as e:
            # Boundary method: any driver/network failure is reported as text.
            return False, f"Connection failed: {e}"

    def list_schemas(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """Return ``{"name", "table_count"}`` for each schema except the system ones."""
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            cur.execute(
                """
                SELECT s.name AS schema_name,
                       (
                           SELECT COUNT(*)
                           FROM information_schema.tables t
                           WHERE t.table_schema = s.name
                             AND t.table_type IN ('BASE TABLE', 'VIEW')
                       ) AS table_count
                FROM sys.schemas s
                WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA')
                ORDER BY s.name
                """
            )
            return [{"name": row[0], "table_count": int(row[1] or 0)} for row in cur.fetchall()]
        finally:
            conn.close()

    def list_tables(self, config: ConnectorConfig) -> List[Dict[str, Any]]:
        """List tables and views in ``config.schema`` (or the default schema).

        Each entry has ``name``, ``type`` (``"table"`` or ``"view"``),
        ``column_count`` and ``row_count`` (always ``None``; rows are not counted).
        """
        schema = config.schema or self.default_schema
        conn = self._connect(config)
        try:
            cur = conn.cursor()
            cur.execute(
                """
                SELECT t.table_name, t.table_type,
                       (
                           SELECT COUNT(*)
                           FROM information_schema.columns c
                           WHERE c.table_schema = t.table_schema
                             AND c.table_name = t.table_name
                       ) AS col_count
                FROM information_schema.tables t
                WHERE t.table_schema = ?
                  AND t.table_type IN ('BASE TABLE', 'VIEW')
                ORDER BY t.table_name
                """,
                (schema,),
            )
            results = []
            for row in cur.fetchall():
                ttype = "view" if "VIEW" in str(row[1]).upper() else "table"
                results.append({
                    "name": row[0],
                    "type": ttype,
                    "column_count": int(row[2] or 0),
                    "row_count": None,
                })
            return results
        finally:
            conn.close()

    def pull_schema(self, config: ConnectorConfig) -> ConnectorResult:
        """Connect, introspect the configured schema, and always close the connection."""
        conn = self._connect(config)
        try:
            return self._pull(conn, config)
        finally:
            conn.close()

    def _pull(self, conn: Any, config: ConnectorConfig) -> ConnectorResult:
        """Introspect one schema over *conn* and assemble the model.

        Reads tables/views, columns, primary keys, unique constraints, foreign
        keys and indexes. When no primary keys or no relationships are found,
        the shared inference helpers are used and their messages are returned
        as warnings.
        """
        model = self._build_model(config)
        schema_filter = config.schema or self.default_schema
        cur = conn.cursor()
        warnings: List[str] = []

        # --- tables and views -------------------------------------------------
        cur.execute(
            """
            SELECT table_name, table_type
            FROM information_schema.tables
            WHERE table_schema = ?
              AND table_type IN ('BASE TABLE', 'VIEW')
            ORDER BY table_name
            """,
            (schema_filter,),
        )
        table_entities: Dict[str, Dict[str, Any]] = {}
        for table_name, table_type in cur.fetchall():
            if not self._should_include_table(table_name, config):
                continue
            entity: Dict[str, Any] = {
                "name": self._entity_name(table_name),
                "physical_name": table_name,
                "type": "view" if str(table_type).upper() == "VIEW" else "table",
                "description": f"Pulled from {self.display_name} {config.database}.{schema_filter}.{table_name} on {date.today().isoformat()}",
                "fields": [],
            }
            if schema_filter != self.default_schema:
                entity["schema"] = schema_filter
            table_entities[table_name] = entity

        # --- columns ----------------------------------------------------------
        cur.execute(
            """
            SELECT table_name, column_name, data_type, is_nullable,
                   column_default, character_maximum_length,
                   numeric_precision, numeric_scale
            FROM information_schema.columns
            WHERE table_schema = ?
            ORDER BY table_name, ordinal_position
            """,
            (schema_filter,),
        )
        # (table, column) -> field dict, so constraint rows can flag fields
        # without rescanning each table's field list.
        fields_by_column: Dict[Tuple[str, str], Dict[str, Any]] = {}
        total_columns = 0
        for row in cur.fetchall():
            tname, col_name, data_type, is_nullable, col_default, char_max_len, num_prec, num_scale = row
            entity = table_entities.get(tname)
            if entity is None:
                continue

            field: Dict[str, Any] = {
                "name": col_name,
                "type": self._map_type(data_type, char_max_len, num_prec, num_scale),
                "nullable": str(is_nullable).upper() == "YES",
            }
            if col_default is not None:
                cleaned = str(col_default).strip()
                if cleaned:
                    field["default"] = cleaned

            entity["fields"].append(field)
            fields_by_column[(tname, col_name)] = field
            total_columns += 1

        # --- primary keys -----------------------------------------------------
        cur.execute(
            """
            SELECT tc.table_name, kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
             AND tc.table_schema = kcu.table_schema
            WHERE tc.constraint_type = 'PRIMARY KEY'
              AND tc.table_schema = ?
            """,
            (schema_filter,),
        )
        for tname, col_name in cur.fetchall():
            pk_field = fields_by_column.get((tname, col_name))
            if pk_field is not None:
                pk_field["primary_key"] = True
                pk_field["nullable"] = False

        # --- unique constraints -----------------------------------------------
        cur.execute(
            """
            SELECT tc.table_name, tc.constraint_name, kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
              ON tc.constraint_name = kcu.constraint_name
             AND tc.table_schema = kcu.table_schema
            WHERE tc.constraint_type = 'UNIQUE'
              AND tc.table_schema = ?
            """,
            (schema_filter,),
        )
        unique_members: Dict[Tuple[str, str], List[str]] = {}
        for tname, constraint_name, col_name in cur.fetchall():
            unique_members.setdefault((tname, constraint_name), []).append(col_name)
        for (tname, _constraint), member_cols in unique_members.items():
            # A composite UNIQUE constraint does not make its individual
            # columns unique, so only single-column constraints set the flag.
            if len(member_cols) != 1:
                continue
            unique_field = fields_by_column.get((tname, member_cols[0]))
            if unique_field is not None:
                unique_field["unique"] = True

        # --- foreign keys -----------------------------------------------------
        cur.execute(
            """
            SELECT
                fk.name AS constraint_name,
                tr.name AS child_table,
                cr.name AS child_column,
                tp.name AS parent_table,
                cp.name AS parent_column
            FROM sys.foreign_keys fk
            JOIN sys.foreign_key_columns fkc
              ON fk.object_id = fkc.constraint_object_id
            JOIN sys.tables tr
              ON fkc.parent_object_id = tr.object_id
            JOIN sys.schemas sr
              ON tr.schema_id = sr.schema_id
            JOIN sys.columns cr
              ON tr.object_id = cr.object_id
             AND fkc.parent_column_id = cr.column_id
            JOIN sys.tables tp
              ON fkc.referenced_object_id = tp.object_id
            JOIN sys.columns cp
              ON tp.object_id = cp.object_id
             AND fkc.referenced_column_id = cp.column_id
            WHERE sr.name = ?
            """,
            (schema_filter,),
        )
        relationships: List[Dict[str, Any]] = []
        for constraint_name, child_table, child_col, parent_table, parent_col in cur.fetchall():
            fk_field = fields_by_column.get((child_table, child_col))
            if fk_field is not None:
                fk_field["foreign_key"] = True

            # Only emit relationships whose two endpoints are entities of this
            # model; otherwise the relationship would reference a missing entity.
            if child_table not in table_entities or parent_table not in table_entities:
                warnings.append(
                    f"Skipped foreign key {constraint_name}: "
                    f"{child_table}.{child_col} -> {parent_table}.{parent_col} "
                    "references a table that is not part of the pulled model."
                )
                continue

            parent_entity = self._entity_name(parent_table)
            child_entity = self._entity_name(child_table)
            relationships.append(
                {
                    "name": constraint_name or f"{parent_entity.lower()}_{child_entity.lower()}_{child_col}_fk",
                    "from": f"{parent_entity}.{parent_col}",
                    "to": f"{child_entity}.{child_col}",
                    "cardinality": "one_to_many",
                }
            )

        # --- indexes (best effort) --------------------------------------------
        indexes: List[Dict[str, Any]] = []
        try:
            cur.execute(
                """
                SELECT
                    i.name AS index_name,
                    t.name AS table_name,
                    i.is_unique,
                    STRING_AGG(c.name, ',') WITHIN GROUP (ORDER BY ic.key_ordinal) AS columns_csv
                FROM sys.indexes i
                JOIN sys.tables t
                  ON i.object_id = t.object_id
                JOIN sys.schemas s
                  ON t.schema_id = s.schema_id
                JOIN sys.index_columns ic
                  ON i.object_id = ic.object_id
                 AND i.index_id = ic.index_id
                JOIN sys.columns c
                  ON ic.object_id = c.object_id
                 AND ic.column_id = c.column_id
                WHERE s.name = ?
                  AND i.is_primary_key = 0
                  AND i.is_hypothetical = 0
                  AND i.index_id > 0
                GROUP BY i.name, t.name, i.is_unique
                ORDER BY t.name, i.name
                """,
                (schema_filter,),
            )
            for idx_name, tname, is_unique, columns_csv in cur.fetchall():
                if tname not in table_entities:
                    continue
                cols = [c.strip() for c in str(columns_csv or "").split(",") if c.strip()]
                indexes.append(
                    {
                        "name": idx_name,
                        "entity": self._entity_name(tname),
                        "fields": cols,
                        "unique": bool(is_unique),
                    }
                )
        except Exception as e:
            # Index metadata is optional; a failure here is reported as a
            # warning rather than aborting the whole pull.
            warnings.append(f"Could not fetch index metadata: {e}")

        # --- inference fallbacks ----------------------------------------------
        entities_list = list(table_entities.values())
        has_any_pk = any(
            f.get("primary_key") for ent in entities_list for f in ent.get("fields", [])
        )
        if not has_any_pk:
            entities_list, pk_msgs = infer_primary_keys(entities_list)
            warnings.extend(pk_msgs)

        if not relationships:
            inferred_rels, fk_msgs = infer_relationships(entities_list, relationships)
            relationships.extend(inferred_rels)
            warnings.extend(fk_msgs)
            if inferred_rels:
                warnings.insert(
                    0,
                    f"No FK constraints found — inferred {len(inferred_rels)} relationships from column naming patterns.",
                )

        model["entities"] = entities_list
        model["relationships"] = relationships
        model["indexes"] = indexes

        cur.close()

        return ConnectorResult(
            model=model,
            tables_found=len(table_entities),
            columns_found=total_columns,
            relationships_found=len(relationships),
            indexes_found=len(indexes),
            warnings=warnings,
        )
411
+
412
+
413
class SQLServerConnector(_SqlServerBaseConnector):
    """Connector registered under the ``sqlserver`` type."""

    connector_type = "sqlserver"
    display_name = "SQL Server"
416
+
417
+
418
class AzureSQLConnector(_SqlServerBaseConnector):
    """Connector registered under the ``azure_sql`` type."""

    connector_type = "azure_sql"
    display_name = "Azure SQL"
421
+
422
+
423
class AzureFabricConnector(_SqlServerBaseConnector):
    """Connector registered under the ``azure_fabric`` type."""

    connector_type = "azure_fabric"
    display_name = "Microsoft Fabric Warehouse"
@@ -0,0 +1,26 @@
1
+ """DataLex — file-per-entity, kind-dispatched YAML data modeling layer.
2
+
3
+ This package implements the DataLex specification (see
4
+ `skills/datalex-builder/` in the DataLex project) on top of
5
+ the DataLex core engine.
6
+
7
+ Public surface:
8
+ types — logical type parser (primitives + array/map/struct)
9
+ loader — kind-dispatched streaming loader with source-located errors
10
+ project — DataLexProject: the loaded, validated, resolved project graph
11
+ errors — DataLexError and friends
12
+ """
13
+
14
+ from datalex_core.datalex.errors import DataLexError, SourceLocation
15
+ from datalex_core.datalex.types import LogicalType, parse_type
16
+ from datalex_core.datalex.loader import load_project
17
+ from datalex_core.datalex.project import DataLexProject
18
+
19
# Names re-exported as the package's public surface, grouped by the
# submodule each one is imported from.
__all__ = [
    # errors
    "DataLexError",
    "SourceLocation",
    # types
    "LogicalType",
    "parse_type",
    # loader
    "load_project",
    # project
    "DataLexProject",
]
@@ -0,0 +1,188 @@
1
+ """DataLex semantic diff with explicit rename tracking via `previous_name:`.
2
+
3
+ The existing `datalex_core/diffing.py` module diffs v3 monolithic models. This module
4
+ operates on DataLexProject entities (layer-scoped) and produces a structured diff
5
+ dict of added / removed / renamed / changed objects.
6
+
7
+ Rename detection is explicit: if entity B in `new` has `previous_name: A` and no
8
+ entity named A exists in `new` but does in `old`, the diff records (A -> B) as a
9
+ rename, not a drop+add. Same rule applies to columns and indexes.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any, Dict, List, Optional, Tuple
15
+
16
+
17
def diff_entities(
    old: Dict[str, Dict[str, Any]],
    new: Dict[str, Dict[str, Any]],
) -> Dict[str, Any]:
    """Compare two keyed entity dicts (key = '<layer>:<name>') and return a structured diff.

    Args:
        old: Entities before the change, keyed by ``'<layer>:<name>'``.
        new: Entities after the change, keyed the same way.

    Returns:
        A dict with:
          * ``added`` / ``removed``: sorted lists of entity keys,
          * ``renamed``: list of ``(old_key, new_key)`` tuples,
          * ``changed``: per-entity change dicts (rename pairs carry ``renamed_from``),
          * ``breaking``: human-readable messages for breaking changes.

    A rename is recorded only when the new entity declares ``previous_name``,
    the new key did not exist in ``old``, the old key is absent from ``new``,
    and no other new entity has already claimed that old key.
    """
    old_keys = set(old)
    new_keys = set(new)

    # First pass: detect explicit renames via previous_name. Keys are visited
    # in sorted order so the first-claim-wins rule is deterministic.
    renames_new_to_old: Dict[str, str] = {}
    claimed_old = set()
    for key in sorted(new_keys):
        ent = new[key]
        prev = ent.get("previous_name")
        if not prev:
            continue
        if key in old_keys:
            # The entity already existed under this name, so a leftover
            # previous_name must not hide the removal of another entity.
            continue
        layer = ent.get("layer", key.split(":")[0] if ":" in key else "physical")
        old_key = f"{layer}:{prev}"
        if old_key in old_keys and old_key not in new_keys and old_key not in claimed_old:
            renames_new_to_old[key] = old_key
            claimed_old.add(old_key)

    renamed_new_set = set(renames_new_to_old)

    added: List[str] = sorted(new_keys - old_keys - renamed_new_set)
    removed: List[str] = sorted(old_keys - new_keys - claimed_old)
    renamed: List[Tuple[str, str]] = [
        (old_key, new_key) for new_key, old_key in sorted(renames_new_to_old.items())
    ]

    changed: List[Dict[str, Any]] = []

    # Entities present under the same key on both sides.
    for key in sorted(old_keys & new_keys):
        ch = _entity_diff(old[key], new[key])
        if ch:
            changed.append({"entity": key, **ch})

    # For rename pairs, also diff the bodies and report them under the new name.
    for new_key, old_key in renames_new_to_old.items():
        ch = _entity_diff(old[old_key], new[new_key])
        if ch:
            changed.append({"entity": new_key, "renamed_from": old_key, **ch})

    breaking = _breaking_from_diff(removed, changed)

    return {
        "added": added,
        "removed": removed,
        "renamed": renamed,
        "changed": changed,
        "breaking": breaking,
    }
72
+
73
+
74
def _entity_diff(old_ent: Dict[str, Any], new_ent: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Return what changed between two versions of one entity, or ``None``.

    The result may contain ``scalar`` (per-attribute ``from``/``to`` pairs),
    ``columns`` and ``indexes`` sections; a section is present only when it
    has something to report.
    """
    scalar_attrs = (
        "description",
        "owner",
        "domain",
        "subject_area",
        "schema",
        "database",
        "physical_name",
    )
    scalar_delta = {
        attr: {"from": old_ent.get(attr), "to": new_ent.get(attr)}
        for attr in scalar_attrs
        if old_ent.get(attr) != new_ent.get(attr)
    }

    result: Dict[str, Any] = {}
    if scalar_delta:
        result["scalar"] = scalar_delta

    column_delta = _columns_diff(
        old_ent.get("columns", []) or [],
        new_ent.get("columns", []) or [],
    )
    if column_delta:
        result["columns"] = column_delta

    index_delta = _indexes_diff(
        old_ent.get("indexes", []) or [],
        new_ent.get("indexes", []) or [],
    )
    if index_delta:
        result["indexes"] = index_delta

    if not result:
        return None
    return result
94
+
95
+
96
+ def _columns_diff(old_cols: List[Dict[str, Any]], new_cols: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
97
+ old_by_name = {c["name"]: c for c in old_cols if c.get("name")}
98
+ new_by_name = {c["name"]: c for c in new_cols if c.get("name")}
99
+
100
+ rename_pairs: List[Tuple[str, str]] = []
101
+ for name, c in new_by_name.items():
102
+ prev = c.get("previous_name")
103
+ if prev and prev in old_by_name and prev not in new_by_name:
104
+ rename_pairs.append((prev, name))
105
+ renamed_old = {p[0] for p in rename_pairs}
106
+ renamed_new = {p[1] for p in rename_pairs}
107
+
108
+ added = sorted(set(new_by_name) - set(old_by_name) - renamed_new)
109
+ removed = sorted(set(old_by_name) - set(new_by_name) - renamed_old)
110
+
111
+ changed: List[Dict[str, Any]] = []
112
+ for name in sorted(set(old_by_name) & set(new_by_name)):
113
+ ch = _column_scalar_diff(old_by_name[name], new_by_name[name])
114
+ if ch:
115
+ changed.append({"name": name, **ch})
116
+
117
+ for old_name, new_name in rename_pairs:
118
+ ch = _column_scalar_diff(old_by_name[old_name], new_by_name[new_name]) or {}
119
+ changed.append({"name": new_name, "renamed_from": old_name, **ch})
120
+
121
+ out: Dict[str, Any] = {}
122
+ if added:
123
+ out["added"] = added
124
+ if removed:
125
+ out["removed"] = removed
126
+ if rename_pairs:
127
+ out["renamed"] = [{"from": a, "to": b} for a, b in rename_pairs]
128
+ if changed:
129
+ out["changed"] = changed
130
+ return out or None
131
+
132
+
133
+ def _column_scalar_diff(old: Dict[str, Any], new: Dict[str, Any]) -> Optional[Dict[str, Any]]:
134
+ out: Dict[str, Any] = {}
135
+ for field in ("type", "nullable", "primary_key", "unique", "default", "sensitivity", "description"):
136
+ if old.get(field) != new.get(field):
137
+ out[field] = {"from": old.get(field), "to": new.get(field)}
138
+ if (old.get("references") or None) != (new.get("references") or None):
139
+ out["references"] = {"from": old.get("references"), "to": new.get("references")}
140
+ return out or None
141
+
142
+
143
+ def _indexes_diff(old_idx: List[Dict[str, Any]], new_idx: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
144
+ old_by_name = {i["name"]: i for i in old_idx if i.get("name")}
145
+ new_by_name = {i["name"]: i for i in new_idx if i.get("name")}
146
+
147
+ rename_pairs: List[Tuple[str, str]] = []
148
+ for name, i in new_by_name.items():
149
+ prev = i.get("previous_name")
150
+ if prev and prev in old_by_name and prev not in new_by_name:
151
+ rename_pairs.append((prev, name))
152
+ renamed_old = {p[0] for p in rename_pairs}
153
+ renamed_new = {p[1] for p in rename_pairs}
154
+
155
+ added = sorted(set(new_by_name) - set(old_by_name) - renamed_new)
156
+ removed = sorted(set(old_by_name) - set(new_by_name) - renamed_old)
157
+
158
+ out: Dict[str, Any] = {}
159
+ if added:
160
+ out["added"] = added
161
+ if removed:
162
+ out["removed"] = removed
163
+ if rename_pairs:
164
+ out["renamed"] = [{"from": a, "to": b} for a, b in rename_pairs]
165
+ return out or None
166
+
167
+
168
+ def _breaking_from_diff(removed: List[str], changed: List[Dict[str, Any]]) -> List[str]:
169
+ """Flag changes that break consumers. First pass heuristics — extended in Phase B."""
170
+ breaking: List[str] = []
171
+ for key in removed:
172
+ breaking.append(f"Entity removed: {key}")
173
+ for ch in changed:
174
+ ent = ch.get("entity")
175
+ cols = ch.get("columns") or {}
176
+ for rem in cols.get("removed", []):
177
+ breaking.append(f"Column removed: {ent}.{rem}")
178
+ for c in cols.get("changed", []):
179
+ t = c.get("type")
180
+ if t and t.get("from") and t.get("to") and t["from"] != t["to"]:
181
+ breaking.append(f"Column type changed: {ent}.{c['name']} ({t['from']} -> {t['to']})")
182
+ nn = c.get("nullable")
183
+ if nn and nn.get("from") is True and nn.get("to") is False:
184
+ breaking.append(f"Column became NOT NULL without a migration: {ent}.{c['name']}")
185
+ idx = ch.get("indexes") or {}
186
+ for rem in idx.get("removed", []):
187
+ breaking.append(f"Index removed: {ent}.{rem}")
188
+ return breaking