datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
datalex_core/policy.py ADDED
@@ -0,0 +1,1037 @@
1
+ import re
2
+ from pathlib import Path
3
+ from typing import Any, Dict, Iterable, List, Optional, Set
4
+
5
+ import yaml
6
+
7
+ from datalex_core.issues import Issue
8
+ from datalex_core.modeling import normalize_model
9
+
10
+
11
def load_policy_pack(path: str) -> Dict[str, Any]:
    """Read the YAML policy pack at *path* and return its root mapping.

    A document that parses to ``None`` yields an empty dict.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if the parsed document root is not a mapping.
    """
    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(f"Policy pack not found: {path}")

    with source.open("r", encoding="utf-8") as stream:
        document = yaml.safe_load(stream)

    if document is None:
        # Nothing was parsed (e.g. an empty file): treat it as an empty pack.
        return {}
    if isinstance(document, dict):
        return document
    raise ValueError("Policy pack must parse to a YAML object at root.")
26
+
27
+
28
def _policy_issue(severity: str, code: str, message: str, path: str = "/") -> Issue:
    """Build an ``Issue`` from the given severity, code, message and path."""
    attributes = {
        "severity": severity,
        "code": code,
        "message": message,
        "path": path,
    }
    return Issue(**attributes)
30
+
31
+
32
+ def _normalize_list(value: Any) -> List[str]:
33
+ if isinstance(value, list):
34
+ return [str(item) for item in value if str(item).strip()]
35
+ if isinstance(value, str) and value.strip():
36
+ return [value]
37
+ return []
38
+
39
+
40
+ def _field_refs(model: Dict[str, Any]) -> Set[str]:
41
+ refs: Set[str] = set()
42
+ for entity in model.get("entities", []):
43
+ entity_name = entity.get("name", "")
44
+ for field in entity.get("fields", []):
45
+ field_name = field.get("name", "")
46
+ if entity_name and field_name:
47
+ refs.add(f"{entity_name}.{field_name}")
48
+ return refs
49
+
50
+
51
+ def _classification(model: Dict[str, Any]) -> Dict[str, str]:
52
+ governance = model.get("governance", {})
53
+ classification = governance.get("classification", {})
54
+ if isinstance(classification, dict):
55
+ return {str(k): str(v) for k, v in classification.items()}
56
+ return {}
57
+
58
+
59
def _require_entity_tags(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities that do not carry the tags listed in ``params["tags"]``.

    ``params["mode"]`` (case-insensitive, default ``"any"``) selects the rule:
    ``"all"`` requires every listed tag, any other value requires at least one.
    If no tags are configured a single ``*_MISCONFIGURED`` error is returned.
    """
    wanted = set(_normalize_list(params.get("tags")))
    mode = str(params.get("mode", "any")).lower()

    if not wanted:
        misconfigured = _policy_issue(
            "error",
            f"POLICY_{policy_id}_MISCONFIGURED",
            f"Policy '{policy_id}' must define at least one required tag.",
            "/policies",
        )
        return [misconfigured]

    need_all = mode == "all"
    quantifier = "all" if need_all else "at least one"

    found: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        present = set(_normalize_list(entity.get("tags", [])))

        if need_all:
            satisfied = wanted <= present
        else:
            satisfied = not wanted.isdisjoint(present)
        if satisfied:
            continue

        found.append(
            _policy_issue(
                severity,
                f"POLICY_{policy_id}",
                f"Entity '{entity_name}' must include {quantifier} of tags {sorted(wanted)}.",
                f"/entities/{entity_name}",
            )
        )

    return found
102
+
103
+
104
+ def _require_field_descriptions(
105
+ model: Dict[str, Any],
106
+ severity: str,
107
+ policy_id: str,
108
+ params: Dict[str, Any],
109
+ ) -> List[Issue]:
110
+ exempt_primary_key = bool(params.get("exempt_primary_key", True))
111
+ issues: List[Issue] = []
112
+
113
+ for entity in model.get("entities", []):
114
+ entity_name = str(entity.get("name", ""))
115
+ for field in entity.get("fields", []):
116
+ field_name = str(field.get("name", ""))
117
+ if exempt_primary_key and field.get("primary_key") is True:
118
+ continue
119
+ description = field.get("description")
120
+ if not isinstance(description, str) or not description.strip():
121
+ issues.append(
122
+ _policy_issue(
123
+ severity,
124
+ f"POLICY_{policy_id}",
125
+ f"Field '{entity_name}.{field_name}' is missing a description.",
126
+ f"/entities/{entity_name}/fields/{field_name}",
127
+ )
128
+ )
129
+
130
+ return issues
131
+
132
+
133
def _classification_required_for_tags(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Require a governance classification for selected fields.

    A field is selected when it carries one of ``params["field_tags"]`` or when
    its name is matched (``re.search``) by ``params["field_name_regex"]``.
    Selected fields must have an entry in the model's classification mapping,
    and, when ``params["allowed_classifications"]`` is non-empty, that entry
    must be one of the allowed values. An uncompilable regex yields a single
    ``*_MISCONFIGURED`` error.
    """
    tag_triggers = set(_normalize_list(params.get("field_tags")))
    permitted = set(_normalize_list(params.get("allowed_classifications")))
    name_regex = params.get("field_name_regex")

    name_matcher = None
    if isinstance(name_regex, str) and name_regex.strip():
        try:
            name_matcher = re.compile(name_regex)
        except re.error:
            return [
                _policy_issue(
                    "error",
                    f"POLICY_{policy_id}_MISCONFIGURED",
                    f"Policy '{policy_id}' has invalid regex '{name_regex}'.",
                    "/policies",
                )
            ]

    assigned = _classification(model)
    code = f"POLICY_{policy_id}"
    found: List[Issue] = []

    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        for field in entity.get("fields", []):
            field_name = str(field.get("name", ""))
            ref = f"{entity_name}.{field_name}"
            tags = set(_normalize_list(field.get("tags")))

            selected_by_tag = bool(tag_triggers & tags)
            selected_by_name = (
                name_matcher is not None and name_matcher.search(field_name) is not None
            )
            if not (selected_by_tag or selected_by_name):
                continue

            value = assigned.get(ref)
            if value is None:
                message = f"Field '{ref}' requires governance.classification."
            elif permitted and value not in permitted:
                message = (
                    f"Field '{ref}' classification '{value}' is not allowed. "
                    f"Expected one of {sorted(permitted)}."
                )
            else:
                continue

            found.append(_policy_issue(severity, code, message, "/governance/classification"))

    return found
198
+
199
+
200
def _rule_target_required(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report fields that no entry in ``model["rules"]`` names as its ``target``.

    When ``params["field_types"]`` is non-empty, only fields whose ``type`` is
    in that list are checked. The comparison is case-insensitive on both sides.
    Fields lacking an entity name or a field name are ignored.
    """
    # Lower-case the configured types as well: the field type below is already
    # lower-cased, so a config such as ["String"] previously never matched and
    # the policy silently passed.
    target_types = {item.lower() for item in _normalize_list(params.get("field_types"))}
    refs = _field_refs(model)
    rule_targets = {
        str(rule.get("target", ""))
        for rule in model.get("rules", [])
        if isinstance(rule, dict)
    }

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        for field in entity.get("fields", []):
            field_name = str(field.get("name", ""))
            ref = f"{entity_name}.{field_name}"
            if ref not in refs:
                # _field_refs only contains fully named fields.
                continue

            field_type = str(field.get("type", "")).lower()
            if target_types and field_type not in target_types:
                continue

            if ref not in rule_targets:
                issues.append(
                    _policy_issue(
                        severity,
                        f"POLICY_{policy_id}",
                        f"Field '{ref}' requires at least one rule target entry.",
                        "/rules",
                    )
                )

    return issues
238
+
239
+
240
+ def _naming_convention(
241
+ model: Dict[str, Any],
242
+ severity: str,
243
+ policy_id: str,
244
+ params: Dict[str, Any],
245
+ ) -> List[Issue]:
246
+ entity_pattern_str = params.get("entity_pattern")
247
+ field_pattern_str = params.get("field_pattern")
248
+ relationship_pattern_str = params.get("relationship_pattern")
249
+ index_pattern_str = params.get("index_pattern")
250
+
251
+ patterns: Dict[str, Optional[re.Pattern[str]]] = {}
252
+ issues: List[Issue] = []
253
+
254
+ for label, pat_str in [
255
+ ("entity_pattern", entity_pattern_str),
256
+ ("field_pattern", field_pattern_str),
257
+ ("relationship_pattern", relationship_pattern_str),
258
+ ("index_pattern", index_pattern_str),
259
+ ]:
260
+ if pat_str is None:
261
+ patterns[label] = None
262
+ continue
263
+ if not isinstance(pat_str, str) or not pat_str.strip():
264
+ patterns[label] = None
265
+ continue
266
+ try:
267
+ patterns[label] = re.compile(pat_str)
268
+ except re.error:
269
+ return [
270
+ _policy_issue(
271
+ "error",
272
+ f"POLICY_{policy_id}_MISCONFIGURED",
273
+ f"Policy '{policy_id}' has invalid regex for {label}: '{pat_str}'.",
274
+ "/policies",
275
+ )
276
+ ]
277
+
278
+ if not any(patterns.values()):
279
+ return [
280
+ _policy_issue(
281
+ "error",
282
+ f"POLICY_{policy_id}_MISCONFIGURED",
283
+ f"Policy '{policy_id}' must define at least one naming pattern (entity_pattern, field_pattern, relationship_pattern, index_pattern).",
284
+ "/policies",
285
+ )
286
+ ]
287
+
288
+ ep = patterns.get("entity_pattern")
289
+ fp = patterns.get("field_pattern")
290
+ rp = patterns.get("relationship_pattern")
291
+ ip = patterns.get("index_pattern")
292
+
293
+ for entity in model.get("entities", []):
294
+ entity_name = str(entity.get("name", ""))
295
+ if ep and not ep.fullmatch(entity_name):
296
+ issues.append(
297
+ _policy_issue(
298
+ severity,
299
+ f"POLICY_{policy_id}",
300
+ f"Entity name '{entity_name}' does not match pattern '{entity_pattern_str}'.",
301
+ f"/entities/{entity_name}",
302
+ )
303
+ )
304
+ if fp:
305
+ for field in entity.get("fields", []):
306
+ field_name = str(field.get("name", ""))
307
+ if not fp.fullmatch(field_name):
308
+ issues.append(
309
+ _policy_issue(
310
+ severity,
311
+ f"POLICY_{policy_id}",
312
+ f"Field name '{entity_name}.{field_name}' does not match pattern '{field_pattern_str}'.",
313
+ f"/entities/{entity_name}/fields/{field_name}",
314
+ )
315
+ )
316
+
317
+ if rp:
318
+ for rel in model.get("relationships", []):
319
+ rel_name = str(rel.get("name", ""))
320
+ if not rp.fullmatch(rel_name):
321
+ issues.append(
322
+ _policy_issue(
323
+ severity,
324
+ f"POLICY_{policy_id}",
325
+ f"Relationship name '{rel_name}' does not match pattern '{relationship_pattern_str}'.",
326
+ f"/relationships/{rel_name}",
327
+ )
328
+ )
329
+
330
+ if ip:
331
+ for idx in model.get("indexes", []):
332
+ idx_name = str(idx.get("name", ""))
333
+ if not ip.fullmatch(idx_name):
334
+ issues.append(
335
+ _policy_issue(
336
+ severity,
337
+ f"POLICY_{policy_id}",
338
+ f"Index name '{idx_name}' does not match pattern '{index_pattern_str}'.",
339
+ f"/indexes/{idx_name}",
340
+ )
341
+ )
342
+
343
+ return issues
344
+
345
+
346
def _require_indexes(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities with many fields but no index in ``model["indexes"]``.

    An entity is reported when it has at least ``params["min_fields"]`` fields
    (default 5) and no index names it in its ``entity`` key. When
    ``params["entity_types"]`` (default ``["table"]``) is non-empty, only
    entities of those types are checked, compared case-insensitively. A
    ``min_fields`` value that cannot be converted to ``int`` yields a single
    ``*_MISCONFIGURED`` error.
    """
    raw_min_fields = params.get("min_fields", 5)
    try:
        min_fields = int(raw_min_fields)
    except (TypeError, ValueError):
        # Report bad configuration the same way the other handlers do instead
        # of letting the conversion error propagate.
        return [
            _policy_issue(
                "error",
                f"POLICY_{policy_id}_MISCONFIGURED",
                f"Policy '{policy_id}' has invalid min_fields '{raw_min_fields}'. Expected an integer.",
                "/policies",
            )
        ]

    # The entity type below is lower-cased, so the configured list must be too.
    entity_types = {
        item.lower() for item in _normalize_list(params.get("entity_types", ["table"]))
    }

    indexed_entities: Set[str] = set()
    for idx in model.get("indexes", []):
        ent = str(idx.get("entity", ""))
        if ent:
            indexed_entities.add(ent)

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        entity_type = str(entity.get("type", "table")).lower()
        if entity_types and entity_type not in entity_types:
            continue
        field_count = len(entity.get("fields", []))
        if field_count >= min_fields and entity_name not in indexed_entities:
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' has {field_count} fields (>= {min_fields}) but no indexes defined.",
                    f"/entities/{entity_name}",
                )
            )

    return issues
379
+
380
+
381
def _require_owner(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities with no ``owner``, or with a non-email owner when required.

    When ``params["entity_types"]`` is non-empty, only entities of those types
    are checked, compared case-insensitively. With ``params["require_email"]``
    truthy, a string owner must look like ``local@domain.tld`` after stripping
    surrounding whitespace; non-string owners are not format-checked.
    """
    # The entity type below is lower-cased, so the configured list must be too;
    # otherwise a config such as ["Table"] would exclude every entity.
    entity_types = {
        item.lower() for item in _normalize_list(params.get("entity_types", []))
    }
    require_email = bool(params.get("require_email", False))
    email_pattern = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        entity_type = str(entity.get("type", "table")).lower()
        if entity_types and entity_type not in entity_types:
            continue

        owner = entity.get("owner")
        if not owner or (isinstance(owner, str) and not owner.strip()):
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' is missing an owner.",
                    f"/entities/{entity_name}",
                )
            )
        elif require_email and isinstance(owner, str) and not email_pattern.match(owner.strip()):
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' owner '{owner}' is not a valid email address.",
                    f"/entities/{entity_name}",
                )
            )

    return issues
419
+
420
+
421
def _require_sla(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities whose ``sla`` mapping is missing or incomplete.

    Entities are filtered by ``params["entity_types"]`` (default ``["table"]``,
    compared case-insensitively) and, when ``params["required_tags"]`` is
    non-empty, to those carrying at least one of those tags. For each remaining
    entity a non-empty ``sla`` dict is required; ``freshness`` is required
    unless ``require_freshness`` is falsy, and ``quality_score`` is required
    only when ``require_quality_score`` is truthy.
    """
    # The entity type below is lower-cased, so the configured list must be too;
    # otherwise a config such as ["Table"] would exclude every entity.
    entity_types = {
        item.lower() for item in _normalize_list(params.get("entity_types", ["table"]))
    }
    required_tags = set(_normalize_list(params.get("required_tags", [])))
    require_freshness = bool(params.get("require_freshness", True))
    require_quality_score = bool(params.get("require_quality_score", False))

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        entity_type = str(entity.get("type", "table")).lower()
        entity_tags = set(_normalize_list(entity.get("tags", [])))

        if entity_types and entity_type not in entity_types:
            continue
        if required_tags and not required_tags.intersection(entity_tags):
            continue

        sla = entity.get("sla")
        if not isinstance(sla, dict) or not sla:
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' is missing an SLA definition.",
                    f"/entities/{entity_name}/sla",
                )
            )
            # Without an SLA mapping there is nothing further to inspect.
            continue

        if require_freshness and not sla.get("freshness"):
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' SLA is missing 'freshness'.",
                    f"/entities/{entity_name}/sla",
                )
            )

        # `is None` rather than falsiness: a quality_score of 0 counts as present.
        if require_quality_score and sla.get("quality_score") is None:
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' SLA is missing 'quality_score'.",
                    f"/entities/{entity_name}/sla",
                )
            )

    return issues
476
+
477
+
478
+ def _deprecation_check(
479
+ model: Dict[str, Any],
480
+ severity: str,
481
+ policy_id: str,
482
+ params: Dict[str, Any],
483
+ ) -> List[Issue]:
484
+ require_message = bool(params.get("require_message", True))
485
+ check_references = bool(params.get("check_references", True))
486
+
487
+ deprecated_fields: Set[str] = set()
488
+ issues: List[Issue] = []
489
+
490
+ for entity in model.get("entities", []):
491
+ entity_name = str(entity.get("name", ""))
492
+ for field in entity.get("fields", []):
493
+ field_name = str(field.get("name", ""))
494
+ if field.get("deprecated") is True:
495
+ ref = f"{entity_name}.{field_name}"
496
+ deprecated_fields.add(ref)
497
+ if require_message:
498
+ msg = field.get("deprecated_message")
499
+ if not isinstance(msg, str) or not msg.strip():
500
+ issues.append(
501
+ _policy_issue(
502
+ severity,
503
+ f"POLICY_{policy_id}",
504
+ f"Deprecated field '{ref}' is missing a deprecated_message with migration guidance.",
505
+ f"/entities/{entity_name}/fields/{field_name}",
506
+ )
507
+ )
508
+
509
+ if check_references and deprecated_fields:
510
+ for rel in model.get("relationships", []):
511
+ rel_name = str(rel.get("name", ""))
512
+ from_ref = str(rel.get("from", ""))
513
+ to_ref = str(rel.get("to", ""))
514
+ if from_ref in deprecated_fields:
515
+ issues.append(
516
+ _policy_issue(
517
+ severity,
518
+ f"POLICY_{policy_id}",
519
+ f"Relationship '{rel_name}' references deprecated field '{from_ref}'.",
520
+ f"/relationships/{rel_name}",
521
+ )
522
+ )
523
+ if to_ref in deprecated_fields:
524
+ issues.append(
525
+ _policy_issue(
526
+ severity,
527
+ f"POLICY_{policy_id}",
528
+ f"Relationship '{rel_name}' references deprecated field '{to_ref}'.",
529
+ f"/relationships/{rel_name}",
530
+ )
531
+ )
532
+
533
+ for idx in model.get("indexes", []):
534
+ idx_name = str(idx.get("name", ""))
535
+ idx_entity = str(idx.get("entity", ""))
536
+ for idx_field in _normalize_list(idx.get("fields", [])):
537
+ ref = f"{idx_entity}.{idx_field}"
538
+ if ref in deprecated_fields:
539
+ issues.append(
540
+ _policy_issue(
541
+ severity,
542
+ f"POLICY_{policy_id}",
543
+ f"Index '{idx_name}' references deprecated field '{ref}'.",
544
+ f"/indexes/{idx_name}",
545
+ )
546
+ )
547
+
548
+ return issues
549
+
550
+
551
+ def _custom_expression(
552
+ model: Dict[str, Any],
553
+ severity: str,
554
+ policy_id: str,
555
+ params: Dict[str, Any],
556
+ ) -> List[Issue]:
557
+ scope = str(params.get("scope", "entity")).lower()
558
+ expression = str(params.get("expression", "")).strip()
559
+ message_template = str(params.get("message", "")).strip()
560
+
561
+ if not expression:
562
+ return [
563
+ _policy_issue(
564
+ "error",
565
+ f"POLICY_{policy_id}_MISCONFIGURED",
566
+ f"Policy '{policy_id}' must define an 'expression'.",
567
+ "/policies",
568
+ )
569
+ ]
570
+
571
+ issues: List[Issue] = []
572
+
573
+ if scope == "entity":
574
+ for entity in model.get("entities", []):
575
+ entity_name = str(entity.get("name", ""))
576
+ ctx = {
577
+ "name": entity_name,
578
+ "type": str(entity.get("type", "table")),
579
+ "tags": _normalize_list(entity.get("tags", [])),
580
+ "field_count": len(entity.get("fields", [])),
581
+ "has_owner": bool(entity.get("owner")),
582
+ "has_sla": bool(entity.get("sla")),
583
+ "has_description": bool(entity.get("description")),
584
+ "schema": str(entity.get("schema", "")),
585
+ "subject_area": str(entity.get("subject_area", "")),
586
+ }
587
+ try:
588
+ result = eval(expression, {"__builtins__": {}}, ctx) # noqa: S307
589
+ except Exception:
590
+ return [
591
+ _policy_issue(
592
+ "error",
593
+ f"POLICY_{policy_id}_MISCONFIGURED",
594
+ f"Policy '{policy_id}' expression failed for entity '{entity_name}': '{expression}'.",
595
+ "/policies",
596
+ )
597
+ ]
598
+ if not result:
599
+ msg = message_template.replace("{name}", entity_name) if message_template else (
600
+ f"Entity '{entity_name}' failed custom policy check: {expression}"
601
+ )
602
+ issues.append(
603
+ _policy_issue(severity, f"POLICY_{policy_id}", msg, f"/entities/{entity_name}")
604
+ )
605
+
606
+ elif scope == "field":
607
+ for entity in model.get("entities", []):
608
+ entity_name = str(entity.get("name", ""))
609
+ for field in entity.get("fields", []):
610
+ field_name = str(field.get("name", ""))
611
+ ref = f"{entity_name}.{field_name}"
612
+ ctx = {
613
+ "name": field_name,
614
+ "entity": entity_name,
615
+ "type": str(field.get("type", "")),
616
+ "nullable": bool(field.get("nullable", True)),
617
+ "primary_key": bool(field.get("primary_key", False)),
618
+ "unique": bool(field.get("unique", False)),
619
+ "has_description": bool(field.get("description")),
620
+ "deprecated": bool(field.get("deprecated", False)),
621
+ "sensitivity": str(field.get("sensitivity", "")),
622
+ "has_default": field.get("default") is not None,
623
+ "has_check": bool(field.get("check")),
624
+ "computed": bool(field.get("computed", False)),
625
+ "foreign_key": bool(field.get("foreign_key", False)),
626
+ "tags": _normalize_list(field.get("tags", [])),
627
+ }
628
+ try:
629
+ result = eval(expression, {"__builtins__": {}}, ctx) # noqa: S307
630
+ except Exception:
631
+ return [
632
+ _policy_issue(
633
+ "error",
634
+ f"POLICY_{policy_id}_MISCONFIGURED",
635
+ f"Policy '{policy_id}' expression failed for field '{ref}': '{expression}'.",
636
+ "/policies",
637
+ )
638
+ ]
639
+ if not result:
640
+ msg = message_template.replace("{name}", ref) if message_template else (
641
+ f"Field '{ref}' failed custom policy check: {expression}"
642
+ )
643
+ issues.append(
644
+ _policy_issue(
645
+ severity, f"POLICY_{policy_id}", msg,
646
+ f"/entities/{entity_name}/fields/{field_name}",
647
+ )
648
+ )
649
+
650
+ elif scope == "model":
651
+ model_meta = model.get("model", {})
652
+ ctx = {
653
+ "name": str(model_meta.get("name", "")),
654
+ "version": str(model_meta.get("version", "")),
655
+ "domain": str(model_meta.get("domain", "")),
656
+ "state": str(model_meta.get("state", "")),
657
+ "layer": str(model_meta.get("layer", "")),
658
+ "entity_count": len(model.get("entities", [])),
659
+ "relationship_count": len(model.get("relationships", [])),
660
+ "index_count": len(model.get("indexes", [])),
661
+ "metric_count": len(model.get("metrics", [])),
662
+ "has_governance": bool(model.get("governance")),
663
+ "has_glossary": bool(model.get("glossary")),
664
+ "has_rules": bool(model.get("rules")),
665
+ "has_metrics": bool(model.get("metrics")),
666
+ }
667
+ try:
668
+ result = eval(expression, {"__builtins__": {}}, ctx) # noqa: S307
669
+ except Exception:
670
+ return [
671
+ _policy_issue(
672
+ "error",
673
+ f"POLICY_{policy_id}_MISCONFIGURED",
674
+ f"Policy '{policy_id}' expression failed: '{expression}'.",
675
+ "/policies",
676
+ )
677
+ ]
678
+ if not result:
679
+ msg = message_template.replace("{name}", ctx["name"]) if message_template else (
680
+ f"Model failed custom policy check: {expression}"
681
+ )
682
+ issues.append(_policy_issue(severity, f"POLICY_{policy_id}", msg, "/model"))
683
+
684
+ else:
685
+ return [
686
+ _policy_issue(
687
+ "error",
688
+ f"POLICY_{policy_id}_MISCONFIGURED",
689
+ f"Policy '{policy_id}' has invalid scope '{scope}'. Expected 'entity', 'field', or 'model'.",
690
+ "/policies",
691
+ )
692
+ ]
693
+
694
+ return issues
695
+
696
+
697
def _modeling_convention(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Check model kind/layer, entity types, keys, dimension refs and Data Vault metadata.

    The model is first passed through ``normalize_model``; all checks read the
    normalized result. Each ``allowed_*`` / ``require_*_for_types`` parameter is
    a list that disables its check when empty. With
    ``require_data_vault_metadata`` truthy, entities of type ``hub``, ``link``
    and ``satellite`` must additionally declare their vault-specific properties,
    and the fields those properties name must exist on the entity.
    """
    normalized = normalize_model(model)
    issues: List[Issue] = []
    code = f"POLICY_{policy_id}"

    def report(message: str, path: str) -> None:
        issues.append(_policy_issue(severity, code, message, path))

    def param_set(key: str) -> Set[str]:
        return set(_normalize_list(params.get(key)))

    permitted_kinds = param_set("allowed_model_kinds")
    permitted_layers = param_set("allowed_layers")
    permitted_entity_types = param_set("allowed_entity_types")
    keyed_types = param_set("require_candidate_keys_for_types")
    dimensional_types = param_set("require_dimension_refs_for_types")
    check_vault = bool(params.get("require_data_vault_metadata", False))

    meta = normalized.get("model", {})
    kind = str(meta.get("kind", "physical"))
    layer = str(meta.get("layer", ""))

    if permitted_kinds and kind not in permitted_kinds:
        report(
            f"Model kind '{kind}' is not allowed. Expected one of {sorted(permitted_kinds)}.",
            "/model/kind",
        )

    if permitted_layers and layer not in permitted_layers:
        report(
            f"Model layer '{layer or '(none)'}' is not allowed. Expected one of {sorted(permitted_layers)}.",
            "/model/layer",
        )

    entities = normalized.get("entities", [])

    # Name -> entity lookup used to resolve link_refs and parent_entity.
    by_name: Dict[str, Any] = {}
    for candidate in entities:
        if isinstance(candidate, dict) and candidate.get("name"):
            by_name[str(candidate.get("name", ""))] = candidate

    def declares_field(owner: Dict[str, Any], wanted: str) -> bool:
        for column in owner.get("fields", []):
            if str(column.get("name", "")) == wanted:
                return True
        return False

    for entity in entities:
        entity_name = str(entity.get("name", ""))
        entity_type = str(entity.get("type", "table"))
        base = f"/entities/{entity_name}"

        if permitted_entity_types and entity_type not in permitted_entity_types:
            report(
                f"Entity '{entity_name}' type '{entity_type}' is not allowed. Expected one of {sorted(permitted_entity_types)}.",
                f"{base}/type",
            )

        if entity_type in keyed_types:
            has_candidate_keys = bool(entity.get("candidate_keys"))
            has_primary_key = any(
                column.get("primary_key") is True for column in entity.get("fields", [])
            )
            if not (has_candidate_keys or has_primary_key):
                report(
                    f"Entity '{entity_name}' must declare candidate_keys or a primary key.",
                    f"{base}/candidate_keys",
                )

        if entity_type in dimensional_types:
            dimension_refs = entity.get("dimension_refs")
            if not (isinstance(dimension_refs, list) and dimension_refs):
                report(
                    f"Entity '{entity_name}' must declare dimension_refs.",
                    f"{base}/dimension_refs",
                )

        if not check_vault:
            continue

        if entity_type == "hub":
            business_keys = entity.get("business_keys")
            if not (isinstance(business_keys, list) and business_keys):
                report(
                    f"Hub '{entity_name}' must declare business_keys.",
                    f"{base}/business_keys",
                )
        elif entity_type == "link":
            link_refs = entity.get("link_refs")
            if not isinstance(link_refs, list) or len(link_refs) < 2:
                report(
                    f"Link '{entity_name}' must reference at least two hubs in link_refs.",
                    f"{base}/link_refs",
                )
            else:
                for ref_name in link_refs:
                    referenced = by_name.get(str(ref_name))
                    if referenced is None or str(referenced.get("type", "")) != "hub":
                        report(
                            f"Link '{entity_name}' link_refs entry '{ref_name}' must reference a hub.",
                            f"{base}/link_refs",
                        )
        elif entity_type == "satellite":
            parent_entity = str(entity.get("parent_entity", "")).strip()
            hash_diff_fields = entity.get("hash_diff_fields")
            if not parent_entity:
                report(
                    f"Satellite '{entity_name}' must declare parent_entity.",
                    f"{base}/parent_entity",
                )
            else:
                parent = by_name.get(parent_entity)
                if parent is None or str(parent.get("type", "")) not in {"hub", "link"}:
                    report(
                        f"Satellite '{entity_name}' parent_entity '{parent_entity}' must reference a hub or link.",
                        f"{base}/parent_entity",
                    )
            if not (isinstance(hash_diff_fields, list) and hash_diff_fields):
                report(
                    f"Satellite '{entity_name}' must declare hash_diff_fields.",
                    f"{base}/hash_diff_fields",
                )

        # Hubs and links share the hash_key requirement; it is reported after
        # their type-specific findings.
        if entity_type in ("hub", "link"):
            hash_key = str(entity.get("hash_key", "")).strip()
            if not hash_key or not declares_field(entity, hash_key):
                report(
                    f"{entity_type.title()} '{entity_name}' must declare a valid hash_key field.",
                    f"{base}/hash_key",
                )

        if entity_type in {"hub", "link", "satellite"}:
            for prop_name in ("load_timestamp_field", "record_source_field"):
                named_field = str(entity.get(prop_name, "")).strip()
                if not named_field or not declares_field(entity, named_field):
                    report(
                        f"{entity_type.title()} '{entity_name}' must declare a valid {prop_name}.",
                        f"{base}/{prop_name}",
                    )

    return issues
888
+
889
+
890
# Maps a policy type name to the handler function that implements it. Every
# handler takes (model, severity, policy_id, params) and returns a list of issues.
_POLICY_HANDLERS = dict(
    require_entity_tags=_require_entity_tags,
    require_field_descriptions=_require_field_descriptions,
    classification_required_for_tags=_classification_required_for_tags,
    rule_target_required=_rule_target_required,
    naming_convention=_naming_convention,
    require_indexes=_require_indexes,
    require_owner=_require_owner,
    require_sla=_require_sla,
    deprecation_check=_deprecation_check,
    custom_expression=_custom_expression,
    modeling_convention=_modeling_convention,
)
903
+
904
+
905
def merge_policy_packs(*packs: Dict[str, Any]) -> Dict[str, Any]:
    """Merge multiple policy packs with later packs overriding earlier ones.

    Policies are merged by ``id`` (compared by its string form): if two packs
    define a policy with the same ``id``, the later definition wins (full
    replacement) while keeping the position of the first occurrence. Policies
    with unique ids are appended in the order they are encountered.

    Entries that cannot be merged are skipped rather than raising:

    * packs that are not dicts,
    * a ``policies`` value that is missing, ``None`` or otherwise not a list,
    * policies that are not dicts or whose ``id`` is missing/empty.

    Args:
        *packs: Policy packs, lowest priority first.

    Returns:
        A new pack dict with keys ``pack`` and ``policies``. The ``pack``
        metadata is taken from the **last** pack whose ``pack`` value is a
        dict; when none provides one (or it is empty) a default
        ``{"name": "merged", "version": "1.0.0"}`` is used.
    """
    merged_pack_meta: Dict[str, Any] = {}
    # Keyed by policy id. Dicts keep insertion order and re-assigning an
    # existing key keeps its original slot, which gives "first position,
    # last definition" semantics without a separate ordering list.
    policy_map: Dict[str, Dict[str, Any]] = {}

    for pack in packs:
        if not isinstance(pack, dict):
            continue

        pack_meta = pack.get("pack")
        if isinstance(pack_meta, dict):
            merged_pack_meta = pack_meta

        policies = pack.get("policies")
        if not isinstance(policies, list):
            # ``policies: null`` (or any non-list) would otherwise raise a
            # TypeError when iterated; treat it as "no policies in this pack".
            continue

        for policy in policies:
            if not isinstance(policy, dict):
                continue
            pid = str(policy.get("id", ""))
            if not pid:
                continue
            policy_map[pid] = policy

    return {
        "pack": merged_pack_meta or {"name": "merged", "version": "1.0.0"},
        "policies": list(policy_map.values()),
    }
940
+
941
+
942
def _load_policy_pack_chain(path: str, active: tuple) -> Dict[str, Any]:
    """Load ``path`` and its ``pack.extends`` bases, guarding against cycles.

    ``active`` holds the resolved paths of the packs currently being loaded
    further up the recursion. Only that chain is tracked, so the same base
    may still be reached through several branches (diamond inheritance).
    """
    current = Path(path).resolve()
    if current in active:
        cycle = " -> ".join(str(step) for step in active + (current,))
        raise ValueError(f"Circular policy pack inheritance detected: {cycle}")

    pack = load_policy_pack(path)
    pack_meta = pack.get("pack")
    # A missing or non-dict ``pack`` section simply means "no inheritance".
    extends = pack_meta.get("extends") if isinstance(pack_meta, dict) else None
    if not extends:
        return pack

    base_dir = Path(path).parent
    chain = active + (current,)

    layers: List[Dict[str, Any]] = []
    for base_path in _normalize_list(extends):
        # Relative references are resolved against the extending pack's
        # directory, not the process working directory.
        resolved = (base_dir / base_path).resolve()
        # Base packs that do not exist on disk are skipped silently.
        if resolved.exists():
            layers.append(_load_policy_pack_chain(str(resolved), chain))

    # The extending pack goes last so its policies override the bases.
    layers.append(pack)
    return merge_policy_packs(*layers)


def load_policy_pack_with_inheritance(path: str) -> Dict[str, Any]:
    """Load a policy pack, resolving ``pack.extends`` references.

    If the pack defines ``pack.extends`` (a string path or list of paths),
    the referenced base packs are loaded first and merged in order, with the
    current pack applied last (highest priority). Base paths are resolved
    relative to the directory of the pack that references them, and bases
    may themselves extend other packs. Referenced files that do not exist
    are ignored.

    Args:
        path: Location of the policy pack file to load.

    Returns:
        The pack as loaded when it extends nothing, otherwise the result of
        ``merge_policy_packs`` over its bases followed by the pack itself.

    Raises:
        ValueError: If the ``extends`` references form a cycle (previously
            this recursed until ``RecursionError``).
    """
    return _load_policy_pack_chain(path, ())
965
+
966
+
967
def policy_issues(model: Dict[str, Any], policy_pack: Dict[str, Any]) -> List[Issue]:
    """Run every enabled policy of ``policy_pack`` against ``model``.

    Policies are processed in declaration order. For each entry:

    * a non-object entry yields an ``INVALID_POLICY`` error;
    * an entry whose ``enabled`` value is falsy is skipped (default: enabled);
    * an invalid ``severity`` (anything other than ``info``/``warn``/``error``
      after lower-casing; default ``error``) or a non-object ``params`` yields
      a ``POLICY_<id>_MISCONFIGURED`` error;
    * a ``type`` with no entry in ``_POLICY_HANDLERS`` yields a
      ``POLICY_<id>_UNKNOWN_TYPE`` warning;
    * otherwise the matching handler runs and its issues are collected.

    A policy without an ``id`` is identified as ``POLICY_<n>`` where ``n`` is
    its 1-based position in the list.

    Returns:
        All collected issues. If ``policies`` is not a list, a single
        ``INVALID_POLICY_PACK`` error is returned instead.
    """
    declared = policy_pack.get("policies", [])
    if not isinstance(declared, list):
        invalid_pack = _policy_issue(
            "error",
            "INVALID_POLICY_PACK",
            "Policy pack requires a list at root key 'policies'.",
            "/policies",
        )
        return [invalid_pack]

    collected: List[Issue] = []

    for position, entry in enumerate(declared):
        pointer = f"/policies/{position}"

        if not isinstance(entry, dict):
            collected.append(
                _policy_issue(
                    "error",
                    "INVALID_POLICY",
                    f"Policy at index {position} must be an object.",
                    pointer,
                )
            )
            continue

        # Disabled policies are ignored before any further validation.
        if not entry.get("enabled", True):
            continue

        ident = str(entry.get("id") or f"POLICY_{position + 1}")
        kind = str(entry.get("type", "")).strip()
        level = str(entry.get("severity", "error")).lower()
        options = entry.get("params", {})

        if level not in {"info", "warn", "error"}:
            collected.append(
                _policy_issue(
                    "error",
                    f"POLICY_{ident}_MISCONFIGURED",
                    f"Policy '{ident}' has invalid severity '{level}'.",
                    pointer,
                )
            )
        elif not isinstance(options, dict):
            collected.append(
                _policy_issue(
                    "error",
                    f"POLICY_{ident}_MISCONFIGURED",
                    f"Policy '{ident}' params must be an object.",
                    f"{pointer}/params",
                )
            )
        else:
            check = _POLICY_HANDLERS.get(kind)
            if check is None:
                # Unknown types are reported as a warning, not a failure.
                collected.append(
                    _policy_issue(
                        "warn",
                        f"POLICY_{ident}_UNKNOWN_TYPE",
                        f"Unknown policy type '{kind}' skipped.",
                        f"{pointer}/type",
                    )
                )
            else:
                collected.extend(
                    check(model=model, severity=level, policy_id=ident, params=options)
                )

    return collected