datalex-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalex_cli/__init__.py +1 -0
- datalex_cli/datalex_cli.py +658 -0
- datalex_cli/main.py +2925 -0
- datalex_cli-0.1.1.dist-info/METADATA +228 -0
- datalex_cli-0.1.1.dist-info/RECORD +64 -0
- datalex_cli-0.1.1.dist-info/WHEEL +5 -0
- datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
- datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
- datalex_core/__init__.py +94 -0
- datalex_core/_schemas/datalex/common.schema.json +127 -0
- datalex_core/_schemas/datalex/domain.schema.json +24 -0
- datalex_core/_schemas/datalex/entity.schema.json +158 -0
- datalex_core/_schemas/datalex/model.schema.json +141 -0
- datalex_core/_schemas/datalex/policy.schema.json +70 -0
- datalex_core/_schemas/datalex/project.schema.json +82 -0
- datalex_core/_schemas/datalex/snippet.schema.json +24 -0
- datalex_core/_schemas/datalex/source.schema.json +104 -0
- datalex_core/_schemas/datalex/term.schema.json +30 -0
- datalex_core/canonical.py +166 -0
- datalex_core/completion.py +204 -0
- datalex_core/connectors/__init__.py +39 -0
- datalex_core/connectors/base.py +417 -0
- datalex_core/connectors/bigquery.py +229 -0
- datalex_core/connectors/databricks.py +262 -0
- datalex_core/connectors/mysql.py +266 -0
- datalex_core/connectors/postgres.py +309 -0
- datalex_core/connectors/redshift.py +298 -0
- datalex_core/connectors/snowflake.py +336 -0
- datalex_core/connectors/sqlserver.py +425 -0
- datalex_core/datalex/__init__.py +26 -0
- datalex_core/datalex/diff.py +188 -0
- datalex_core/datalex/errors.py +85 -0
- datalex_core/datalex/loader.py +512 -0
- datalex_core/datalex/migrate_layout.py +382 -0
- datalex_core/datalex/parse_cache.py +102 -0
- datalex_core/datalex/project.py +214 -0
- datalex_core/datalex/types.py +224 -0
- datalex_core/dbt/__init__.py +18 -0
- datalex_core/dbt/emit.py +344 -0
- datalex_core/dbt/manifest.py +329 -0
- datalex_core/dbt/profiles.py +185 -0
- datalex_core/dbt/sync.py +279 -0
- datalex_core/dbt/warehouse.py +215 -0
- datalex_core/dialects/__init__.py +15 -0
- datalex_core/dialects/_common.py +48 -0
- datalex_core/dialects/base.py +47 -0
- datalex_core/dialects/postgres.py +164 -0
- datalex_core/dialects/registry.py +36 -0
- datalex_core/dialects/snowflake.py +129 -0
- datalex_core/diffing.py +358 -0
- datalex_core/docs_generator.py +797 -0
- datalex_core/doctor.py +181 -0
- datalex_core/generators.py +478 -0
- datalex_core/importers.py +1176 -0
- datalex_core/issues.py +23 -0
- datalex_core/loader.py +21 -0
- datalex_core/migrate.py +316 -0
- datalex_core/modeling.py +679 -0
- datalex_core/packages.py +430 -0
- datalex_core/policy.py +1037 -0
- datalex_core/resolver.py +456 -0
- datalex_core/schema.py +54 -0
- datalex_core/semantic.py +1561 -0
datalex_core/policy.py
ADDED
|
@@ -0,0 +1,1037 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any, Dict, Iterable, List, Optional, Set
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
from datalex_core.issues import Issue
|
|
8
|
+
from datalex_core.modeling import normalize_model
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_policy_pack(path: str) -> Dict[str, Any]:
    """Load a YAML policy pack and return its root mapping.

    Args:
        path: Filesystem path of the policy pack.

    Returns:
        The parsed root mapping, or ``{}`` when the document is empty.

    Raises:
        FileNotFoundError: if ``path`` does not point at a regular file.
        ValueError: if the document root is not a mapping.
    """
    policy_path = Path(path)
    # is_file() rather than exists(): a directory would pass an exists() check
    # and then fail inside open() with an unrelated OSError.
    if not policy_path.is_file():
        raise FileNotFoundError(f"Policy pack not found: {path}")

    with policy_path.open("r", encoding="utf-8") as handle:
        loaded = yaml.safe_load(handle)

    # An empty document parses to None; treat it as an empty pack.
    if loaded is None:
        return {}

    if not isinstance(loaded, dict):
        raise ValueError("Policy pack must parse to a YAML object at root.")

    return loaded
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _policy_issue(severity: str, code: str, message: str, path: str = "/") -> Issue:
    """Build an ``Issue`` for a policy finding; ``path`` defaults to ``"/"``."""
    issue_fields = {
        "severity": severity,
        "code": code,
        "message": message,
        "path": path,
    }
    return Issue(**issue_fields)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _normalize_list(value: Any) -> List[str]:
|
|
33
|
+
if isinstance(value, list):
|
|
34
|
+
return [str(item) for item in value if str(item).strip()]
|
|
35
|
+
if isinstance(value, str) and value.strip():
|
|
36
|
+
return [value]
|
|
37
|
+
return []
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _field_refs(model: Dict[str, Any]) -> Set[str]:
|
|
41
|
+
refs: Set[str] = set()
|
|
42
|
+
for entity in model.get("entities", []):
|
|
43
|
+
entity_name = entity.get("name", "")
|
|
44
|
+
for field in entity.get("fields", []):
|
|
45
|
+
field_name = field.get("name", "")
|
|
46
|
+
if entity_name and field_name:
|
|
47
|
+
refs.add(f"{entity_name}.{field_name}")
|
|
48
|
+
return refs
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _classification(model: Dict[str, Any]) -> Dict[str, str]:
|
|
52
|
+
governance = model.get("governance", {})
|
|
53
|
+
classification = governance.get("classification", {})
|
|
54
|
+
if isinstance(classification, dict):
|
|
55
|
+
return {str(k): str(v) for k, v in classification.items()}
|
|
56
|
+
return {}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _require_entity_tags(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities that do not carry the tags configured in ``params``.

    ``params["tags"]`` lists the required tags. ``params["mode"]`` (default
    ``"any"``, compared case-insensitively) selects whether an entity needs
    every tag (``"all"``) or at least one (any other value). A policy with
    no tags yields a single ``MISCONFIGURED`` error issue.
    """
    wanted = set(_normalize_list(params.get("tags")))
    mode = str(params.get("mode", "any")).lower()

    if not wanted:
        misconfigured = _policy_issue(
            "error",
            f"POLICY_{policy_id}_MISCONFIGURED",
            f"Policy '{policy_id}' must define at least one required tag.",
            "/policies",
        )
        return [misconfigured]

    need_all = mode == "all"
    findings: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        present = set(_normalize_list(entity.get("tags", [])))

        satisfied = wanted.issubset(present) if need_all else bool(wanted.intersection(present))
        if satisfied:
            continue

        text = (
            f"Entity '{entity_name}' must include "
            f"{'all' if mode == 'all' else 'at least one'} of tags {sorted(wanted)}."
        )
        findings.append(
            _policy_issue(severity, f"POLICY_{policy_id}", text, f"/entities/{entity_name}")
        )

    return findings
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _require_field_descriptions(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report fields whose ``description`` is absent, non-string, or blank.

    Fields with ``primary_key`` set to exactly ``True`` are skipped unless
    ``params["exempt_primary_key"]`` is falsy (it defaults to ``True``).
    """
    skip_primary_keys = bool(params.get("exempt_primary_key", True))
    findings: List[Issue] = []

    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        for field in entity.get("fields", []):
            field_name = str(field.get("name", ""))
            if skip_primary_keys and field.get("primary_key") is True:
                continue

            text = field.get("description")
            if isinstance(text, str) and text.strip():
                continue

            findings.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Field '{entity_name}.{field_name}' is missing a description.",
                    f"/entities/{entity_name}/fields/{field_name}",
                )
            )

    return findings
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _classification_required_for_tags(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Require a governance classification for selected fields.

    A field is selected when it carries one of ``params["field_tags"]`` or
    when its name is found by ``params["field_name_regex"]`` (``re.search``).
    A selected field must have an entry keyed ``"<entity>.<field>"`` in the
    model's governance classification; when
    ``params["allowed_classifications"]`` is non-empty, that entry must be
    one of the listed values. An uncompilable regex yields a single
    ``MISCONFIGURED`` error issue.
    """
    tracked = set(_normalize_list(params.get("field_tags")))
    allowed = set(_normalize_list(params.get("allowed_classifications")))
    raw_regex = params.get("field_name_regex")

    name_matcher = None
    if isinstance(raw_regex, str) and raw_regex.strip():
        try:
            name_matcher = re.compile(raw_regex)
        except re.error:
            return [
                _policy_issue(
                    "error",
                    f"POLICY_{policy_id}_MISCONFIGURED",
                    f"Policy '{policy_id}' has invalid regex '{raw_regex}'.",
                    "/policies",
                )
            ]

    labels = _classification(model)
    findings: List[Issue] = []

    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        for field in entity.get("fields", []):
            field_name = str(field.get("name", ""))
            ref = f"{entity_name}.{field_name}"
            tags = set(_normalize_list(field.get("tags")))

            selected_by_tag = bool(tracked and tracked.intersection(tags))
            selected_by_name = bool(name_matcher and name_matcher.search(field_name))
            if not (selected_by_tag or selected_by_name):
                continue

            label = labels.get(ref)
            if label is None:
                findings.append(
                    _policy_issue(
                        severity,
                        f"POLICY_{policy_id}",
                        f"Field '{ref}' requires governance.classification.",
                        "/governance/classification",
                    )
                )
            elif allowed and label not in allowed:
                text = (
                    f"Field '{ref}' classification '{label}' is not allowed. "
                    f"Expected one of {sorted(allowed)}."
                )
                findings.append(
                    _policy_issue(
                        severity,
                        f"POLICY_{policy_id}",
                        text,
                        "/governance/classification",
                    )
                )

    return findings
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _rule_target_required(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report fields that no entry in ``model["rules"]`` targets.

    A rule targets a field when its ``target`` equals ``"<entity>.<field>"``.
    When ``params["field_types"]`` is non-empty, only fields whose ``type``
    matches one of those types (case-insensitively) are checked. Fields
    without a usable entity or field name are ignored.
    """
    # Field types are compared lower-cased below, so the configured types must
    # be lower-cased too; otherwise an upper-case config value never matches.
    target_types = {name.lower() for name in _normalize_list(params.get("field_types"))}
    refs = _field_refs(model)
    rule_targets = {
        str(rule.get("target", ""))
        for rule in model.get("rules", [])
        if isinstance(rule, dict)
    }

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        for field in entity.get("fields", []):
            field_name = str(field.get("name", ""))
            ref = f"{entity_name}.{field_name}"
            # _field_refs only contains fully named fields.
            if ref not in refs:
                continue

            field_type = str(field.get("type", "")).lower()
            if target_types and field_type not in target_types:
                continue

            if ref not in rule_targets:
                issues.append(
                    _policy_issue(
                        severity,
                        f"POLICY_{policy_id}",
                        f"Field '{ref}' requires at least one rule target entry.",
                        "/rules",
                    )
                )

    return issues
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _naming_convention(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Check entity, field, relationship and index names against regexes.

    ``params`` may define ``entity_pattern``, ``field_pattern``,
    ``relationship_pattern`` and ``index_pattern``; names must match the
    corresponding pattern in full (``fullmatch``). Values that are missing,
    non-string, or blank are ignored. A single ``MISCONFIGURED`` error issue
    is returned when a pattern does not compile or when no usable pattern
    is configured.
    """
    labels = ("entity_pattern", "field_pattern", "relationship_pattern", "index_pattern")
    raw = {label: params.get(label) for label in labels}

    compiled: Dict[str, Any] = {}
    for label in labels:
        source = raw[label]
        # None, non-strings and blank strings all mean "not configured".
        if not isinstance(source, str) or not source.strip():
            compiled[label] = None
            continue
        try:
            compiled[label] = re.compile(source)
        except re.error:
            return [
                _policy_issue(
                    "error",
                    f"POLICY_{policy_id}_MISCONFIGURED",
                    f"Policy '{policy_id}' has invalid regex for {label}: '{source}'.",
                    "/policies",
                )
            ]

    if not any(compiled.values()):
        return [
            _policy_issue(
                "error",
                f"POLICY_{policy_id}_MISCONFIGURED",
                f"Policy '{policy_id}' must define at least one naming pattern (entity_pattern, field_pattern, relationship_pattern, index_pattern).",
                "/policies",
            )
        ]

    entity_re = compiled["entity_pattern"]
    field_re = compiled["field_pattern"]
    relationship_re = compiled["relationship_pattern"]
    index_re = compiled["index_pattern"]

    findings: List[Issue] = []

    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        if entity_re and not entity_re.fullmatch(entity_name):
            findings.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity name '{entity_name}' does not match pattern '{raw['entity_pattern']}'.",
                    f"/entities/{entity_name}",
                )
            )
        if not field_re:
            continue
        for field in entity.get("fields", []):
            field_name = str(field.get("name", ""))
            if field_re.fullmatch(field_name):
                continue
            findings.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Field name '{entity_name}.{field_name}' does not match pattern '{raw['field_pattern']}'.",
                    f"/entities/{entity_name}/fields/{field_name}",
                )
            )

    if relationship_re:
        for rel in model.get("relationships", []):
            rel_name = str(rel.get("name", ""))
            if relationship_re.fullmatch(rel_name):
                continue
            findings.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Relationship name '{rel_name}' does not match pattern '{raw['relationship_pattern']}'.",
                    f"/relationships/{rel_name}",
                )
            )

    if index_re:
        for idx in model.get("indexes", []):
            idx_name = str(idx.get("name", ""))
            if index_re.fullmatch(idx_name):
                continue
            findings.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Index name '{idx_name}' does not match pattern '{raw['index_pattern']}'.",
                    f"/indexes/{idx_name}",
                )
            )

    return findings
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _require_indexes(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities with many fields that no index refers to.

    An entity is reported when it has at least ``params["min_fields"]``
    fields (default 5) and no entry in ``model["indexes"]`` names it in its
    ``entity`` key. ``params["entity_types"]`` (default ``["table"]``)
    restricts the check to matching entity types, compared
    case-insensitively; an empty list disables the restriction. A
    non-integer ``min_fields`` yields a single ``MISCONFIGURED`` error issue.
    """
    raw_min_fields = params.get("min_fields", 5)
    try:
        min_fields = int(raw_min_fields)
    except (TypeError, ValueError):
        # Report bad configuration the same way the sibling policies do
        # instead of letting the conversion error escape.
        return [
            _policy_issue(
                "error",
                f"POLICY_{policy_id}_MISCONFIGURED",
                f"Policy '{policy_id}' has invalid min_fields '{raw_min_fields}'; expected an integer.",
                "/policies",
            )
        ]

    # Entity types are compared lower-cased below, so normalise the config too.
    entity_types = {
        name.lower() for name in _normalize_list(params.get("entity_types", ["table"]))
    }

    indexed_entities: Set[str] = set()
    for idx in model.get("indexes", []):
        ent = str(idx.get("entity", ""))
        if ent:
            indexed_entities.add(ent)

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        entity_type = str(entity.get("type", "table")).lower()
        if entity_types and entity_type not in entity_types:
            continue
        field_count = len(entity.get("fields", []))
        if field_count >= min_fields and entity_name not in indexed_entities:
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' has {field_count} fields (>= {min_fields}) but no indexes defined.",
                    f"/entities/{entity_name}",
                )
            )

    return issues
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _require_owner(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities without an owner, or with a non-email owner.

    ``params["entity_types"]`` (default: no restriction) limits the check to
    matching entity types, compared case-insensitively. When
    ``params["require_email"]`` is truthy, a string owner must also look
    like ``local@domain.tld``.
    """
    # Entity types are compared lower-cased below, so normalise the config too.
    entity_types = {
        name.lower() for name in _normalize_list(params.get("entity_types", []))
    }
    require_email = bool(params.get("require_email", False))
    email_pattern = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        entity_type = str(entity.get("type", "table")).lower()
        if entity_types and entity_type not in entity_types:
            continue

        owner = entity.get("owner")
        # Falsy values and whitespace-only strings both count as "no owner".
        if not owner or (isinstance(owner, str) and not owner.strip()):
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' is missing an owner.",
                    f"/entities/{entity_name}",
                )
            )
        elif require_email and isinstance(owner, str) and not email_pattern.match(owner.strip()):
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' owner '{owner}' is not a valid email address.",
                    f"/entities/{entity_name}",
                )
            )

    return issues
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def _require_sla(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Report entities whose ``sla`` mapping is missing or incomplete.

    Parameters read from ``params``:
      * ``entity_types`` (default ``["table"]``): entity types to check,
        compared case-insensitively; an empty list checks every type.
      * ``required_tags`` (default none): when set, only entities carrying
        at least one of these tags are checked.
      * ``require_freshness`` (default ``True``): ``sla["freshness"]`` must
        be truthy.
      * ``require_quality_score`` (default ``False``):
        ``sla["quality_score"]`` must not be ``None``.
    """
    # Entity types are compared lower-cased below, so normalise the config too.
    entity_types = {
        name.lower() for name in _normalize_list(params.get("entity_types", ["table"]))
    }
    required_tags = set(_normalize_list(params.get("required_tags", [])))
    require_freshness = bool(params.get("require_freshness", True))
    require_quality_score = bool(params.get("require_quality_score", False))

    issues: List[Issue] = []
    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        entity_type = str(entity.get("type", "table")).lower()
        entity_tags = set(_normalize_list(entity.get("tags", [])))

        if entity_types and entity_type not in entity_types:
            continue
        if required_tags and not required_tags.intersection(entity_tags):
            continue

        sla = entity.get("sla")
        if not isinstance(sla, dict) or not sla:
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' is missing an SLA definition.",
                    f"/entities/{entity_name}/sla",
                )
            )
            # Nothing further to inspect without an SLA mapping.
            continue

        if require_freshness and not sla.get("freshness"):
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' SLA is missing 'freshness'.",
                    f"/entities/{entity_name}/sla",
                )
            )

        if require_quality_score and sla.get("quality_score") is None:
            issues.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Entity '{entity_name}' SLA is missing 'quality_score'.",
                    f"/entities/{entity_name}/sla",
                )
            )

    return issues
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _deprecation_check(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Validate deprecated fields and look for remaining references to them.

    A field counts as deprecated when ``deprecated`` is exactly ``True``.
    With ``params["require_message"]`` (default ``True``) each such field
    needs a non-blank string ``deprecated_message``. With
    ``params["check_references"]`` (default ``True``) relationships whose
    ``from``/``to`` equal a deprecated ``"<entity>.<field>"`` and indexes
    listing a deprecated field are reported.
    """
    need_message = bool(params.get("require_message", True))
    scan_references = bool(params.get("check_references", True))

    retired: Set[str] = set()
    findings: List[Issue] = []

    for entity in model.get("entities", []):
        entity_name = str(entity.get("name", ""))
        for field in entity.get("fields", []):
            field_name = str(field.get("name", ""))
            if field.get("deprecated") is not True:
                continue

            ref = f"{entity_name}.{field_name}"
            retired.add(ref)
            if not need_message:
                continue

            guidance = field.get("deprecated_message")
            if isinstance(guidance, str) and guidance.strip():
                continue
            findings.append(
                _policy_issue(
                    severity,
                    f"POLICY_{policy_id}",
                    f"Deprecated field '{ref}' is missing a deprecated_message with migration guidance.",
                    f"/entities/{entity_name}/fields/{field_name}",
                )
            )

    if not (scan_references and retired):
        return findings

    for rel in model.get("relationships", []):
        rel_name = str(rel.get("name", ""))
        endpoints = (str(rel.get("from", "")), str(rel.get("to", "")))
        # "from" is reported before "to", one issue per deprecated endpoint.
        for endpoint in endpoints:
            if endpoint in retired:
                findings.append(
                    _policy_issue(
                        severity,
                        f"POLICY_{policy_id}",
                        f"Relationship '{rel_name}' references deprecated field '{endpoint}'.",
                        f"/relationships/{rel_name}",
                    )
                )

    for idx in model.get("indexes", []):
        idx_name = str(idx.get("name", ""))
        idx_entity = str(idx.get("entity", ""))
        for idx_field in _normalize_list(idx.get("fields", [])):
            ref = f"{idx_entity}.{idx_field}"
            if ref in retired:
                findings.append(
                    _policy_issue(
                        severity,
                        f"POLICY_{policy_id}",
                        f"Index '{idx_name}' references deprecated field '{ref}'.",
                        f"/indexes/{idx_name}",
                    )
                )

    return findings
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _eval_policy_expression(expression: str, context: Dict[str, Any]) -> Any:
    """Evaluate ``expression`` with ``context`` as the only names in scope."""
    # SECURITY NOTE(review): an empty ``__builtins__`` mapping is not a
    # sandbox; attribute access on the context values can still reach
    # arbitrary objects. Policy packs must be treated as trusted code. The
    # call is kept unchanged so existing expressions keep working.
    return eval(expression, {"__builtins__": {}}, context)  # noqa: S307


def _custom_expression(
    model: Dict[str, Any],
    severity: str,
    policy_id: str,
    params: Dict[str, Any],
) -> List[Issue]:
    """Evaluate a user-supplied Python expression per entity, field, or model.

    Parameters read from ``params``:
      * ``scope`` (default ``"entity"``): ``"entity"``, ``"field"`` or
        ``"model"``, compared case-insensitively.
      * ``expression``: evaluated once per item against a context dict built
        from that item; a falsy result produces an issue.
      * ``message``: optional issue text; ``{name}`` is replaced by the
        entity name, the ``"<entity>.<field>"`` reference, or the model name.

    Returns:
        One issue per failing item. A single ``MISCONFIGURED`` error issue is
        returned instead (findings collected so far are discarded) when the
        expression is empty, the scope is unknown, or evaluation raises.
    """
    scope = str(params.get("scope", "entity")).lower()
    expression = str(params.get("expression", "")).strip()
    message_template = str(params.get("message", "")).strip()

    def misconfigured(message: str) -> List[Issue]:
        # All configuration problems share one code and one path.
        return [
            _policy_issue(
                "error",
                f"POLICY_{policy_id}_MISCONFIGURED",
                message,
                "/policies",
            )
        ]

    def failure_text(display_name: str, fallback: str) -> str:
        # A configured template wins over the generated fallback text.
        if message_template:
            return message_template.replace("{name}", display_name)
        return fallback

    if not expression:
        return misconfigured(f"Policy '{policy_id}' must define an 'expression'.")

    issues: List[Issue] = []

    if scope == "entity":
        for entity in model.get("entities", []):
            entity_name = str(entity.get("name", ""))
            ctx = {
                "name": entity_name,
                "type": str(entity.get("type", "table")),
                "tags": _normalize_list(entity.get("tags", [])),
                "field_count": len(entity.get("fields", [])),
                "has_owner": bool(entity.get("owner")),
                "has_sla": bool(entity.get("sla")),
                "has_description": bool(entity.get("description")),
                "schema": str(entity.get("schema", "")),
                "subject_area": str(entity.get("subject_area", "")),
            }
            try:
                result = _eval_policy_expression(expression, ctx)
            except Exception:
                # The expression is user code, so any exception type may surface.
                return misconfigured(
                    f"Policy '{policy_id}' expression failed for entity '{entity_name}': '{expression}'."
                )
            if not result:
                msg = failure_text(
                    entity_name,
                    f"Entity '{entity_name}' failed custom policy check: {expression}",
                )
                issues.append(
                    _policy_issue(severity, f"POLICY_{policy_id}", msg, f"/entities/{entity_name}")
                )

    elif scope == "field":
        for entity in model.get("entities", []):
            entity_name = str(entity.get("name", ""))
            for field in entity.get("fields", []):
                field_name = str(field.get("name", ""))
                ref = f"{entity_name}.{field_name}"
                ctx = {
                    "name": field_name,
                    "entity": entity_name,
                    "type": str(field.get("type", "")),
                    "nullable": bool(field.get("nullable", True)),
                    "primary_key": bool(field.get("primary_key", False)),
                    "unique": bool(field.get("unique", False)),
                    "has_description": bool(field.get("description")),
                    "deprecated": bool(field.get("deprecated", False)),
                    "sensitivity": str(field.get("sensitivity", "")),
                    "has_default": field.get("default") is not None,
                    "has_check": bool(field.get("check")),
                    "computed": bool(field.get("computed", False)),
                    "foreign_key": bool(field.get("foreign_key", False)),
                    "tags": _normalize_list(field.get("tags", [])),
                }
                try:
                    result = _eval_policy_expression(expression, ctx)
                except Exception:
                    return misconfigured(
                        f"Policy '{policy_id}' expression failed for field '{ref}': '{expression}'."
                    )
                if not result:
                    msg = failure_text(
                        ref,
                        f"Field '{ref}' failed custom policy check: {expression}",
                    )
                    issues.append(
                        _policy_issue(
                            severity,
                            f"POLICY_{policy_id}",
                            msg,
                            f"/entities/{entity_name}/fields/{field_name}",
                        )
                    )

    elif scope == "model":
        model_meta = model.get("model", {})
        ctx = {
            "name": str(model_meta.get("name", "")),
            "version": str(model_meta.get("version", "")),
            "domain": str(model_meta.get("domain", "")),
            "state": str(model_meta.get("state", "")),
            "layer": str(model_meta.get("layer", "")),
            "entity_count": len(model.get("entities", [])),
            "relationship_count": len(model.get("relationships", [])),
            "index_count": len(model.get("indexes", [])),
            "metric_count": len(model.get("metrics", [])),
            "has_governance": bool(model.get("governance")),
            "has_glossary": bool(model.get("glossary")),
            "has_rules": bool(model.get("rules")),
            "has_metrics": bool(model.get("metrics")),
        }
        try:
            result = _eval_policy_expression(expression, ctx)
        except Exception:
            return misconfigured(f"Policy '{policy_id}' expression failed: '{expression}'.")
        if not result:
            msg = failure_text(
                ctx["name"],
                f"Model failed custom policy check: {expression}",
            )
            issues.append(_policy_issue(severity, f"POLICY_{policy_id}", msg, "/model"))

    else:
        return misconfigured(
            f"Policy '{policy_id}' has invalid scope '{scope}'. Expected 'entity', 'field', or 'model'."
        )

    return issues
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _modeling_convention(
|
|
698
|
+
model: Dict[str, Any],
|
|
699
|
+
severity: str,
|
|
700
|
+
policy_id: str,
|
|
701
|
+
params: Dict[str, Any],
|
|
702
|
+
) -> List[Issue]:
|
|
703
|
+
normalized = normalize_model(model)
|
|
704
|
+
issues: List[Issue] = []
|
|
705
|
+
|
|
706
|
+
allowed_model_kinds = set(_normalize_list(params.get("allowed_model_kinds")))
|
|
707
|
+
allowed_layers = set(_normalize_list(params.get("allowed_layers")))
|
|
708
|
+
allowed_entity_types = set(_normalize_list(params.get("allowed_entity_types")))
|
|
709
|
+
require_candidate_keys_for_types = set(_normalize_list(params.get("require_candidate_keys_for_types")))
|
|
710
|
+
require_dimension_refs_for_types = set(_normalize_list(params.get("require_dimension_refs_for_types")))
|
|
711
|
+
require_data_vault_metadata = bool(params.get("require_data_vault_metadata", False))
|
|
712
|
+
|
|
713
|
+
model_meta = normalized.get("model", {})
|
|
714
|
+
model_kind = str(model_meta.get("kind", "physical"))
|
|
715
|
+
model_layer = str(model_meta.get("layer", ""))
|
|
716
|
+
|
|
717
|
+
if allowed_model_kinds and model_kind not in allowed_model_kinds:
|
|
718
|
+
issues.append(
|
|
719
|
+
_policy_issue(
|
|
720
|
+
severity,
|
|
721
|
+
f"POLICY_{policy_id}",
|
|
722
|
+
f"Model kind '{model_kind}' is not allowed. Expected one of {sorted(allowed_model_kinds)}.",
|
|
723
|
+
"/model/kind",
|
|
724
|
+
)
|
|
725
|
+
)
|
|
726
|
+
|
|
727
|
+
if allowed_layers and model_layer not in allowed_layers:
|
|
728
|
+
issues.append(
|
|
729
|
+
_policy_issue(
|
|
730
|
+
severity,
|
|
731
|
+
f"POLICY_{policy_id}",
|
|
732
|
+
f"Model layer '{model_layer or '(none)'}' is not allowed. Expected one of {sorted(allowed_layers)}.",
|
|
733
|
+
"/model/layer",
|
|
734
|
+
)
|
|
735
|
+
)
|
|
736
|
+
|
|
737
|
+
entities = normalized.get("entities", [])
|
|
738
|
+
entity_map = {
|
|
739
|
+
str(entity.get("name", "")): entity
|
|
740
|
+
for entity in entities
|
|
741
|
+
if isinstance(entity, dict) and entity.get("name")
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
def has_field(entity: Dict[str, Any], field_name: str) -> bool:
|
|
745
|
+
return any(str(field.get("name", "")) == field_name for field in entity.get("fields", []))
|
|
746
|
+
|
|
747
|
+
for entity in entities:
|
|
748
|
+
entity_name = str(entity.get("name", ""))
|
|
749
|
+
entity_type = str(entity.get("type", "table"))
|
|
750
|
+
|
|
751
|
+
if allowed_entity_types and entity_type not in allowed_entity_types:
|
|
752
|
+
issues.append(
|
|
753
|
+
_policy_issue(
|
|
754
|
+
severity,
|
|
755
|
+
f"POLICY_{policy_id}",
|
|
756
|
+
f"Entity '{entity_name}' type '{entity_type}' is not allowed. Expected one of {sorted(allowed_entity_types)}.",
|
|
757
|
+
f"/entities/{entity_name}/type",
|
|
758
|
+
)
|
|
759
|
+
)
|
|
760
|
+
|
|
761
|
+
if entity_type in require_candidate_keys_for_types:
|
|
762
|
+
has_candidate_keys = bool(entity.get("candidate_keys"))
|
|
763
|
+
has_primary_key = any(field.get("primary_key") is True for field in entity.get("fields", []))
|
|
764
|
+
if not has_candidate_keys and not has_primary_key:
|
|
765
|
+
issues.append(
|
|
766
|
+
_policy_issue(
|
|
767
|
+
severity,
|
|
768
|
+
f"POLICY_{policy_id}",
|
|
769
|
+
f"Entity '{entity_name}' must declare candidate_keys or a primary key.",
|
|
770
|
+
f"/entities/{entity_name}/candidate_keys",
|
|
771
|
+
)
|
|
772
|
+
)
|
|
773
|
+
|
|
774
|
+
if entity_type in require_dimension_refs_for_types:
|
|
775
|
+
dimension_refs = entity.get("dimension_refs")
|
|
776
|
+
if not isinstance(dimension_refs, list) or not dimension_refs:
|
|
777
|
+
issues.append(
|
|
778
|
+
_policy_issue(
|
|
779
|
+
severity,
|
|
780
|
+
f"POLICY_{policy_id}",
|
|
781
|
+
f"Entity '{entity_name}' must declare dimension_refs.",
|
|
782
|
+
f"/entities/{entity_name}/dimension_refs",
|
|
783
|
+
)
|
|
784
|
+
)
|
|
785
|
+
|
|
786
|
+
if require_data_vault_metadata:
|
|
787
|
+
if entity_type == "hub":
|
|
788
|
+
business_keys = entity.get("business_keys")
|
|
789
|
+
hash_key = str(entity.get("hash_key", "")).strip()
|
|
790
|
+
if not isinstance(business_keys, list) or not business_keys:
|
|
791
|
+
issues.append(
|
|
792
|
+
_policy_issue(
|
|
793
|
+
severity,
|
|
794
|
+
f"POLICY_{policy_id}",
|
|
795
|
+
f"Hub '{entity_name}' must declare business_keys.",
|
|
796
|
+
f"/entities/{entity_name}/business_keys",
|
|
797
|
+
)
|
|
798
|
+
)
|
|
799
|
+
if not hash_key or not has_field(entity, hash_key):
|
|
800
|
+
issues.append(
|
|
801
|
+
_policy_issue(
|
|
802
|
+
severity,
|
|
803
|
+
f"POLICY_{policy_id}",
|
|
804
|
+
f"Hub '{entity_name}' must declare a valid hash_key field.",
|
|
805
|
+
f"/entities/{entity_name}/hash_key",
|
|
806
|
+
)
|
|
807
|
+
)
|
|
808
|
+
elif entity_type == "link":
|
|
809
|
+
link_refs = entity.get("link_refs")
|
|
810
|
+
if not isinstance(link_refs, list) or len(link_refs) < 2:
|
|
811
|
+
issues.append(
|
|
812
|
+
_policy_issue(
|
|
813
|
+
severity,
|
|
814
|
+
f"POLICY_{policy_id}",
|
|
815
|
+
f"Link '{entity_name}' must reference at least two hubs in link_refs.",
|
|
816
|
+
f"/entities/{entity_name}/link_refs",
|
|
817
|
+
)
|
|
818
|
+
)
|
|
819
|
+
else:
|
|
820
|
+
for ref_name in link_refs:
|
|
821
|
+
referenced = entity_map.get(str(ref_name))
|
|
822
|
+
if referenced is None or str(referenced.get("type", "")) != "hub":
|
|
823
|
+
issues.append(
|
|
824
|
+
_policy_issue(
|
|
825
|
+
severity,
|
|
826
|
+
f"POLICY_{policy_id}",
|
|
827
|
+
f"Link '{entity_name}' link_refs entry '{ref_name}' must reference a hub.",
|
|
828
|
+
f"/entities/{entity_name}/link_refs",
|
|
829
|
+
)
|
|
830
|
+
)
|
|
831
|
+
hash_key = str(entity.get("hash_key", "")).strip()
|
|
832
|
+
if not hash_key or not has_field(entity, hash_key):
|
|
833
|
+
issues.append(
|
|
834
|
+
_policy_issue(
|
|
835
|
+
severity,
|
|
836
|
+
f"POLICY_{policy_id}",
|
|
837
|
+
f"Link '{entity_name}' must declare a valid hash_key field.",
|
|
838
|
+
f"/entities/{entity_name}/hash_key",
|
|
839
|
+
)
|
|
840
|
+
)
|
|
841
|
+
elif entity_type == "satellite":
|
|
842
|
+
parent_entity = str(entity.get("parent_entity", "")).strip()
|
|
843
|
+
hash_diff_fields = entity.get("hash_diff_fields")
|
|
844
|
+
if not parent_entity:
|
|
845
|
+
issues.append(
|
|
846
|
+
_policy_issue(
|
|
847
|
+
severity,
|
|
848
|
+
f"POLICY_{policy_id}",
|
|
849
|
+
f"Satellite '{entity_name}' must declare parent_entity.",
|
|
850
|
+
f"/entities/{entity_name}/parent_entity",
|
|
851
|
+
)
|
|
852
|
+
)
|
|
853
|
+
else:
|
|
854
|
+
parent = entity_map.get(parent_entity)
|
|
855
|
+
if parent is None or str(parent.get("type", "")) not in {"hub", "link"}:
|
|
856
|
+
issues.append(
|
|
857
|
+
_policy_issue(
|
|
858
|
+
severity,
|
|
859
|
+
f"POLICY_{policy_id}",
|
|
860
|
+
f"Satellite '{entity_name}' parent_entity '{parent_entity}' must reference a hub or link.",
|
|
861
|
+
f"/entities/{entity_name}/parent_entity",
|
|
862
|
+
)
|
|
863
|
+
)
|
|
864
|
+
if not isinstance(hash_diff_fields, list) or not hash_diff_fields:
|
|
865
|
+
issues.append(
|
|
866
|
+
_policy_issue(
|
|
867
|
+
severity,
|
|
868
|
+
f"POLICY_{policy_id}",
|
|
869
|
+
f"Satellite '{entity_name}' must declare hash_diff_fields.",
|
|
870
|
+
f"/entities/{entity_name}/hash_diff_fields",
|
|
871
|
+
)
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
if entity_type in {"hub", "link", "satellite"}:
|
|
875
|
+
for prop_name in ("load_timestamp_field", "record_source_field"):
|
|
876
|
+
field_name = str(entity.get(prop_name, "")).strip()
|
|
877
|
+
if not field_name or not has_field(entity, field_name):
|
|
878
|
+
issues.append(
|
|
879
|
+
_policy_issue(
|
|
880
|
+
severity,
|
|
881
|
+
f"POLICY_{policy_id}",
|
|
882
|
+
f"{entity_type.title()} '{entity_name}' must declare a valid {prop_name}.",
|
|
883
|
+
f"/entities/{entity_name}/{prop_name}",
|
|
884
|
+
)
|
|
885
|
+
)
|
|
886
|
+
|
|
887
|
+
return issues
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
# Registry of policy handlers keyed by the policy ``type`` string.
# ``policy_issues`` looks the handler up here and calls it with the keyword
# arguments ``model``, ``severity``, ``policy_id`` and ``params``.
_POLICY_HANDLERS = dict(
    require_entity_tags=_require_entity_tags,
    require_field_descriptions=_require_field_descriptions,
    classification_required_for_tags=_classification_required_for_tags,
    rule_target_required=_rule_target_required,
    naming_convention=_naming_convention,
    require_indexes=_require_indexes,
    require_owner=_require_owner,
    require_sla=_require_sla,
    deprecation_check=_deprecation_check,
    custom_expression=_custom_expression,
    modeling_convention=_modeling_convention,
)
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
def merge_policy_packs(*packs: Dict[str, Any]) -> Dict[str, Any]:
    """Merge multiple policy packs with later packs overriding earlier ones.

    Policies are merged by ``id``: if two packs define a policy with the same
    ``id``, the later definition wins (full replacement) but keeps the position
    of the first definition. Policies with unique ids are appended. The
    ``pack`` metadata comes from the **last** pack that provides a mapping
    under ``pack``.

    Malformed input is tolerated rather than raised on:

    * arguments that are not dicts are ignored;
    * a pack whose ``policies`` value is not a list/tuple (for example
      ``policies: null``) contributes no policies, but its ``pack`` metadata
      is still honoured;
    * policy entries that are not dicts, or whose ``id`` is missing, ``None``
      or empty, are skipped.

    Args:
        *packs: Policy pack dicts, lowest priority first.

    Returns:
        A dict with the keys ``pack`` (metadata) and ``policies`` (list).
        When no metadata is available the default
        ``{"name": "merged", "version": "1.0.0"}`` is used.
    """
    merged_pack_meta: Dict[str, Any] = {}
    # Dicts preserve insertion order and re-assigning an existing key keeps its
    # original slot, so this one mapping tracks both override and ordering.
    policy_map: Dict[str, Dict[str, Any]] = {}

    for pack in packs:
        if not isinstance(pack, dict):
            continue

        pack_meta = pack.get("pack")
        if isinstance(pack_meta, dict):
            merged_pack_meta = pack_meta

        policies = pack.get("policies")
        if not isinstance(policies, (list, tuple)):
            # Covers both a missing key and ``policies: null``; iterating
            # ``None`` here would raise TypeError.
            continue

        for policy in policies:
            if not isinstance(policy, dict):
                continue
            raw_id = policy.get("id")
            # ``str(None)`` would yield the truthy key "None"; treat a null id
            # exactly like a missing one.
            pid = "" if raw_id is None else str(raw_id)
            if not pid:
                continue
            policy_map[pid] = policy

    return {
        "pack": merged_pack_meta or {"name": "merged", "version": "1.0.0"},
        "policies": list(policy_map.values()),
    }
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
def load_policy_pack_with_inheritance(path: str) -> Dict[str, Any]:
    """Load a policy pack, resolving ``pack.extends`` references.

    If the pack defines ``pack.extends`` (a string path or list of paths),
    the referenced base packs are loaded first and merged in order, with the
    current pack applied last (highest priority). Base paths are resolved
    relative to the directory containing ``path``; base paths that do not
    exist on disk are skipped.

    Args:
        path: Location of the policy pack file to load.

    Returns:
        The pack as loaded by ``load_policy_pack`` when it extends nothing,
        otherwise the result of ``merge_policy_packs`` over its bases and
        itself.

    Raises:
        ValueError: If the ``extends`` chain leads back to a pack that is
            already being loaded (circular inheritance).
    """
    return _load_policy_pack_chain(path, ())


def _load_policy_pack_chain(path: str, ancestors: tuple) -> Dict[str, Any]:
    """Load ``path`` and its bases; ``ancestors`` holds the packs being loaded."""
    current = Path(path).resolve()
    if current in ancestors:
        # Without this guard a cycle (A extends B, B extends A) recurses until
        # the interpreter raises RecursionError.
        trail = " -> ".join(str(step) for step in ancestors + (current,))
        raise ValueError(f"Circular policy pack inheritance detected: {trail}")

    pack = load_policy_pack(path)
    pack_meta = pack.get("pack")
    # ``pack`` may be present but not a mapping (e.g. ``pack: null``).
    extends = pack_meta.get("extends") if isinstance(pack_meta, dict) else None
    if not extends:
        return pack

    base_dir = Path(path).parent
    # Only the current inheritance chain is tracked, so two siblings may still
    # share a common base (diamond) without being reported as a cycle.
    lineage = ancestors + (current,)

    layers: List[Dict[str, Any]] = []
    for base_path in _normalize_list(extends):
        resolved = (base_dir / base_path).resolve()
        # NOTE(review): a missing base is skipped silently, as before —
        # confirm whether this best-effort behaviour is intended.
        if resolved.exists():
            layers.append(_load_policy_pack_chain(str(resolved), lineage))

    layers.append(pack)
    return merge_policy_packs(*layers)
|
|
965
|
+
|
|
966
|
+
|
|
967
|
+
def policy_issues(model: Dict[str, Any], policy_pack: Dict[str, Any]) -> List[Issue]:
    """Evaluate every enabled policy in ``policy_pack`` against ``model``.

    Each entry under ``policies`` is validated (must be an object, must have a
    severity of ``info``/``warn``/``error``, must have object ``params``) and
    then dispatched to the handler registered for its ``type`` in
    ``_POLICY_HANDLERS``. Configuration problems are reported as issues
    instead of being raised; an unknown ``type`` is reported as a warning and
    skipped. Policies with a falsy ``enabled`` value are ignored.

    Returns:
        The issues collected from all policies, in pack order. If
        ``policies`` is not a list, a single ``INVALID_POLICY_PACK`` issue.
    """
    declared = policy_pack.get("policies", [])
    if not isinstance(declared, list):
        pack_problem = _policy_issue(
            "error",
            "INVALID_POLICY_PACK",
            "Policy pack requires a list at root key 'policies'.",
            "/policies",
        )
        return [pack_problem]

    collected: List[Issue] = []
    for position, entry in enumerate(declared):
        if not isinstance(entry, dict):
            collected.append(
                _policy_issue(
                    "error",
                    "INVALID_POLICY",
                    f"Policy at index {position} must be an object.",
                    f"/policies/{position}",
                )
            )
            continue

        # Policies are enabled unless they explicitly say otherwise.
        if not entry.get("enabled", True):
            continue

        # Policies without an id get a positional, 1-based fallback id.
        policy_id = str(entry.get("id") or f"POLICY_{position + 1}")
        kind = str(entry.get("type", "")).strip()
        level = str(entry.get("severity", "error")).lower()
        options = entry.get("params", {})

        if level not in ("info", "warn", "error"):
            collected.append(
                _policy_issue(
                    "error",
                    f"POLICY_{policy_id}_MISCONFIGURED",
                    f"Policy '{policy_id}' has invalid severity '{level}'.",
                    f"/policies/{position}",
                )
            )
        elif not isinstance(options, dict):
            collected.append(
                _policy_issue(
                    "error",
                    f"POLICY_{policy_id}_MISCONFIGURED",
                    f"Policy '{policy_id}' params must be an object.",
                    f"/policies/{position}/params",
                )
            )
        else:
            check = _POLICY_HANDLERS.get(kind)
            if check is None:
                collected.append(
                    _policy_issue(
                        "warn",
                        f"POLICY_{policy_id}_UNKNOWN_TYPE",
                        f"Unknown policy type '{kind}' skipped.",
                        f"/policies/{position}/type",
                    )
                )
            else:
                collected.extend(
                    check(model=model, severity=level, policy_id=policy_id, params=options)
                )

    return collected
|