lfguard 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ """Public API for lfguard."""
2
+
3
+ from .audit import AuditFinding, audit
4
+ from ._version import __version__
5
+ from .lint import LintFinding, lint_desired
6
+ from .models import (
7
+ CurrentState,
8
+ DesiredState,
9
+ Grant,
10
+ GuardrailState,
11
+ LFTagDefinition,
12
+ LFTagValue,
13
+ ResourceRef,
14
+ ResourceTagAssignment,
15
+ )
16
+ from .planner import Change, Plan, PlanOptions, plan
17
+ from .schema import state_json_schema
18
+
19
# Names re-exported as the package's public API, kept in ASCII sort order
# (capitalised names first, then dunder names, then lowercase names).
__all__ = [
    "AuditFinding", "Change", "CurrentState", "DesiredState",
    "Grant", "GuardrailState",
    "LFTagDefinition", "LFTagValue", "LintFinding",
    "Plan", "PlanOptions",
    "ResourceRef", "ResourceTagAssignment",
    "__version__",
    "audit", "lint_desired", "plan", "state_json_schema",
]
@@ -0,0 +1,7 @@
1
+ """Module entry point for ``python -m lakeformation_guard``."""
2
+
3
+ from .cli import main
4
+
5
+
6
# Run the CLI only when this module is executed as a script; whatever
# ``main()`` returns is handed to ``SystemExit`` as the process exit status.
# NOTE(review): the module docstring mentions ``lakeformation_guard`` while the
# distribution is published as ``lfguard`` - confirm the import package name.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
@@ -0,0 +1,3 @@
1
+ """Package version."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,180 @@
1
+ """Audit findings for desired and current Lake Formation state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Dict, List, Mapping, Tuple
7
+
8
+ from .models import CurrentState, DesiredState, Grant, ResourceRef
9
+ from .planner import _grant_index, _grant_target, _lf_tag_index, _resource_tag_index
10
+
11
+
12
@dataclass(frozen=True)
class AuditFinding:
    """One drift or policy observation produced by a guardrail audit."""

    code: str  # machine-readable identifier, e.g. "LF_TAG_MISSING"
    severity: str  # the audit functions in this module emit "error" or "warning"
    target: str  # identifier of the audited object
    message: str  # human-readable summary
    details: Mapping[str, Any]  # structured context attached to the finding

    def to_dict(self) -> Dict[str, Any]:
        """Return the finding as a plain dict; ``details`` is shallow-copied."""
        scalar_fields = ("code", "severity", "target", "message")
        payload: Dict[str, Any] = {name: getattr(self, name) for name in scalar_fields}
        payload["details"] = dict(self.details)
        return payload
30
+
31
+
32
def audit(desired: DesiredState, current: CurrentState) -> Tuple[AuditFinding, ...]:
    """Collect drift findings for LF-Tags, resource tags and grants.

    The function only compares the two states; it performs no changes.
    Findings are returned in check order: LF-Tag definitions first, then
    resource tag assignments, then grants.
    """
    collected: List[AuditFinding] = []
    for check in (_audit_lf_tags, _audit_resource_tags, _audit_grants):
        collected += check(desired, current)
    return tuple(collected)
40
+
41
+
42
def _audit_lf_tags(desired: DesiredState, current: CurrentState) -> List[AuditFinding]:
    """Compare LF-Tag definitions and report missing tags and value drift.

    For every desired tag key (in sorted order) this emits:

    * ``LF_TAG_MISSING`` (error) when the key is absent from the current state;
    * ``LF_TAG_VALUES_MISSING`` (error) for desired values not currently present;
    * ``LF_TAG_VALUES_UNMANAGED`` (warning) for current values not in desired.
    """
    wanted = _lf_tag_index(desired.lf_tags)
    live = _lf_tag_index(current.lf_tags)
    results: List[AuditFinding] = []

    def report(code: str, severity: str, tag_key: Any, message: str, value_field: str, values: List[Any]) -> None:
        # Every LF-Tag finding shares the same target format and detail layout.
        results.append(
            AuditFinding(
                code=code,
                severity=severity,
                target="lf_tag:{}".format(tag_key),
                message=message,
                details={"tag_key": tag_key, value_field: values},
            )
        )

    for tag_key in sorted(wanted):
        wanted_tag = wanted[tag_key]
        live_tag = live.get(tag_key)
        if live_tag is None:
            report(
                "LF_TAG_MISSING",
                "error",
                tag_key,
                "Desired LF-Tag is missing",
                "desired_values",
                list(wanted_tag.values),
            )
            continue

        wanted_values = set(wanted_tag.values)
        live_values = set(live_tag.values)

        absent = sorted(wanted_values - live_values)
        if absent:
            report(
                "LF_TAG_VALUES_MISSING",
                "error",
                tag_key,
                "Desired LF-Tag values are missing",
                "missing_values",
                absent,
            )

        surplus = sorted(live_values - wanted_values)
        if surplus:
            report(
                "LF_TAG_VALUES_UNMANAGED",
                "warning",
                tag_key,
                "Current LF-Tag has values not present in desired state",
                "unmanaged_values",
                surplus,
            )
    return results
82
+
83
+
84
def _audit_resource_tags(desired: DesiredState, current: CurrentState) -> List[AuditFinding]:
    """Compare per-resource LF-Tag assignments and report drift.

    Only resources present in the desired state are inspected, ordered by
    their ``identity``. For each one this emits:

    * ``RESOURCE_TAG_VALUES_MISSING`` (error) for desired values not assigned;
    * ``RESOURCE_TAG_VALUES_UNMANAGED`` (warning) for assigned values that the
      desired state does not list under the same key;
    * ``RESOURCE_TAG_KEY_UNMANAGED`` (warning) for assigned keys that the
      desired state does not mention at all.
    """
    wanted_index = _resource_tag_index(desired.resource_tags)
    live_index = _resource_tag_index(current.resource_tags)
    results: List[AuditFinding] = []

    def record(resource: Any, code: str, severity: str, message: str, tag_key: Any, value_field: str, values: List[Any]) -> None:
        # All resource-tag findings share the same target and detail layout.
        results.append(
            AuditFinding(
                code=code,
                severity=severity,
                target=resource.identity,
                message=message,
                details={"resource": resource.to_dict(), "tag_key": tag_key, value_field: values},
            )
        )

    for resource in sorted(wanted_index, key=lambda ref: ref.identity):
        wanted_tags = wanted_index[resource]
        live_tags = live_index.get(resource, {})

        for tag_key in sorted(wanted_tags):
            wanted_values = wanted_tags[tag_key]
            live_values = live_tags.get(tag_key, frozenset())

            absent = sorted(wanted_values - live_values)
            if absent:
                record(
                    resource,
                    "RESOURCE_TAG_VALUES_MISSING",
                    "error",
                    "Resource is missing desired LF-Tag values",
                    tag_key,
                    "missing_values",
                    absent,
                )

            surplus = sorted(live_values - wanted_values)
            if surplus:
                record(
                    resource,
                    "RESOURCE_TAG_VALUES_UNMANAGED",
                    "warning",
                    "Resource has LF-Tag values not present in desired state",
                    tag_key,
                    "unmanaged_values",
                    surplus,
                )

        # Keys assigned on the resource that the desired state never mentions.
        for tag_key in sorted(set(live_tags) - set(wanted_tags)):
            record(
                resource,
                "RESOURCE_TAG_KEY_UNMANAGED",
                "warning",
                "Resource has LF-Tag key not present in desired state",
                tag_key,
                "unmanaged_values",
                sorted(live_tags[tag_key]),
            )
    return results
127
+
128
+
129
def _audit_grants(desired: DesiredState, current: CurrentState) -> List[AuditFinding]:
    """Compare principal grants and report missing or unmanaged permissions.

    Desired grants are checked first (``GRANT_MISSING``,
    ``GRANT_PERMISSIONS_MISSING`` as errors and
    ``GRANT_PERMISSIONS_UNMANAGED`` as a warning); afterwards every current
    grant that has no desired counterpart yields a ``GRANT_UNMANAGED`` warning.
    Both passes iterate in ``_grant_sort_key`` order.
    """
    wanted = _grant_index(desired.grants)
    live = _grant_index(current.grants)
    results: List[AuditFinding] = []

    def in_order(index: Any) -> List[Any]:
        return sorted(index.items(), key=lambda entry: _grant_sort_key(entry[0]))

    for identity, wanted_grant in in_order(wanted):
        live_grant = live.get(identity)
        if live_grant is None:
            results.append(
                _grant_finding(
                    "GRANT_MISSING",
                    "error",
                    wanted_grant,
                    "Principal grant is missing",
                    {
                        "missing_permissions": list(wanted_grant.permissions),
                        "missing_grantable_permissions": list(wanted_grant.grantable_permissions),
                    },
                )
            )
            continue

        wanted_perms = set(wanted_grant.permissions)
        wanted_grantable = set(wanted_grant.grantable_permissions)
        live_perms = set(live_grant.permissions)
        live_grantable = set(live_grant.grantable_permissions)

        absent_perms = sorted(wanted_perms - live_perms)
        absent_grantable = sorted(wanted_grantable - live_grantable)
        if absent_perms or absent_grantable:
            results.append(
                _grant_finding(
                    "GRANT_PERMISSIONS_MISSING",
                    "error",
                    wanted_grant,
                    "Principal is missing desired permissions",
                    {
                        "missing_permissions": absent_perms,
                        "missing_grantable_permissions": absent_grantable,
                    },
                )
            )

        surplus_perms = sorted(live_perms - wanted_perms)
        surplus_grantable = sorted(live_grantable - wanted_grantable)
        if surplus_perms or surplus_grantable:
            results.append(
                _grant_finding(
                    "GRANT_PERMISSIONS_UNMANAGED",
                    "warning",
                    live_grant,
                    "Principal has permissions not present in desired state",
                    {
                        "unmanaged_permissions": surplus_perms,
                        "unmanaged_grantable_permissions": surplus_grantable,
                    },
                )
            )

    for identity, live_grant in in_order(live):
        if identity in wanted:
            continue
        results.append(
            _grant_finding(
                "GRANT_UNMANAGED",
                "warning",
                live_grant,
                "Principal grant is not present in desired state",
                {
                    "permissions": list(live_grant.permissions),
                    "grantable_permissions": list(live_grant.grantable_permissions),
                },
            )
        )
    return results
164
+
165
+
166
def _grant_finding(code: str, severity: str, grant: Grant, message: str, details: Mapping[str, Any]) -> AuditFinding:
    """Build a grant-related finding.

    The supplied ``details`` are copied and extended with the grant's
    ``principal`` and the dict form of its ``resource``; the finding target
    comes from ``_grant_target``.
    """
    enriched = dict(
        details,
        principal=grant.principal,
        resource=grant.resource.to_dict(),
    )
    return AuditFinding(
        code=code,
        severity=severity,
        target=_grant_target(grant),
        message=message,
        details=enriched,
    )
177
+
178
+
179
+ def _grant_sort_key(identity: Tuple[str, ResourceRef]) -> str:
180
+ return "{}:{}".format(identity[0], identity[1].identity)
@@ -0,0 +1,296 @@
1
+ """Optional boto3 adapter for live Lake Formation inventory and apply."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Dict, Iterable, List, Mapping, Optional
7
+
8
+ from .models import CurrentState, DesiredState, Grant, LFTagDefinition, ResourceRef, ResourceTagAssignment
9
+ from .planner import Change, Plan
10
+
11
+
12
@dataclass(frozen=True)
class ApplyResult:
    """Outcome of applying, or dry-running, one planned change."""

    action: str  # the change's action name, e.g. "lf_tag.create"
    target: str  # the change's target identifier
    applied: bool  # False for dry-run entries, True once a client call was made
    response: Mapping[str, Any]  # client response, or {"dry_run": True} for dry runs

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict; ``response`` is shallow-copied."""
        summary: Dict[str, Any] = {name: getattr(self, name) for name in ("action", "target", "applied")}
        summary["response"] = dict(self.response)
        return summary
28
+
29
+
30
class AWSLakeFormationAdapter:
    """Thin boto3-backed adapter for Lake Formation operations.

    The adapter reads the current LF-Tag, resource-tag and grant state that is
    relevant to a desired state, and applies planned changes through the
    wrapped Lake Formation client.
    """

    def __init__(self, lakeformation_client: Any, *, catalog_id: Optional[str] = None) -> None:
        """Wrap ``lakeformation_client``.

        Args:
            lakeformation_client: Object exposing the Lake Formation client
                methods used by this adapter (``get_lf_tag``,
                ``list_permissions``, ``grant_permissions``, ...).
            catalog_id: Optional catalog id added as ``CatalogId`` to requests
                that do not already carry one.
        """
        self.lakeformation = lakeformation_client
        self.catalog_id = catalog_id

    @classmethod
    def from_boto3(
        cls,
        *,
        profile_name: Optional[str] = None,
        region_name: Optional[str] = None,
        catalog_id: Optional[str] = None,
    ) -> "AWSLakeFormationAdapter":
        """Create an adapter from a new boto3 session.

        Raises:
            RuntimeError: If boto3 cannot be imported.
        """
        try:
            # Imported lazily so the package works without the optional dependency.
            import boto3  # type: ignore
        except ImportError as exc:
            raise RuntimeError(
                "boto3 is required for live AWS operations. Install lfguard[aws]."
            ) from exc
        session = boto3.Session(profile_name=profile_name, region_name=region_name)
        return cls(session.client("lakeformation"), catalog_id=catalog_id)

    def load_current_state_for(self, desired: DesiredState) -> CurrentState:
        """Load only the current AWS surface needed to compare with desired state."""
        lf_tags = tuple(self._load_lf_tags(desired))
        resource_tags = tuple(self._load_resource_tags(desired))
        grants = tuple(self._load_grants(desired))
        return CurrentState(lf_tags=lf_tags, resource_tags=resource_tags, grants=grants)

    def apply(self, change_plan: Plan, *, dry_run: bool = True, allow_destructive: bool = False) -> List[ApplyResult]:
        """Apply, or dry-run, the executable changes of ``change_plan``.

        Args:
            change_plan: Plan whose ``executable_changes`` are processed in order.
            dry_run: When true (the default) no client call is made; each
                change is reported with ``applied=False``.
            allow_destructive: Forwarded to ``Plan.executable_changes``.

        Returns:
            One ``ApplyResult`` per executable change, in plan order.
        """
        results: List[ApplyResult] = []
        for change in change_plan.executable_changes(allow_destructive=allow_destructive):
            if dry_run:
                results.append(ApplyResult(change.action, change.target, False, {"dry_run": True}))
            else:
                results.append(self._apply_change(change))
        return results

    def _load_lf_tags(self, desired: DesiredState) -> Iterable[LFTagDefinition]:
        """Yield the current definition of every desired LF-Tag that exists.

        Tags reported as not found, and tags returned without values, are skipped.
        """
        for tag in desired.lf_tags:
            kwargs = self._with_catalog_id({"TagKey": tag.key})
            try:
                response = self.lakeformation.get_lf_tag(**kwargs)
            except Exception as exc:
                if _is_not_found(exc):
                    continue
                raise
            values = response.get("TagValues", ())
            if values:
                yield LFTagDefinition(tag.key, tuple(values))

    def _load_resource_tags(self, desired: DesiredState) -> Iterable[ResourceTagAssignment]:
        """Yield current LF-Tag assignments for resources the desired state references.

        Resources come from desired tag assignments and from desired grants
        (except ``lf_tag_policy`` grant resources). Resources reported as not
        found, or carrying no tags, are skipped.
        """
        resources = {assignment.resource for assignment in desired.resource_tags}
        resources.update(
            grant.resource for grant in desired.grants if grant.resource.kind != "lf_tag_policy"
        )
        # Order by the resource identity, as the audit code does, instead of
        # relying on ResourceRef itself supporting ``<`` comparisons.
        for resource in sorted(resources, key=lambda ref: ref.identity):
            kwargs = self._with_catalog_id({"Resource": to_lf_resource(resource)})
            try:
                response = self.lakeformation.get_resource_lf_tags(**kwargs)
            except Exception as exc:
                if _is_not_found(exc):
                    continue
                raise
            tags = _extract_resource_tags(response)
            if tags:
                yield ResourceTagAssignment(resource=resource, tags=tags)

    def _load_grants(self, desired: DesiredState) -> Iterable[Grant]:
        """Yield current grants for each distinct desired (principal, resource) identity.

        Listed entries without any ``Permissions`` are skipped. Principal and
        resource fall back to the desired grant's values when the listed entry
        lacks them or its resource shape is not recognised.
        """
        seen = set()
        for desired_grant in desired.grants:
            key = desired_grant.identity
            if key in seen:
                continue
            seen.add(key)
            kwargs = self._with_catalog_id({
                "Principal": {"DataLakePrincipalIdentifier": desired_grant.principal},
                "Resource": to_lf_resource(desired_grant.resource),
                "MaxResults": 100,
            })
            for item in self._list_permissions(kwargs):
                principal = item.get("Principal", {}).get("DataLakePrincipalIdentifier", desired_grant.principal)
                resource = from_lf_resource(item.get("Resource", {})) or desired_grant.resource
                permissions = tuple(item.get("Permissions", ()))
                grantables = tuple(item.get("PermissionsWithGrantOption", ()))
                if permissions:
                    yield Grant(
                        principal=principal,
                        resource=resource,
                        permissions=permissions,
                        grantable_permissions=grantables,
                    )

    def _list_permissions(self, kwargs: Mapping[str, Any]) -> Iterable[Mapping[str, Any]]:
        """Yield every ``PrincipalResourcePermissions`` entry for the request.

        A client paginator is used when the client offers ``get_paginator``;
        if that fails with a "not pageable" style error, or the client has no
        paginator support, the method follows ``NextToken`` manually.
        """
        if hasattr(self.lakeformation, "get_paginator"):
            try:
                paginator = self.lakeformation.get_paginator("list_permissions")
                for page in paginator.paginate(**dict(kwargs)):
                    for item in page.get("PrincipalResourcePermissions", ()):
                        yield item
                return
            except Exception as exc:
                if not _is_operation_not_pageable(exc):
                    raise
        next_token = None
        while True:
            request = dict(kwargs)
            if next_token:
                request["NextToken"] = next_token
            response = self.lakeformation.list_permissions(**request)
            for item in response.get("PrincipalResourcePermissions", ()):
                yield item
            next_token = response.get("NextToken")
            if not next_token:
                break

    def _apply_change(self, change: Change) -> ApplyResult:
        """Execute one change through the client and wrap the response.

        Raises:
            ValueError: If ``change.action`` is not a supported action.
            KeyError: If the change payload lacks a key the action requires.
        """
        action = change.action
        payload = dict(change.payload)
        if action == "lf_tag.create":
            response = self.lakeformation.create_lf_tag(**self._with_catalog_id({
                "TagKey": payload["tag_key"],
                "TagValues": payload["tag_values"],
            }))
        elif action == "lf_tag.add_values":
            response = self.lakeformation.update_lf_tag(**self._with_catalog_id({
                "TagKey": payload["tag_key"],
                "TagValuesToAdd": payload["tag_values"],
            }))
        elif action == "lf_tag.remove_values":
            response = self.lakeformation.update_lf_tag(**self._with_catalog_id({
                "TagKey": payload["tag_key"],
                "TagValuesToDelete": payload["tag_values"],
            }))
        elif action == "resource_tag.add_values":
            response = self.lakeformation.add_lf_tags_to_resource(**self._with_catalog_id({
                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
                "LFTags": _lf_tag_pairs(payload["tags"]),
            }))
        elif action == "resource_tag.remove_values":
            response = self.lakeformation.remove_lf_tags_from_resource(**self._with_catalog_id({
                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
                "LFTags": _lf_tag_pairs(payload["tags"]),
            }))
        elif action == "grant.add_permissions":
            response = self.lakeformation.grant_permissions(**self._with_catalog_id({
                "Principal": {"DataLakePrincipalIdentifier": payload["principal"]},
                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
                "Permissions": payload.get("permissions", ()),
                "PermissionsWithGrantOption": payload.get("grantable_permissions", ()),
            }))
        elif action == "grant.revoke_permissions":
            response = self.lakeformation.revoke_permissions(**self._with_catalog_id({
                "Principal": {"DataLakePrincipalIdentifier": payload["principal"]},
                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
                "Permissions": payload.get("permissions", ()),
                "PermissionsWithGrantOption": payload.get("grantable_permissions", ()),
            }))
        else:
            raise ValueError("Unsupported change action: {}".format(action))
        # A falsy client response is normalised to an empty mapping.
        return ApplyResult(action=change.action, target=change.target, applied=True, response=response or {})

    def _with_catalog_id(self, kwargs: Mapping[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``kwargs`` with ``CatalogId`` defaulted to the adapter's.

        An explicit ``CatalogId`` already present in ``kwargs`` is kept, and
        nothing is added when the adapter has no catalog id configured.
        """
        request = dict(kwargs)
        if self.catalog_id:
            request.setdefault("CatalogId", self.catalog_id)
        return request
194
+
195
+
196
def to_lf_resource(resource: ResourceRef) -> Dict[str, Any]:
    """Translate a ``ResourceRef`` into a Lake Formation ``Resource`` dict.

    Raises:
        ValueError: If ``resource.kind`` is not one of the supported kinds.
    """
    kind = resource.kind

    # Kinds whose request shape is built without catalog scoping.
    if kind == "catalog":
        return {"Catalog": {}}
    if kind == "lf_tag_policy":
        resource_type = resource.resource_type
        expression = [
            {"TagKey": term.key, "TagValues": list(term.values)}
            for term in resource.expression
        ]
        return {"LFTagPolicy": {"ResourceType": resource_type, "Expression": expression}}

    # Remaining kinds share one shape: a single wrapper key around the
    # catalog-scoped fields.
    if kind == "database":
        wrapper = "Database"
        fields = {"Name": resource.database_name}
    elif kind == "table":
        wrapper = "Table"
        fields = {
            "DatabaseName": resource.database_name,
            "Name": resource.table_name,
        }
    elif kind == "table_with_columns":
        wrapper = "TableWithColumns"
        fields = {
            "DatabaseName": resource.database_name,
            "Name": resource.table_name,
            "ColumnNames": list(resource.columns),
        }
    elif kind == "data_location":
        wrapper = "DataLocation"
        fields = {"ResourceArn": resource.location}
    else:
        raise ValueError("Unsupported resource kind: {}".format(resource.kind))
    return {wrapper: _catalog_scoped(fields, resource)}
225
+
226
+
227
def from_lf_resource(raw: Mapping[str, Any]) -> Optional[ResourceRef]:
    """Translate a Lake Formation ``Resource`` dict into a ``ResourceRef``.

    The wrapper keys are probed in a fixed order (``Catalog``, ``Database``,
    ``Table``, ``TableWithColumns``, ``DataLocation``, ``LFTagPolicy``) and the
    first one present decides the result. Returns ``None`` when none matches.
    """
    if "Catalog" in raw:
        return ResourceRef(kind="catalog")

    if "Database" in raw:
        database = raw["Database"]
        return ResourceRef(
            kind="database",
            database_name=database.get("Name"),
            catalog_id=database.get("CatalogId"),
        )

    if "Table" in raw:
        table = raw["Table"]
        return ResourceRef(
            kind="table",
            database_name=table.get("DatabaseName"),
            table_name=table.get("Name"),
            catalog_id=table.get("CatalogId"),
        )

    if "TableWithColumns" in raw:
        table = raw["TableWithColumns"]
        column_names = tuple(table.get("ColumnNames", ()))
        return ResourceRef(
            kind="table_with_columns",
            database_name=table.get("DatabaseName"),
            table_name=table.get("Name"),
            columns=column_names,
            catalog_id=table.get("CatalogId"),
        )

    if "DataLocation" in raw:
        location = raw["DataLocation"]
        return ResourceRef(
            kind="data_location",
            location=location.get("ResourceArn"),
            catalog_id=location.get("CatalogId"),
        )

    if "LFTagPolicy" in raw:
        policy = raw["LFTagPolicy"]
        resource_type = policy.get("ResourceType")
        # Later expression entries with the same TagKey replace earlier ones.
        expression = {}
        for term in policy.get("Expression", ()):
            expression[term.get("TagKey")] = term.get("TagValues", ())
        return ResourceRef.from_dict({
            "kind": "lf_tag_policy",
            "resource_type": resource_type,
            "expression": expression,
        })

    return None
259
+
260
+
261
+ def _catalog_scoped(data: Mapping[str, Any], resource: ResourceRef) -> Dict[str, Any]:
262
+ result = {key: value for key, value in data.items() if value not in (None, "")}
263
+ if resource.catalog_id:
264
+ result["CatalogId"] = resource.catalog_id
265
+ return result
266
+
267
+
268
+ def _lf_tag_pairs(tags: Mapping[str, Iterable[str]]) -> List[Dict[str, Any]]:
269
+ return [{"TagKey": key, "TagValues": list(values)} for key, values in sorted(tags.items())]
270
+
271
+
272
def _extract_resource_tags(response: Mapping[str, Any]) -> Dict[str, frozenset]:
    """Collapse a resource-tags response into ``{tag key: frozenset(values)}``.

    Pairs listed under ``LFTagOnDatabase``, ``LFTagsOnTable`` and the
    ``LFTags`` of every ``LFTagsOnColumns`` entry are merged into one mapping.
    """
    merged: Dict[str, set] = {}
    _merge_lf_tag_pairs(merged, response.get("LFTagOnDatabase", ()))
    _merge_lf_tag_pairs(merged, response.get("LFTagsOnTable", ()))
    for column_entry in response.get("LFTagsOnColumns", ()):
        _merge_lf_tag_pairs(merged, column_entry.get("LFTags", ()))
    frozen: Dict[str, frozenset] = {}
    for tag_key, values in merged.items():
        frozen[tag_key] = frozenset(values)
    return frozen
279
+
280
+
281
+ def _merge_lf_tag_pairs(target: Dict[str, set], pairs: Iterable[Mapping[str, Any]]) -> None:
282
+ for pair in pairs:
283
+ key = pair.get("TagKey")
284
+ values = pair.get("TagValues", ())
285
+ if key and values:
286
+ target.setdefault(str(key), set()).update(str(value) for value in values)
287
+
288
+
289
+ def _is_not_found(exc: Exception) -> bool:
290
+ response = getattr(exc, "response", {})
291
+ code = response.get("Error", {}).get("Code") if isinstance(response, Mapping) else None
292
+ return code in {"EntityNotFoundException", "ResourceNotFoundException", "GlueEncryptionException"}
293
+
294
+
295
+ def _is_operation_not_pageable(exc: Exception) -> bool:
296
+ return exc.__class__.__name__ in {"OperationNotPageableError", "PaginationError"}