PyPI - lfguard - Versions diffs - 0.1.0__py3-none-any.whl - Mend

lfguard 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

lakeformation_guard/__init__.py +38 -0
lakeformation_guard/__main__.py +7 -0
lakeformation_guard/_version.py +3 -0
lakeformation_guard/audit.py +180 -0
lakeformation_guard/aws.py +296 -0
lakeformation_guard/cli.py +2485 -0
lakeformation_guard/io.py +130 -0
lakeformation_guard/lint.py +362 -0
lakeformation_guard/models.py +376 -0
lakeformation_guard/planner.py +256 -0
lakeformation_guard/py.typed +1 -0
lakeformation_guard/schema.py +181 -0
lfguard-0.1.0.dist-info/METADATA +418 -0
lfguard-0.1.0.dist-info/RECORD +18 -0
lfguard-0.1.0.dist-info/WHEEL +5 -0
lfguard-0.1.0.dist-info/entry_points.txt +3 -0
lfguard-0.1.0.dist-info/licenses/LICENSE +156 -0
lfguard-0.1.0.dist-info/top_level.txt +1 -0

lakeformation_guard/__init__.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""Public API for lfguard."""
+from .audit import AuditFinding, audit
+from ._version import __version__
+from .lint import LintFinding, lint_desired
+from .models import (
+    CurrentState,
+    DesiredState,
+    Grant,
+    GuardrailState,
+    LFTagDefinition,
+    LFTagValue,
+    ResourceRef,
+    ResourceTagAssignment,
+)
+from .planner import Change, Plan, PlanOptions, plan
+from .schema import state_json_schema
+# Names exported by ``from lakeformation_guard import *``. Every entry is
+# imported at the top of this module; the list is kept in ASCII sort order.
+__all__ = [
+    "AuditFinding",
+    "Change",
+    "CurrentState",
+    "DesiredState",
+    "Grant",
+    "GuardrailState",
+    "LFTagDefinition",
+    "LFTagValue",
+    "LintFinding",
+    "Plan",
+    "PlanOptions",
+    "ResourceRef",
+    "ResourceTagAssignment",
+    "__version__",
+    "audit",
+    "lint_desired",
+    "plan",
+    "state_json_schema",
+]

lakeformation_guard/__main__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Module entry point for ``python -m lakeformation_guard``."""
+from .cli import main
+# Only run the CLI when this module is the program entry point; main()'s
+# return value is handed to SystemExit so it becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

lakeformation_guard/_version.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Package version."""
+__version__ = "0.1.0"

lakeformation_guard/audit.py ADDED Viewed

@@ -0,0 +1,180 @@
+"""Audit findings for desired and current Lake Formation state."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Dict, List, Mapping, Tuple
+from .models import CurrentState, DesiredState, Grant, ResourceRef
+from .planner import _grant_index, _grant_target, _lf_tag_index, _resource_tag_index
+@dataclass(frozen=True)
+class AuditFinding:
+    """A drift or policy finding detected during guardrail audit."""
+    code: str
+    severity: str
+    target: str
+    message: str
+    details: Mapping[str, Any]
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "code": self.code,
+            "severity": self.severity,
+            "target": self.target,
+            "message": self.message,
+            "details": dict(self.details),
+        }
+def audit(desired: DesiredState, current: CurrentState) -> Tuple[AuditFinding, ...]:
+    """Return drift findings without making any changes."""
+    findings: List[AuditFinding] = []
+    findings.extend(_audit_lf_tags(desired, current))
+    findings.extend(_audit_resource_tags(desired, current))
+    findings.extend(_audit_grants(desired, current))
+    return tuple(findings)
+def _audit_lf_tags(desired: DesiredState, current: CurrentState) -> List[AuditFinding]:
+    findings: List[AuditFinding] = []
+    desired_tags = _lf_tag_index(desired.lf_tags)
+    current_tags = _lf_tag_index(current.lf_tags)
+    for key, desired_tag in sorted(desired_tags.items()):
+        current_tag = current_tags.get(key)
+        if current_tag is None:
+            findings.append(
+                AuditFinding(
+                    code="LF_TAG_MISSING",
+                    severity="error",
+                    target="lf_tag:{}".format(key),
+                    message="Desired LF-Tag is missing",
+                    details={"tag_key": key, "desired_values": list(desired_tag.values)},
+                )
+            )
+            continue
+        missing = sorted(set(desired_tag.values) - set(current_tag.values))
+        if missing:
+            findings.append(
+                AuditFinding(
+                    code="LF_TAG_VALUES_MISSING",
+                    severity="error",
+                    target="lf_tag:{}".format(key),
+                    message="Desired LF-Tag values are missing",
+                    details={"tag_key": key, "missing_values": missing},
+                )
+            )
+        extra = sorted(set(current_tag.values) - set(desired_tag.values))
+        if extra:
+            findings.append(
+                AuditFinding(
+                    code="LF_TAG_VALUES_UNMANAGED",
+                    severity="warning",
+                    target="lf_tag:{}".format(key),
+                    message="Current LF-Tag has values not present in desired state",
+                    details={"tag_key": key, "unmanaged_values": extra},
+                )
+            )
+    return findings
+def _audit_resource_tags(desired: DesiredState, current: CurrentState) -> List[AuditFinding]:
+    findings: List[AuditFinding] = []
+    desired_by_resource = _resource_tag_index(desired.resource_tags)
+    current_by_resource = _resource_tag_index(current.resource_tags)
+    for resource, desired_tags in sorted(desired_by_resource.items(), key=lambda item: item[0].identity):
+        current_tags = current_by_resource.get(resource, {})
+        for key, desired_values in sorted(desired_tags.items()):
+            current_values = current_tags.get(key, frozenset())
+            missing = sorted(desired_values - current_values)
+            if missing:
+                findings.append(
+                    AuditFinding(
+                        code="RESOURCE_TAG_VALUES_MISSING",
+                        severity="error",
+                        target=resource.identity,
+                        message="Resource is missing desired LF-Tag values",
+                        details={"resource": resource.to_dict(), "tag_key": key, "missing_values": missing},
+                    )
+                )
+            extra = sorted(current_values - desired_values)
+            if extra:
+                findings.append(
+                    AuditFinding(
+                        code="RESOURCE_TAG_VALUES_UNMANAGED",
+                        severity="warning",
+                        target=resource.identity,
+                        message="Resource has LF-Tag values not present in desired state",
+                        details={"resource": resource.to_dict(), "tag_key": key, "unmanaged_values": extra},
+                    )
+                )
+        unmanaged_keys = sorted(set(current_tags) - set(desired_tags))
+        for key in unmanaged_keys:
+            findings.append(
+                AuditFinding(
+                    code="RESOURCE_TAG_KEY_UNMANAGED",
+                    severity="warning",
+                    target=resource.identity,
+                    message="Resource has LF-Tag key not present in desired state",
+                    details={"resource": resource.to_dict(), "tag_key": key, "unmanaged_values": sorted(current_tags[key])},
+                )
+            )
+    return findings
+def _audit_grants(desired: DesiredState, current: CurrentState) -> List[AuditFinding]:
+    findings: List[AuditFinding] = []
+    desired_grants = _grant_index(desired.grants)
+    current_grants = _grant_index(current.grants)
+    for identity, desired_grant in sorted(desired_grants.items(), key=lambda item: _grant_sort_key(item[0])):
+        current_grant = current_grants.get(identity)
+        if current_grant is None:
+            findings.append(_grant_finding("GRANT_MISSING", "error", desired_grant, "Principal grant is missing", {
+                "missing_permissions": list(desired_grant.permissions),
+                "missing_grantable_permissions": list(desired_grant.grantable_permissions),
+            }))
+            continue
+        missing_permissions = sorted(set(desired_grant.permissions) - set(current_grant.permissions))
+        missing_grantables = sorted(set(desired_grant.grantable_permissions) - set(current_grant.grantable_permissions))
+        if missing_permissions or missing_grantables:
+            findings.append(_grant_finding("GRANT_PERMISSIONS_MISSING", "error", desired_grant, "Principal is missing desired permissions", {
+                "missing_permissions": missing_permissions,
+                "missing_grantable_permissions": missing_grantables,
+            }))
+        extra_permissions = sorted(set(current_grant.permissions) - set(desired_grant.permissions))
+        extra_grantables = sorted(set(current_grant.grantable_permissions) - set(desired_grant.grantable_permissions))
+        if extra_permissions or extra_grantables:
+            findings.append(_grant_finding("GRANT_PERMISSIONS_UNMANAGED", "warning", current_grant, "Principal has permissions not present in desired state", {
+                "unmanaged_permissions": extra_permissions,
+                "unmanaged_grantable_permissions": extra_grantables,
+            }))
+    for identity, current_grant in sorted(current_grants.items(), key=lambda item: _grant_sort_key(item[0])):
+        if identity not in desired_grants:
+            findings.append(_grant_finding("GRANT_UNMANAGED", "warning", current_grant, "Principal grant is not present in desired state", {
+                "permissions": list(current_grant.permissions),
+                "grantable_permissions": list(current_grant.grantable_permissions),
+            }))
+    return findings
+def _grant_finding(code: str, severity: str, grant: Grant, message: str, details: Mapping[str, Any]) -> AuditFinding:
+    enriched = dict(details)
+    enriched["principal"] = grant.principal
+    enriched["resource"] = grant.resource.to_dict()
+    return AuditFinding(
+        code=code,
+        severity=severity,
+        target=_grant_target(grant),
+        message=message,
+        details=enriched,
+    )
+def _grant_sort_key(identity: Tuple[str, ResourceRef]) -> str:
+    return "{}:{}".format(identity[0], identity[1].identity)

lakeformation_guard/aws.py ADDED Viewed

@@ -0,0 +1,296 @@
+"""Optional boto3 adapter for live Lake Formation inventory and apply."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Mapping, Optional
+from .models import CurrentState, DesiredState, Grant, LFTagDefinition, ResourceRef, ResourceTagAssignment
+from .planner import Change, Plan
+@dataclass(frozen=True)
+class ApplyResult:
+    """Result of applying or dry-running a single change."""
+    action: str
+    target: str
+    applied: bool
+    response: Mapping[str, Any]
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "action": self.action,
+            "target": self.target,
+            "applied": self.applied,
+            "response": dict(self.response),
+        }
+class AWSLakeFormationAdapter:
+    """Thin boto3-backed adapter for Lake Formation operations."""
+    def __init__(self, lakeformation_client: Any, *, catalog_id: Optional[str] = None) -> None:
+        self.lakeformation = lakeformation_client
+        self.catalog_id = catalog_id
+    @classmethod
+    def from_boto3(
+        cls,
+        *,
+        profile_name: Optional[str] = None,
+        region_name: Optional[str] = None,
+        catalog_id: Optional[str] = None,
+    ) -> "AWSLakeFormationAdapter":
+        try:
+            import boto3  # type: ignore
+        except ImportError as exc:
+            raise RuntimeError(
+                "boto3 is required for live AWS operations. Install lfguard[aws]."
+            ) from exc
+        session = boto3.Session(profile_name=profile_name, region_name=region_name)
+        return cls(session.client("lakeformation"), catalog_id=catalog_id)
+    def load_current_state_for(self, desired: DesiredState) -> CurrentState:
+        """Load only the current AWS surface needed to compare with desired state."""
+        lf_tags = list(self._load_lf_tags(desired))
+        resource_tags = list(self._load_resource_tags(desired))
+        grants = list(self._load_grants(desired))
+        return CurrentState(lf_tags=tuple(lf_tags), resource_tags=tuple(resource_tags), grants=tuple(grants))
+    def apply(self, change_plan: Plan, *, dry_run: bool = True, allow_destructive: bool = False) -> List[ApplyResult]:
+        results: List[ApplyResult] = []
+        for change in change_plan.executable_changes(allow_destructive=allow_destructive):
+            if dry_run:
+                results.append(ApplyResult(change.action, change.target, False, {"dry_run": True}))
+                continue
+            results.append(self._apply_change(change))
+        return results
+    def _load_lf_tags(self, desired: DesiredState) -> Iterable[LFTagDefinition]:
+        for tag in desired.lf_tags:
+            kwargs = self._with_catalog_id({"TagKey": tag.key})
+            try:
+                response = self.lakeformation.get_lf_tag(**kwargs)
+            except Exception as exc:
+                if _is_not_found(exc):
+                    continue
+                raise
+            values = response.get("TagValues", ())
+            if values:
+                yield LFTagDefinition(tag.key, tuple(values))
+    def _load_resource_tags(self, desired: DesiredState) -> Iterable[ResourceTagAssignment]:
+        resources = {assignment.resource for assignment in desired.resource_tags}
+        resources.update(grant.resource for grant in desired.grants if grant.resource.kind != "lf_tag_policy")
+        for resource in sorted(resources):
+            kwargs = self._with_catalog_id({"Resource": to_lf_resource(resource)})
+            try:
+                response = self.lakeformation.get_resource_lf_tags(**kwargs)
+            except Exception as exc:
+                if _is_not_found(exc):
+                    continue
+                raise
+            tags = _extract_resource_tags(response)
+            if tags:
+                yield ResourceTagAssignment(resource=resource, tags=tags)
+    def _load_grants(self, desired: DesiredState) -> Iterable[Grant]:
+        seen = set()
+        for desired_grant in desired.grants:
+            key = desired_grant.identity
+            if key in seen:
+                continue
+            seen.add(key)
+            kwargs = {
+                "Principal": {"DataLakePrincipalIdentifier": desired_grant.principal},
+                "Resource": to_lf_resource(desired_grant.resource),
+                "MaxResults": 100,
+            }
+            kwargs = self._with_catalog_id(kwargs)
+            for item in self._list_permissions(kwargs):
+                principal = item.get("Principal", {}).get("DataLakePrincipalIdentifier", desired_grant.principal)
+                resource = from_lf_resource(item.get("Resource", {})) or desired_grant.resource
+                permissions = tuple(item.get("Permissions", ()))
+                grantables = tuple(item.get("PermissionsWithGrantOption", ()))
+                if permissions:
+                    yield Grant(principal=principal, resource=resource, permissions=permissions, grantable_permissions=grantables)
+    def _list_permissions(self, kwargs: Mapping[str, Any]) -> Iterable[Mapping[str, Any]]:
+        if hasattr(self.lakeformation, "get_paginator"):
+            try:
+                paginator = self.lakeformation.get_paginator("list_permissions")
+                for page in paginator.paginate(**dict(kwargs)):
+                    for item in page.get("PrincipalResourcePermissions", ()):
+                        yield item
+                return
+            except Exception as exc:
+                if not _is_operation_not_pageable(exc):
+                    raise
+        next_token = None
+        while True:
+            request = dict(kwargs)
+            if next_token:
+                request["NextToken"] = next_token
+            response = self.lakeformation.list_permissions(**request)
+            for item in response.get("PrincipalResourcePermissions", ()):
+                yield item
+            next_token = response.get("NextToken")
+            if not next_token:
+                break
+    def _apply_change(self, change: Change) -> ApplyResult:
+        action = change.action
+        payload = dict(change.payload)
+        if action == "lf_tag.create":
+            response = self.lakeformation.create_lf_tag(**self._with_catalog_id({
+                "TagKey": payload["tag_key"],
+                "TagValues": payload["tag_values"],
+            }))
+        elif action == "lf_tag.add_values":
+            response = self.lakeformation.update_lf_tag(**self._with_catalog_id({
+                "TagKey": payload["tag_key"],
+                "TagValuesToAdd": payload["tag_values"],
+            }))
+        elif action == "lf_tag.remove_values":
+            response = self.lakeformation.update_lf_tag(**self._with_catalog_id({
+                "TagKey": payload["tag_key"],
+                "TagValuesToDelete": payload["tag_values"],
+            }))
+        elif action == "resource_tag.add_values":
+            response = self.lakeformation.add_lf_tags_to_resource(**self._with_catalog_id({
+                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
+                "LFTags": _lf_tag_pairs(payload["tags"]),
+            }))
+        elif action == "resource_tag.remove_values":
+            response = self.lakeformation.remove_lf_tags_from_resource(**self._with_catalog_id({
+                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
+                "LFTags": _lf_tag_pairs(payload["tags"]),
+            }))
+        elif action == "grant.add_permissions":
+            response = self.lakeformation.grant_permissions(**self._with_catalog_id({
+                "Principal": {"DataLakePrincipalIdentifier": payload["principal"]},
+                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
+                "Permissions": payload.get("permissions", ()),
+                "PermissionsWithGrantOption": payload.get("grantable_permissions", ()),
+            }))
+        elif action == "grant.revoke_permissions":
+            response = self.lakeformation.revoke_permissions(**self._with_catalog_id({
+                "Principal": {"DataLakePrincipalIdentifier": payload["principal"]},
+                "Resource": to_lf_resource(ResourceRef.from_dict(payload["resource"])),
+                "Permissions": payload.get("permissions", ()),
+                "PermissionsWithGrantOption": payload.get("grantable_permissions", ()),
+            }))
+        else:
+            raise ValueError("Unsupported change action: {}".format(action))
+        return ApplyResult(action=change.action, target=change.target, applied=True, response=response or {})
+    def _with_catalog_id(self, kwargs: Mapping[str, Any]) -> Dict[str, Any]:
+        request = dict(kwargs)
+        if self.catalog_id:
+            request.setdefault("CatalogId", self.catalog_id)
+        return request
+def to_lf_resource(resource: ResourceRef) -> Dict[str, Any]:
+    if resource.kind == "catalog":
+        return {"Catalog": {}}
+    if resource.kind == "database":
+        return {"Database": _catalog_scoped({"Name": resource.database_name}, resource)}
+    if resource.kind == "table":
+        return {"Table": _catalog_scoped({
+            "DatabaseName": resource.database_name,
+            "Name": resource.table_name,
+        }, resource)}
+    if resource.kind == "table_with_columns":
+        return {"TableWithColumns": _catalog_scoped({
+            "DatabaseName": resource.database_name,
+            "Name": resource.table_name,
+            "ColumnNames": list(resource.columns),
+        }, resource)}
+    if resource.kind == "data_location":
+        return {"DataLocation": _catalog_scoped({"ResourceArn": resource.location}, resource)}
+    if resource.kind == "lf_tag_policy":
+        return {
+            "LFTagPolicy": {
+                "ResourceType": resource.resource_type,
+                "Expression": [
+                    {"TagKey": item.key, "TagValues": list(item.values)}
+                    for item in resource.expression
+                ],
+            }
+        }
+    raise ValueError("Unsupported resource kind: {}".format(resource.kind))
+def from_lf_resource(raw: Mapping[str, Any]) -> Optional[ResourceRef]:
+    if "Catalog" in raw:
+        return ResourceRef(kind="catalog")
+    if "Database" in raw:
+        item = raw["Database"]
+        return ResourceRef(kind="database", database_name=item.get("Name"), catalog_id=item.get("CatalogId"))
+    if "Table" in raw:
+        item = raw["Table"]
+        return ResourceRef(kind="table", database_name=item.get("DatabaseName"), table_name=item.get("Name"), catalog_id=item.get("CatalogId"))
+    if "TableWithColumns" in raw:
+        item = raw["TableWithColumns"]
+        return ResourceRef(
+            kind="table_with_columns",
+            database_name=item.get("DatabaseName"),
+            table_name=item.get("Name"),
+            columns=tuple(item.get("ColumnNames", ())),
+            catalog_id=item.get("CatalogId"),
+        )
+    if "DataLocation" in raw:
+        item = raw["DataLocation"]
+        return ResourceRef(kind="data_location", location=item.get("ResourceArn"), catalog_id=item.get("CatalogId"))
+    if "LFTagPolicy" in raw:
+        item = raw["LFTagPolicy"]
+        return ResourceRef.from_dict({
+            "kind": "lf_tag_policy",
+            "resource_type": item.get("ResourceType"),
+            "expression": {
+                expr.get("TagKey"): expr.get("TagValues", ())
+                for expr in item.get("Expression", ())
+            },
+        })
+    return None
+def _catalog_scoped(data: Mapping[str, Any], resource: ResourceRef) -> Dict[str, Any]:
+    result = {key: value for key, value in data.items() if value not in (None, "")}
+    if resource.catalog_id:
+        result["CatalogId"] = resource.catalog_id
+    return result
+def _lf_tag_pairs(tags: Mapping[str, Iterable[str]]) -> List[Dict[str, Any]]:
+    return [{"TagKey": key, "TagValues": list(values)} for key, values in sorted(tags.items())]
+def _extract_resource_tags(response: Mapping[str, Any]) -> Dict[str, frozenset]:
+    tags: Dict[str, set] = {}
+    for key in ("LFTagOnDatabase", "LFTagsOnTable"):
+        _merge_lf_tag_pairs(tags, response.get(key, ()))
+    for column_tags in response.get("LFTagsOnColumns", ()):
+        _merge_lf_tag_pairs(tags, column_tags.get("LFTags", ()))
+    return {key: frozenset(values) for key, values in tags.items()}
+def _merge_lf_tag_pairs(target: Dict[str, set], pairs: Iterable[Mapping[str, Any]]) -> None:
+    for pair in pairs:
+        key = pair.get("TagKey")
+        values = pair.get("TagValues", ())
+        if key and values:
+            target.setdefault(str(key), set()).update(str(value) for value in values)
+def _is_not_found(exc: Exception) -> bool:
+    response = getattr(exc, "response", {})
+    code = response.get("Error", {}).get("Code") if isinstance(response, Mapping) else None
+    return code in {"EntityNotFoundException", "ResourceNotFoundException", "GlueEncryptionException"}
+def _is_operation_not_pageable(exc: Exception) -> bool:
+    return exc.__class__.__name__ in {"OperationNotPageableError", "PaginationError"}