labrun-checks 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labrun_checks-0.2.0/PKG-INFO +10 -0
- labrun_checks-0.2.0/README.md +70 -0
- labrun_checks-0.2.0/pyproject.toml +22 -0
- labrun_checks-0.2.0/setup.cfg +4 -0
- labrun_checks-0.2.0/src/labrun_checks/__init__.py +69 -0
- labrun_checks-0.2.0/src/labrun_checks/_api.py +47 -0
- labrun_checks-0.2.0/src/labrun_checks/_cleanup.py +97 -0
- labrun_checks-0.2.0/src/labrun_checks/_state.py +25 -0
- labrun_checks-0.2.0/src/labrun_checks/_types.py +56 -0
- labrun_checks-0.2.0/src/labrun_checks/adapters/__init__.py +23 -0
- labrun_checks-0.2.0/src/labrun_checks/adapters/aws.py +256 -0
- labrun_checks-0.2.0/src/labrun_checks/adapters/azure.py +9 -0
- labrun_checks-0.2.0/src/labrun_checks/adapters/databricks.py +9 -0
- labrun_checks-0.2.0/src/labrun_checks/adapters/snowflake.py +9 -0
- labrun_checks-0.2.0/src/labrun_checks.egg-info/PKG-INFO +10 -0
- labrun_checks-0.2.0/src/labrun_checks.egg-info/SOURCES.txt +19 -0
- labrun_checks-0.2.0/src/labrun_checks.egg-info/dependency_links.txt +1 -0
- labrun_checks-0.2.0/src/labrun_checks.egg-info/requires.txt +6 -0
- labrun_checks-0.2.0/src/labrun_checks.egg-info/top_level.txt +1 -0
- labrun_checks-0.2.0/tests/test_checks.py +287 -0
- labrun_checks-0.2.0/tests/test_cleanup.py +270 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: labrun-checks
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Checkpoint validation for Labrun labs
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Requires-Dist: requests>=2.28
|
|
7
|
+
Requires-Dist: boto3>=1.26
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
10
|
+
Requires-Dist: responses>=0.23; extra == "dev"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# labrun-checks
|
|
2
|
+
|
|
3
|
+
Checkpoint validation and cleanup verification for labrun labs.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
pip install labrun-checks
|
|
7
|
+
|
|
8
|
+
## Usage in notebooks
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
from labrun_checks import register, check, cleanup, run_cleanup
|
|
12
|
+
from labrun_checks import CleanupResource
|
|
13
|
+
|
|
14
|
+
# 1. Register session (called once in setup cell)
|
|
15
|
+
register(
|
|
16
|
+
session_token=TOKEN,
|
|
17
|
+
lab_id="s3-raw-ingestion",
|
|
18
|
+
credentials={
|
|
19
|
+
"aws_access_key_id": AWS_KEY,
|
|
20
|
+
"aws_secret_access_key": AWS_SECRET,
|
|
21
|
+
"region": "us-east-1",
|
|
22
|
+
},
|
|
23
|
+
)
|
|
24
|
+
# credentials is optional. When provided, it attaches to the session and is
|
|
25
|
+
# available inside checkpoint validators as session.credentials.
|
|
26
|
+
|
|
27
|
+
# 2. Run checkpoint validations
|
|
28
|
+
result = check(1, my_validator_fn)
|
|
29
|
+
|
|
30
|
+
# 3. Run three-phase cleanup
|
|
31
|
+
CLEANUP_MANIFEST = [
|
|
32
|
+
CleanupResource(type="s3_bucket", id="my-bucket", cli_delete_command="aws s3 rb s3://my-bucket --force"),
|
|
33
|
+
CleanupResource(type="glue_database", id="my_db", cli_delete_command="aws glue delete-database --name my_db"),
|
|
34
|
+
]
|
|
35
|
+
cleanup_result = run_cleanup(CLEANUP_MANIFEST)
|
|
36
|
+
|
|
37
|
+
# 4. Report cleanup status
|
|
38
|
+
cleanup(status=cleanup_result.status)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Cleanup protocol
|
|
42
|
+
|
|
43
|
+
`run_cleanup` implements the three-phase protocol from RESOURCE-SAFETY-SPEC Section 3.6:
|
|
44
|
+
|
|
45
|
+
1. **Teardown**: Delete every resource in the manifest. Already-gone errors are swallowed. Other errors surface with the exact CLI command to run manually.
|
|
46
|
+
2. **Verification**: Describe every resource to confirm deletion. Any resource still alive is reported.
|
|
47
|
+
3. **Tag audit**: Query AWS Resource Groups Tagging API for resources tagged with this lab. Orphans from previous attempts are reported.
|
|
48
|
+
|
|
49
|
+
Returns a `CleanupResult` with `status` ("clean" or "dirty"), `warnings`, and `orphans`.
|
|
50
|
+
|
|
51
|
+
## Adapters
|
|
52
|
+
|
|
53
|
+
The cleanup protocol uses dependency-injected adapters. `AwsCleanupAdapter` is the default. Custom adapters must implement the `CleanupAdapter` protocol:
|
|
54
|
+
|
|
55
|
+
- `delete_resource(credentials, resource) -> None`
|
|
56
|
+
- `describe_resource(credentials, resource) -> bool`
|
|
57
|
+
- `tag_scan(credentials, lab_slug, region) -> list[str]`
|
|
58
|
+
|
|
59
|
+
## Supported resource types
|
|
60
|
+
|
|
61
|
+
- `s3_bucket`
|
|
62
|
+
- `glue_database`
|
|
63
|
+
- `glue_crawler`
|
|
64
|
+
- `glue_table` (requires `parent` field for database name)
|
|
65
|
+
- `kinesis_stream`
|
|
66
|
+
- `iam_role` (detaches managed policies and deletes inline policies before deleting the role)
|
|
67
|
+
|
|
68
|
+
## Version
|
|
69
|
+
|
|
70
|
+
0.2.0
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "labrun-checks"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "Checkpoint validation for Labrun labs"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"requests>=2.28",
|
|
12
|
+
"boto3>=1.26",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = [
|
|
17
|
+
"pytest>=7",
|
|
18
|
+
"responses>=0.23",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[tool.setuptools.packages.find]
|
|
22
|
+
where = ["src"]
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Callable
|
|
4
|
+
|
|
5
|
+
from ._api import post_checkpoint, post_cleanup_complete
|
|
6
|
+
from ._cleanup import run_cleanup
|
|
7
|
+
from ._state import get_session, set_session
|
|
8
|
+
from ._types import (
|
|
9
|
+
CheckpointResult,
|
|
10
|
+
CheckpointSession,
|
|
11
|
+
CleanupAdapter,
|
|
12
|
+
CleanupResource,
|
|
13
|
+
CleanupResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"register",
|
|
18
|
+
"check",
|
|
19
|
+
"cleanup",
|
|
20
|
+
"run_cleanup",
|
|
21
|
+
"CheckpointResult",
|
|
22
|
+
"CheckpointSession",
|
|
23
|
+
"CleanupAdapter",
|
|
24
|
+
"CleanupResource",
|
|
25
|
+
"CleanupResult",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def register(session_token: str, lab_id: str, credentials: dict | None = None) -> None:
    """Store the active lab session used by check() and cleanup().

    Args:
        session_token: Bearer token for the Labrun API.
        lab_id: Slug identifying the lab being run.
        credentials: Optional cloud credentials; attached to the session and
            exposed to validators as session.credentials. Defaults to {}.
    """
    creds = {} if not credentials else credentials
    set_session(CheckpointSession(session_token=session_token, lab_id=lab_id, credentials=creds))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def check(checkpoint_id: int, validator_fn: Callable[[CheckpointSession], CheckpointResult]) -> CheckpointResult:
    """Run a checkpoint validator and report its outcome to the Labrun API.

    The validator receives the registered session. Any exception it raises is
    converted into an 'error' result rather than propagating into the
    notebook; a failure while POSTing the result also yields 'error'.

    Args:
        checkpoint_id: Numeric identifier of the checkpoint within the lab.
        validator_fn: Callable that inspects the session and returns a
            CheckpointResult.

    Returns:
        The validator's result, or an 'error' CheckpointResult when the
        validator raised or the network call failed.
    """
    session = get_session()

    # Phase 1: run the validator; its bugs become 'error' results.
    try:
        outcome = validator_fn(session)
    except Exception as exc:
        outcome = CheckpointResult("error", error_reason=repr(exc))

    # Phase 2: record the outcome; a network failure is surfaced as 'error'
    # so the learner knows the checkpoint was NOT recorded.
    try:
        post_checkpoint(session.session_token, session.lab_id, checkpoint_id, outcome)
    except Exception as exc:
        return CheckpointResult(
            "error",
            error_reason=f"Network error recording checkpoint: {repr(exc)}",
        )

    return outcome
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def cleanup(status: str) -> CheckpointResult:
    """Report the final cleanup status ('clean' or 'dirty') to the Labrun API.

    An invalid status is rejected before any network traffic happens.

    Returns:
        'pass' for clean, 'fail' for dirty, 'error' when the status is
        invalid or the POST fails.
    """
    if status != "clean" and status != "dirty":
        return CheckpointResult(
            "error",
            error_reason=f"status must be 'clean' or 'dirty', got {status!r}",
        )

    session = get_session()

    try:
        post_cleanup_complete(session.session_token, session.lab_id, status)
    except Exception as exc:
        return CheckpointResult(
            "error",
            error_reason=f"Network error posting cleanup event: {repr(exc)}",
        )

    if status == "clean":
        return CheckpointResult("pass")
    return CheckpointResult("fail")
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from ._types import CheckpointResult
|
|
9
|
+
|
|
10
|
+
_BASE_URL = os.environ.get("LABRUN_API_BASE_URL", "https://labrun.dev")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def post_checkpoint(
    session_token: str,
    lab_id: str,
    checkpoint_id: int,
    result: CheckpointResult,
) -> None:
    """POST a checkpoint result to the Labrun API.

    Only identifiers and the result status go in the body; the session token
    travels solely in the Authorization header. Raises requests exceptions on
    network failure and HTTPError (via raise_for_status) on non-2xx responses.
    """
    payload = {
        "lab_id": lab_id,
        "checkpoint_id": checkpoint_id,
        "status": result.status,
        # Client-side wall-clock timestamp, seconds since epoch.
        "timestamp": int(time.time()),
    }
    headers = {"Authorization": f"Bearer {session_token}"}
    response = requests.post(
        f"{_BASE_URL}/api/labs/checkpoint",
        json=payload,
        headers=headers,
        timeout=10,
    )
    response.raise_for_status()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def post_cleanup_complete(session_token: str, lab_id: str, status: str) -> None:
    """POST the final cleanup status for a lab to the Labrun API.

    The bearer token is sent only in the Authorization header, never in the
    body. Raises requests exceptions on network failure and HTTPError on
    non-2xx responses.
    """
    body = {"lab_id": lab_id, "status": status}
    auth_header = {"Authorization": f"Bearer {session_token}"}
    response = requests.post(
        f"{_BASE_URL}/api/labs/cleanup-complete",
        json=body,
        headers=auth_header,
        timeout=10,
    )
    response.raise_for_status()
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Three-phase cleanup orchestrator per RESOURCE-SAFETY-SPEC Section 3.6."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from ._state import get_session
|
|
5
|
+
from ._types import CleanupAdapter, CleanupResource, CleanupResult
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def run_cleanup(
    manifest: list[CleanupResource],
    adapter: CleanupAdapter | None = None,
) -> CleanupResult:
    """Run the three-phase cleanup protocol.

    Phase 1 deletes every manifest resource (failures are collected, not
    raised). Phase 2 re-describes each resource to confirm it is gone.
    Phase 3 queries the tagging API for resources tagged with this lab that
    were not in the manifest (orphans from previous attempts).

    Args:
        manifest: Resources to clean up, ordered critical-first.
        adapter: Cloud adapter implementing delete/describe/tag_scan.
            Defaults to AwsCleanupAdapter if None.

    Returns:
        CleanupResult with status 'clean' or 'dirty', plus any warnings/orphans.
    """
    session = get_session()
    lab_slug = session.lab_id

    if adapter is None:
        # Deferred import so importing this module never requires boto3.
        from .adapters.aws import AwsCleanupAdapter
        adapter = AwsCleanupAdapter()

    credentials = session.credentials
    warnings: list[str] = []
    orphans: list[str] = []

    # Phase 1: Teardown — every failure is recorded with the exact CLI
    # command the user can run by hand; the loop never aborts early.
    for resource in manifest:
        try:
            adapter.delete_resource(credentials, resource)
        except Exception as exc:
            cmd = resource.cli_delete_command or f"(no CLI command provided for {resource.type} {resource.id!r})"
            warnings.append(
                f"FAILED TO DELETE: {resource.type} {resource.id!r}. "
                f"Error: {exc}. Run manually: {cmd}"
            )

    # Phase 2: Verification scan. NOTE(review): this runs immediately after
    # teardown; services with asynchronous deletion (e.g. Kinesis) may still
    # report the resource as alive here — confirm whether a delay/retry is
    # needed upstream.
    for resource in manifest:
        try:
            still_exists = adapter.describe_resource(credentials, resource)
        except Exception as exc:
            warnings.append(
                f"VERIFICATION ERROR: Could not check {resource.type} {resource.id!r}. "
                f"Error: {exc}"
            )
            continue

        if still_exists:
            cmd = resource.cli_delete_command or f"(no CLI command provided for {resource.type} {resource.id!r})"
            warnings.append(
                f"WARNING: {resource.type} {resource.id!r} is still alive. "
                f"Run this to kill it: {cmd}"
            )

    # Phase 3: Tag audit — anything tagged with this lab that is not in the
    # manifest is reported as an orphan. A scan failure degrades to a
    # warning with an empty result rather than crashing.
    region = credentials.get("region", "us-east-1")
    manifest_ids = {r.id for r in manifest}
    try:
        tagged_arns = adapter.tag_scan(credentials, lab_slug, region)
    except Exception as exc:
        warnings.append(f"TAG AUDIT ERROR: Could not scan tags. Error: {exc}")
        tagged_arns = []

    for arn in tagged_arns:
        # Compare on the extracted resource ID since the manifest stores
        # names, not ARNs.
        resource_id = _extract_resource_id_from_arn(arn)
        if resource_id not in manifest_ids:
            orphans.append(
                f"ORPHAN FOUND: {arn} is tagged as belonging to this lab "
                f"but was not in this session's manifest. It may be left over "
                f"from a previous attempt."
            )

    # Any warning or orphan at all means the environment is not clean.
    status = "clean" if (not warnings and not orphans) else "dirty"
    return CleanupResult(status=status, warnings=warnings, orphans=orphans)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _extract_resource_id_from_arn(arn: str) -> str:
|
|
85
|
+
"""Best-effort extraction of a human-readable resource ID from an ARN.
|
|
86
|
+
|
|
87
|
+
ARN formats vary by service:
|
|
88
|
+
arn:aws:s3:::bucket-name -> bucket-name
|
|
89
|
+
arn:aws:glue:region:acct:database/db-name -> db-name
|
|
90
|
+
arn:aws:kinesis:region:acct:stream/name -> name
|
|
91
|
+
Falls back to the full ARN if no pattern matches.
|
|
92
|
+
"""
|
|
93
|
+
if ":::" in arn:
|
|
94
|
+
return arn.split(":::")[-1]
|
|
95
|
+
if "/" in arn:
|
|
96
|
+
return arn.split("/")[-1]
|
|
97
|
+
return arn
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from ._types import CheckpointSession
|
|
6
|
+
|
|
7
|
+
_session: Optional[CheckpointSession] = None
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_session() -> CheckpointSession:
    """Return the registered session.

    Raises:
        RuntimeError: if register() has not been called in this process.
    """
    if _session is not None:
        return _session
    raise RuntimeError(
        "Call labrun_checks.register(session_token, lab_id) before using check() or cleanup()."
    )
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def set_session(session: CheckpointSession) -> None:
    """Install *session* as the process-wide active session."""
    global _session
    _session = session
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def clear() -> None:
    """Reset the stored session to None so get_session() raises again."""
    global _session
    _session = None
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Literal, Optional, Protocol, runtime_checkable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
Status = Literal["pass", "fail", "error"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class CheckpointResult:
    """Outcome of a single checkpoint validation (or of a cleanup report)."""
    status: Status  # "pass" | "fail" | "error"
    hint_tier_available: int = 0  # hint tier for this result; 0 appears to mean none — inferred from name, confirm
    error_reason: Optional[str] = None  # human-readable cause, set when status == "error"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class CheckpointSession:
    """Active lab session stored by register() and read by check()/cleanup()."""
    session_token: str  # bearer token; _api sends it only in the Authorization header
    lab_id: str  # lab slug, included in API payloads and used as the tag-audit key
    credentials: dict = field(default_factory=dict)  # cloud creds for validators/cleanup; never placed in API bodies
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class CleanupResource:
    """Single resource to be cleaned up. Ordering in manifest is critical-first."""
    type: str  # dispatch key, e.g. "s3_bucket", "glue_database", "iam_role"
    id: str  # resource name/identifier within its service
    region: str = "us-east-1"
    parent: Optional[str] = None  # container name where required (e.g. database for glue_table)
    cli_delete_command: Optional[str] = None  # surfaced to the user when automated deletion fails
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class CleanupResult:
    """Returned by run_cleanup. Notebook passes result.status to cleanup()."""
    status: Literal["clean", "dirty"]  # "dirty" whenever any warning or orphan was recorded
    warnings: list[str] = field(default_factory=list)  # teardown/verification failure messages
    orphans: list[str] = field(default_factory=list)  # tagged resources not present in the manifest
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@runtime_checkable
class CleanupAdapter(Protocol):
    """Interface that cloud-specific adapters must implement.

    runtime_checkable enables isinstance() checks, which verify only that the
    three method names exist — not their signatures.
    """

    def delete_resource(self, credentials: dict, resource: CleanupResource) -> None:
        """Delete a single resource. Raise on failure. Swallow already-gone errors."""
        ...

    def describe_resource(self, credentials: dict, resource: CleanupResource) -> bool:
        """Return True if resource still exists, False if confirmed gone."""
        ...

    def tag_scan(self, credentials: dict, lab_slug: str, region: str) -> list[str]:
        """Return list of ARNs tagged with labrun:lab-slug=<lab_slug>."""
        ...
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from .aws import (
|
|
2
|
+
AwsCleanupAdapter,
|
|
3
|
+
make_glue_client,
|
|
4
|
+
make_kinesis_client,
|
|
5
|
+
make_s3_client,
|
|
6
|
+
make_sts_client,
|
|
7
|
+
make_tagging_client,
|
|
8
|
+
)
|
|
9
|
+
from .azure import make_azure_resource_client
|
|
10
|
+
from .databricks import make_databricks_client
|
|
11
|
+
from .snowflake import make_snowflake_connection
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"AwsCleanupAdapter",
|
|
15
|
+
"make_s3_client",
|
|
16
|
+
"make_glue_client",
|
|
17
|
+
"make_kinesis_client",
|
|
18
|
+
"make_sts_client",
|
|
19
|
+
"make_tagging_client",
|
|
20
|
+
"make_databricks_client",
|
|
21
|
+
"make_snowflake_connection",
|
|
22
|
+
"make_azure_resource_client",
|
|
23
|
+
]
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""AWS boto3 adapter for labrun-checks checkpoints and cleanup."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Optional
|
|
5
|
+
|
|
6
|
+
import boto3
|
|
7
|
+
from botocore.exceptions import ClientError
|
|
8
|
+
|
|
9
|
+
from .._types import CleanupResource
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from mypy_boto3_s3 import S3Client
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
_account_id_cache: Optional[str] = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _make_client(service: str, credentials: dict):
    """Build a boto3 client for *service* from the session credential dict.

    Requires 'aws_access_key_id' and 'aws_secret_access_key' keys;
    'region' is optional and defaults to us-east-1.
    """
    region = credentials.get("region", "us-east-1")
    return boto3.client(
        service,
        aws_access_key_id=credentials["aws_access_key_id"],
        aws_secret_access_key=credentials["aws_secret_access_key"],
        region_name=region,
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def make_s3_client(credentials: dict) -> "S3Client":
    """Return a boto3 S3 client built from the session credentials."""
    return _make_client("s3", credentials)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def make_glue_client(credentials: dict):
    """Return a boto3 Glue client built from the session credentials."""
    return _make_client("glue", credentials)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def make_kinesis_client(credentials: dict):
    """Return a boto3 Kinesis client built from the session credentials."""
    return _make_client("kinesis", credentials)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def make_sts_client(credentials: dict):
    """Return a boto3 STS client built from the session credentials."""
    return _make_client("sts", credentials)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def make_tagging_client(credentials: dict):
    """Return a boto3 Resource Groups Tagging API client from the session credentials."""
    return _make_client("resourcegroupstaggingapi", credentials)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def make_iam_client(credentials: dict):
    """Return a boto3 IAM client built from the session credentials."""
    return _make_client("iam", credentials)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _get_account_id(credentials: dict) -> str:
    """Return the AWS account ID for *credentials*, cached at module level.

    NOTE(review): the cache is not keyed on the credentials — if a process
    ever uses two different credential sets, the second call returns the
    first account's ID. Call clear_account_id_cache() when switching.
    """
    global _account_id_cache
    if _account_id_cache is None:
        sts = make_sts_client(credentials)
        _account_id_cache = sts.get_caller_identity()["Account"]
    return _account_id_cache
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def clear_account_id_cache() -> None:
    """Reset the cached STS account ID (needed after switching credentials)."""
    global _account_id_cache
    _account_id_cache = None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _is_already_gone(exc: ClientError) -> bool:
|
|
65
|
+
code = exc.response.get("Error", {}).get("Code", "")
|
|
66
|
+
gone_codes = {
|
|
67
|
+
"NoSuchBucket",
|
|
68
|
+
"NoSuchKey",
|
|
69
|
+
"NoSuchEntity",
|
|
70
|
+
"EntityNotFoundException",
|
|
71
|
+
"ResourceNotFoundException",
|
|
72
|
+
"ResourceNotFoundFault",
|
|
73
|
+
"StreamNotFoundFault",
|
|
74
|
+
}
|
|
75
|
+
return code in gone_codes
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class AwsCleanupAdapter:
    """Implements CleanupAdapter protocol for AWS resources."""

    def delete_resource(self, credentials: dict, resource: CleanupResource) -> None:
        """Delete *resource*; already-gone AWS errors are treated as success.

        Raises:
            ValueError: for an unknown resource type.
            ClientError: for any AWS failure other than not-found.
        """
        try:
            handler = _DELETE_DISPATCH.get(resource.type)
            if handler is None:
                raise ValueError(f"Unknown resource type: {resource.type!r}")
            handler(credentials, resource)
        except ClientError as exc:
            if not _is_already_gone(exc):
                raise

    def describe_resource(self, credentials: dict, resource: CleanupResource) -> bool:
        """Return True while *resource* still exists, False once confirmed gone."""
        handler = _DESCRIBE_DISPATCH.get(resource.type)
        if handler is None:
            raise ValueError(f"Unknown resource type: {resource.type!r}")
        try:
            handler(credentials, resource)
        except ClientError as exc:
            if _is_already_gone(exc):
                return False
            raise
        return True

    def tag_scan(self, credentials: dict, lab_slug: str, region: str) -> list[str]:
        """Return ARNs tagged labrun:lab-slug=<lab_slug> in *region*.

        ARNs the tagging API still lists but that are already gone (per
        _is_transitional_resource) are filtered out.
        """
        scoped_creds = {**credentials, "region": region}
        client = make_tagging_client(scoped_creds)
        filters = [{"Key": "labrun:lab-slug", "Values": [lab_slug]}]
        found: list[str] = []
        paginator = client.get_paginator("get_resources")
        for page in paginator.paginate(TagFilters=filters):
            for mapping in page.get("ResourceTagMappingList", []):
                candidate = mapping.get("ResourceARN", "")
                if not _is_transitional_resource(scoped_creds, candidate):
                    found.append(candidate)
        return found
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _is_transitional_resource(credentials: dict, arn: str) -> bool:
    """Phase 3 false-positive guard for the tag audit.

    The tagging API can keep listing a resource briefly after deletion. For
    S3 buckets and Glue databases this re-describes the resource: a
    not-found error means the resource is actually gone, so return True and
    skip it. A successful describe, any other error, or an ARN shape this
    function does not recognize returns False/None (falsy), so the resource
    is still reported by the caller.
    """
    try:
        if ":s3:::" in arn:
            bucket_name = arn.split(":::")[-1]
            s3 = make_s3_client(credentials)
            s3.head_bucket(Bucket=bucket_name)
            return False
        if ":glue:" in arn and "/database/" in arn:
            db_name = arn.split("/database/")[-1].split("/")[0]
            glue = make_glue_client(credentials)
            glue.get_database(Name=db_name)
            return False
    except ClientError as exc:
        if _is_already_gone(exc):
            return True
        return False
    # NOTE: unmatched ARN shapes fall through and implicitly return None.
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _delete_s3_bucket(credentials: dict, resource: CleanupResource) -> None:
    """Empty and delete an S3 bucket.

    Fix: the previous implementation made a single list_objects_v2 call,
    which returns at most 1000 keys, so larger buckets could never be fully
    emptied and delete_bucket would fail. This version paginates the listing
    and deletes keys in batches of up to 1000 via delete_objects.

    NOTE(review): object versions / delete markers in versioned buckets are
    not removed here — confirm lab buckets are unversioned.
    """
    s3 = make_s3_client(credentials)
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=resource.id):
        contents = page.get("Contents", [])
        if contents:
            s3.delete_objects(
                Bucket=resource.id,
                Delete={"Objects": [{"Key": obj["Key"]} for obj in contents]},
            )
    s3.delete_bucket(Bucket=resource.id)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _delete_glue_database(credentials: dict, resource: CleanupResource) -> None:
    """Delete a Glue database by name."""
    glue = make_glue_client(credentials)
    glue.delete_database(Name=resource.id)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _delete_glue_crawler(credentials: dict, resource: CleanupResource) -> None:
    """Delete a Glue crawler by name."""
    glue = make_glue_client(credentials)
    glue.delete_crawler(Name=resource.id)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _delete_glue_table(credentials: dict, resource: CleanupResource) -> None:
    """Delete a Glue table; resource.parent must name its database."""
    glue = make_glue_client(credentials)
    if resource.parent is None:
        raise ValueError(f"Glue table {resource.id!r} requires parent (database name)")
    glue.delete_table(DatabaseName=resource.parent, Name=resource.id)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _delete_kinesis_stream(credentials: dict, resource: CleanupResource) -> None:
    """Delete a Kinesis stream, forcing removal of registered consumers."""
    kinesis = make_kinesis_client(credentials)
    kinesis.delete_stream(StreamName=resource.id, EnforceConsumerDeletion=True)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _delete_iam_role(credentials: dict, resource: CleanupResource) -> None:
    """Detach managed policies, delete inline policies, then delete the role.

    Fix: the original code returned from the whole function when a single
    policy was already gone mid-loop, skipping the remaining policies and —
    critically — never deleting the role itself. An already-gone error on an
    individual policy now skips just that policy; an already-gone role still
    short-circuits the whole deletion.
    """
    iam = make_iam_client(credentials)
    role_name = resource.id

    try:
        attached = iam.list_attached_role_policies(RoleName=role_name)
    except ClientError as exc:
        if _is_already_gone(exc):
            return  # role itself no longer exists; nothing to do
        raise
    for policy in attached.get("AttachedPolicies", []):
        try:
            iam.detach_role_policy(RoleName=role_name, PolicyArn=policy["PolicyArn"])
        except ClientError as exc:
            # A vanished policy just means less work; keep detaching the rest.
            if not _is_already_gone(exc):
                raise

    try:
        inline = iam.list_role_policies(RoleName=role_name)
    except ClientError as exc:
        if _is_already_gone(exc):
            return
        raise
    for policy_name in inline.get("PolicyNames", []):
        try:
            iam.delete_role_policy(RoleName=role_name, PolicyName=policy_name)
        except ClientError as exc:
            if not _is_already_gone(exc):
                raise

    try:
        iam.delete_role(RoleName=role_name)
    except ClientError as exc:
        if not _is_already_gone(exc):
            raise
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _describe_s3_bucket(credentials: dict, resource: CleanupResource) -> None:
    """Probe the bucket; raises ClientError when it no longer exists."""
    s3 = make_s3_client(credentials)
    s3.head_bucket(Bucket=resource.id)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _describe_glue_database(credentials: dict, resource: CleanupResource) -> None:
    """Probe the Glue database; raises ClientError when it no longer exists."""
    glue = make_glue_client(credentials)
    glue.get_database(Name=resource.id)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _describe_glue_crawler(credentials: dict, resource: CleanupResource) -> None:
    """Probe the Glue crawler; raises ClientError when it no longer exists."""
    glue = make_glue_client(credentials)
    glue.get_crawler(Name=resource.id)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _describe_glue_table(credentials: dict, resource: CleanupResource) -> None:
    """Probe the Glue table (resource.parent = database); raises ClientError when gone."""
    glue = make_glue_client(credentials)
    if resource.parent is None:
        raise ValueError(f"Glue table {resource.id!r} requires parent (database name)")
    glue.get_table(DatabaseName=resource.parent, Name=resource.id)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _describe_kinesis_stream(credentials: dict, resource: CleanupResource) -> None:
    """Probe the Kinesis stream; raises ClientError when it no longer exists."""
    kinesis = make_kinesis_client(credentials)
    kinesis.describe_stream(StreamName=resource.id)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _describe_iam_role(credentials: dict, resource: CleanupResource) -> None:
    """Probe the IAM role; raises ClientError when it no longer exists."""
    iam = make_iam_client(credentials)
    iam.get_role(RoleName=resource.id)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Maps CleanupResource.type -> delete handler, used by AwsCleanupAdapter.delete_resource.
_DELETE_DISPATCH = {
    "s3_bucket": _delete_s3_bucket,
    "glue_database": _delete_glue_database,
    "glue_crawler": _delete_glue_crawler,
    "glue_table": _delete_glue_table,
    "kinesis_stream": _delete_kinesis_stream,
    "iam_role": _delete_iam_role,
}
|
|
248
|
+
|
|
249
|
+
# Maps CleanupResource.type -> describe handler, used by AwsCleanupAdapter.describe_resource.
# Keep keys in sync with _DELETE_DISPATCH.
_DESCRIBE_DISPATCH = {
    "s3_bucket": _describe_s3_bucket,
    "glue_database": _describe_glue_database,
    "glue_crawler": _describe_glue_crawler,
    "glue_table": _describe_glue_table,
    "kinesis_stream": _describe_kinesis_stream,
    "iam_role": _describe_iam_role,
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Azure adapter stub for labrun-checks. Not yet implemented."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def make_azure_resource_client(credentials: dict):
    """Stub: Azure support has not shipped; always raises NotImplementedError."""
    message = (
        "The Azure adapter is not yet available in labrun-checks. "
        "Azure labs ship in a future release. Contact support@labrun.dev if this blocks you."
    )
    raise NotImplementedError(message)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Databricks adapter stub for labrun-checks. Not yet implemented."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def make_databricks_client(credentials: dict):
    """Stub: Databricks support has not shipped; always raises NotImplementedError."""
    message = (
        "The Databricks adapter is not yet available in labrun-checks. "
        "Databricks labs ship in a future release. Contact support@labrun.dev if this blocks you."
    )
    raise NotImplementedError(message)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Snowflake adapter stub for labrun-checks. Not yet implemented."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def make_snowflake_connection(credentials: dict):
    """Stub: Snowflake support has not shipped; always raises NotImplementedError."""
    message = (
        "The Snowflake adapter is not yet available in labrun-checks. "
        "Snowflake labs ship in a future release. Contact support@labrun.dev if this blocks you."
    )
    raise NotImplementedError(message)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: labrun-checks
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Checkpoint validation for Labrun labs
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Requires-Dist: requests>=2.28
|
|
7
|
+
Requires-Dist: boto3>=1.26
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
10
|
+
Requires-Dist: responses>=0.23; extra == "dev"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/labrun_checks/__init__.py
|
|
4
|
+
src/labrun_checks/_api.py
|
|
5
|
+
src/labrun_checks/_cleanup.py
|
|
6
|
+
src/labrun_checks/_state.py
|
|
7
|
+
src/labrun_checks/_types.py
|
|
8
|
+
src/labrun_checks.egg-info/PKG-INFO
|
|
9
|
+
src/labrun_checks.egg-info/SOURCES.txt
|
|
10
|
+
src/labrun_checks.egg-info/dependency_links.txt
|
|
11
|
+
src/labrun_checks.egg-info/requires.txt
|
|
12
|
+
src/labrun_checks.egg-info/top_level.txt
|
|
13
|
+
src/labrun_checks/adapters/__init__.py
|
|
14
|
+
src/labrun_checks/adapters/aws.py
|
|
15
|
+
src/labrun_checks/adapters/azure.py
|
|
16
|
+
src/labrun_checks/adapters/databricks.py
|
|
17
|
+
src/labrun_checks/adapters/snowflake.py
|
|
18
|
+
tests/test_checks.py
|
|
19
|
+
tests/test_cleanup.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
labrun_checks
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for labrun_checks public API.
|
|
3
|
+
|
|
4
|
+
Covers:
|
|
5
|
+
- Happy path: register -> check -> returns pass, POSTs correct payload
|
|
6
|
+
- Fail path: validator returns fail, propagated correctly
|
|
7
|
+
- Network-failure path: POST fails -> check returns error, not fail
|
|
8
|
+
- Validator exception path: validator raises -> check returns error
|
|
9
|
+
- Credentials never transmitted: POST body contains no credential values
|
|
10
|
+
- cleanup(status='clean'): returns pass, posts to /api/labs/cleanup-complete
|
|
11
|
+
- cleanup(status='dirty'): returns fail
|
|
12
|
+
- cleanup(status=invalid): returns error without posting
|
|
13
|
+
- cleanup payload regression: no event_type, no timestamp
|
|
14
|
+
- cleanup network failure: returns error
|
|
15
|
+
- cleanup HTTP error: returns error
|
|
16
|
+
- Stubs raise NotImplementedError
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from unittest.mock import MagicMock, call, patch
|
|
21
|
+
|
|
22
|
+
import pytest
|
|
23
|
+
import requests
|
|
24
|
+
|
|
25
|
+
import labrun_checks
|
|
26
|
+
from labrun_checks._types import CheckpointResult
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _pass_validator(session):
    """Test helper: validator that always passes."""
    return CheckpointResult("pass")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _fail_validator(session):
    """Test helper: validator that fails and unlocks hint tier 1."""
    return CheckpointResult("fail", hint_tier_available=1)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _error_validator(session):
|
|
38
|
+
raise ConnectionError("boto3 network wobble")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class TestRegisterAndCheck:
    """Exercises register() session state and the check() POST contract.

    NOTE(review): these tests share module-level session state set by
    register() and never reset it between tests; they appear to rely on
    collection order keeping test_check_without_register_raises ahead of
    any register() call — confirm a reset fixture exists before running
    tests in isolation or in parallel.
    """

    def test_register_stores_state(self):
        labrun_checks.register("tok-123", "lab-01")
        session = labrun_checks._state.get_session()
        assert session.session_token == "tok-123"
        assert session.lab_id == "lab-01"

    def test_check_without_register_raises(self):
        # check() before register() must fail loudly, pointing at register().
        with pytest.raises(RuntimeError, match="register"):
            labrun_checks.check(1, _pass_validator)

    def test_happy_path_returns_pass(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value.raise_for_status = MagicMock()
            result = labrun_checks.check(1, _pass_validator)

        assert result.status == "pass"
        assert result.hint_tier_available == 0
        assert result.error_reason is None

    def test_happy_path_posts_correct_payload(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value.raise_for_status = MagicMock()
            labrun_checks.check(2, _pass_validator)

        # Exactly one POST, carrying lab/checkpoint/status plus a timestamp.
        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        body = kwargs["json"]
        assert body["lab_id"] == "lab-01"
        assert body["checkpoint_id"] == 2
        assert body["status"] == "pass"
        assert "timestamp" in body

    def test_happy_path_sends_token_in_header_not_body(self):
        labrun_checks.register("super-secret-token", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value.raise_for_status = MagicMock()
            labrun_checks.check(1, _pass_validator)

        # The session token travels only as a bearer header, never in JSON.
        _, kwargs = mock_post.call_args
        body = kwargs["json"]
        assert "super-secret-token" not in str(body)
        assert kwargs["headers"]["Authorization"] == "Bearer super-secret-token"

    def test_credentials_never_transmitted(self):
        # Deliberately non-AKIA placeholders so secret scanners (gitleaks,
        # GitHub push protection) don't flag this test file. The values still
        # exercise the "credentials must not leave the machine" assertion.
        labrun_checks.register(
            "tok-abc",
            "lab-01",
            credentials={
                "aws_access_key_id": "TEST_KEY_ID_NOT_REAL_XYZ123",
                "aws_secret_access_key": "TEST_SECRET_KEY_NOT_REAL_XYZ123_abcdefghij0123456789",
            },
        )

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value.raise_for_status = MagicMock()
            labrun_checks.check(1, _pass_validator)

        # Neither the JSON body nor any header may contain credential values.
        _, kwargs = mock_post.call_args
        serialized = str(kwargs["json"]) + str(kwargs.get("headers", {}))
        assert "TEST_KEY_ID_NOT_REAL_XYZ123" not in serialized
        assert "TEST_SECRET_KEY_NOT_REAL_XYZ123" not in serialized

    def test_fail_validator_returns_fail(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value.raise_for_status = MagicMock()
            result = labrun_checks.check(1, _fail_validator)

        assert result.status == "fail"
        assert result.hint_tier_available == 1

    def test_network_failure_returns_error_not_fail(self):
        """If the POST to our API fails, check() must return error, not fail."""
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.side_effect = requests.exceptions.ConnectionError("unreachable")
            result = labrun_checks.check(1, _fail_validator)

        # "error" tells the student to retry; "fail" would blame their work.
        assert result.status == "error"
        assert result.status != "fail"

    def test_network_failure_on_pass_returns_error(self):
        """Even if validator passes, a POST failure yields error so student can retry."""
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.side_effect = requests.exceptions.Timeout("timeout")
            result = labrun_checks.check(1, _pass_validator)

        assert result.status == "error"

    def test_validator_exception_returns_error(self):
        """If the validator itself raises (e.g. boto3 network error), return error not fail."""
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value.raise_for_status = MagicMock()
            result = labrun_checks.check(1, _error_validator)

        assert result.status == "error"
        # The exception type is surfaced in the human-readable reason.
        assert "ConnectionError" in result.error_reason

    def test_validator_exception_still_posts(self):
        """A validator exception should still POST the error status to the API."""
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value.raise_for_status = MagicMock()
            labrun_checks.check(1, _error_validator)

        # The error outcome is reported upstream, not swallowed locally.
        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["status"] == "error"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _mock_ok_response():
|
|
168
|
+
response = MagicMock()
|
|
169
|
+
response.status_code = 200
|
|
170
|
+
response.raise_for_status = MagicMock(return_value=None)
|
|
171
|
+
return response
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _mock_http_error_response():
    """Build a fake HTTP 400 response whose raise_for_status() raises."""
    bad = MagicMock()
    bad.status_code = 400
    failing = MagicMock(
        side_effect=requests.exceptions.HTTPError("400 Bad Request")
    )
    bad.raise_for_status = failing
    return bad
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class TestCleanup:
    """Covers cleanup(): status mapping, endpoint routing, payload shape,
    and both network- and HTTP-level failure handling."""

    def test_cleanup_clean_returns_pass(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value = _mock_ok_response()
            result = labrun_checks.cleanup(status="clean")

        assert result.status == "pass"

    def test_cleanup_dirty_returns_fail(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value = _mock_ok_response()
            result = labrun_checks.cleanup(status="dirty")

        assert result.status == "fail"

    def test_cleanup_invalid_status_returns_error_without_posting(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            result = labrun_checks.cleanup(status="maybe")

        # Invalid input is rejected client-side; no request leaves the machine.
        assert result.status == "error"
        mock_post.assert_not_called()
        assert "must be 'clean' or 'dirty'" in result.error_reason

    def test_cleanup_posts_correct_payload_for_clean(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value = _mock_ok_response()
            labrun_checks.cleanup(status="clean")

        # Minimal payload, the cleanup endpoint, and bearer-token auth.
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        assert kwargs["json"] == {"lab_id": "lab-01", "status": "clean"}
        url = args[0]
        assert url.endswith("/api/labs/cleanup-complete")
        assert "/api/labs/checkpoint" not in url
        assert kwargs["headers"]["Authorization"] == "Bearer tok-abc"

    def test_cleanup_posts_correct_payload_for_dirty(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value = _mock_ok_response()
            labrun_checks.cleanup(status="dirty")

        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        assert kwargs["json"] == {"lab_id": "lab-01", "status": "dirty"}
        url = args[0]
        assert url.endswith("/api/labs/cleanup-complete")
        assert "/api/labs/checkpoint" not in url
        assert kwargs["headers"]["Authorization"] == "Bearer tok-abc"

    def test_cleanup_payload_has_no_event_type_or_timestamp(self):
        # Regression guard: the cleanup payload once carried extra fields
        # the endpoint rejects; keep it to lab_id + status only.
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.return_value = _mock_ok_response()
            labrun_checks.cleanup(status="clean")

        _, kwargs = mock_post.call_args
        assert "event_type" not in kwargs["json"]
        assert "timestamp" not in kwargs["json"]

    def test_cleanup_network_failure_returns_error(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            mock_post.side_effect = requests.exceptions.ConnectionError("down")
            result = labrun_checks.cleanup(status="clean")

        assert result.status == "error"
        assert "Network error" in result.error_reason

    def test_cleanup_http_error_returns_error(self):
        labrun_checks.register("tok-abc", "lab-01")

        with patch("labrun_checks._api.requests.post") as mock_post:
            # Server answered but with a 4xx; must surface as error.
            mock_post.return_value = _mock_http_error_response()
            result = labrun_checks.cleanup(status="clean")

        assert result.status == "error"
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class TestAdapterStubs:
    """Every non-AWS adapter factory must refuse to run until implemented."""

    def test_databricks_stub_raises(self):
        from labrun_checks.adapters.databricks import make_databricks_client

        factory = make_databricks_client
        with pytest.raises(NotImplementedError, match="Databricks"):
            factory({})

    def test_snowflake_stub_raises(self):
        from labrun_checks.adapters.snowflake import make_snowflake_connection

        factory = make_snowflake_connection
        with pytest.raises(NotImplementedError, match="Snowflake"):
            factory({})

    def test_azure_stub_raises(self):
        from labrun_checks.adapters.azure import make_azure_resource_client

        factory = make_azure_resource_client
        with pytest.raises(NotImplementedError, match="Azure"):
            factory({})
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Tests for run_cleanup: three-phase teardown/verify/tag-audit protocol."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
import labrun_checks
|
|
7
|
+
from labrun_checks._types import CleanupResource, CleanupResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FakeAdapter:
    """In-memory CleanupAdapter double with scriptable per-resource failures.

    Configure phase-1 delete failures via ``delete_side_effects``, phase-2
    liveness answers via ``describe_results`` / ``describe_side_effects``,
    and the phase-3 tag scan via ``tag_scan_result`` / ``tag_scan_error``.
    """

    def __init__(self):
        # Phase 1 bookkeeping: ids successfully deleted + scripted failures.
        self.deleted: list[str] = []
        self.delete_side_effects: dict[str, Exception] = {}
        # Phase 2: scripted liveness answers and describe failures.
        self.describe_results: dict[str, bool] = {}
        self.describe_side_effects: dict[str, Exception] = {}
        # Phase 3: scripted tag-scan output or failure.
        self.tag_scan_result: list[str] = []
        self.tag_scan_error: Exception | None = None

    def delete_resource(self, credentials: dict, resource: CleanupResource) -> None:
        rid = resource.id
        if rid in self.delete_side_effects:
            raise self.delete_side_effects[rid]
        self.deleted.append(rid)

    def describe_resource(self, credentials: dict, resource: CleanupResource) -> bool:
        rid = resource.id
        if rid in self.describe_side_effects:
            raise self.describe_side_effects[rid]
        # Default: resource is gone (False) unless scripted otherwise.
        return self.describe_results.get(rid, False)

    def tag_scan(self, credentials: dict, lab_slug: str, region: str) -> list[str]:
        if self.tag_scan_error is not None:
            raise self.tag_scan_error
        return self.tag_scan_result
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _register_with_creds():
    """Register a session for the s3-raw-ingestion lab with dummy AWS creds."""
    creds = {
        "aws_access_key_id": "TESTKEY",
        "aws_secret_access_key": "TESTSECRET",
        "region": "us-east-1",
    }
    labrun_checks.register("tok-cleanup", "s3-raw-ingestion", credentials=creds)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _make_manifest():
    """Build a two-resource cleanup manifest: an S3 bucket and a Glue DB."""
    bucket = CleanupResource(
        type="s3_bucket",
        id="labrun-test-bucket",
        cli_delete_command="aws s3 rb s3://labrun-test-bucket --force",
    )
    database = CleanupResource(
        type="glue_database",
        id="labrun_test_db",
        cli_delete_command="aws glue delete-database --name labrun_test_db",
    )
    return [bucket, database]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class TestRunCleanupHappyPath:
    """All three phases succeed: delete, verify gone, tag scan finds nothing."""

    def test_all_clean_returns_clean(self):
        _register_with_creds()
        adapter = FakeAdapter()
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        assert result.status == "clean"
        assert result.warnings == []
        assert result.orphans == []

    def test_all_resources_deleted(self):
        _register_with_creds()
        adapter = FakeAdapter()
        labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        # Phase 1 must attempt every manifest entry.
        assert "labrun-test-bucket" in adapter.deleted
        assert "labrun_test_db" in adapter.deleted

    def test_empty_manifest_returns_clean(self):
        # Nothing to delete is a valid, clean outcome — not an error.
        _register_with_creds()
        adapter = FakeAdapter()
        result = labrun_checks.run_cleanup([], adapter=adapter)

        assert result.status == "clean"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class TestRunCleanupWithoutRegister:
    """run_cleanup() must refuse to operate before register() is called."""

    def test_raises_runtime_error(self):
        unregistered = FakeAdapter()
        with pytest.raises(RuntimeError, match="register"):
            labrun_checks.run_cleanup(_make_manifest(), adapter=unregistered)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class TestPhase1Teardown:
    """Phase 1 (delete) failures: warn with the manual CLI fallback, keep going."""

    def test_delete_failure_returns_dirty_with_warning(self):
        _register_with_creds()
        adapter = FakeAdapter()
        adapter.delete_side_effects["labrun-test-bucket"] = RuntimeError("AccessDenied")
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        # A failed delete marks the run dirty and surfaces the resource id
        # plus its manual cli_delete_command so the student can fix it.
        assert result.status == "dirty"
        assert any("FAILED TO DELETE" in w for w in result.warnings)
        assert any("labrun-test-bucket" in w for w in result.warnings)
        assert any("aws s3 rb" in w for w in result.warnings)

    def test_delete_failure_continues_to_next_resource(self):
        _register_with_creds()
        adapter = FakeAdapter()
        adapter.delete_side_effects["labrun-test-bucket"] = RuntimeError("boom")
        labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        # One bad resource must not abort teardown of the rest.
        assert "labrun_test_db" in adapter.deleted
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class TestPhase2Verification:
    """Phase 2 (describe) re-checks that each manifest resource is really gone."""

    def test_resource_still_alive_returns_dirty(self):
        _register_with_creds()
        adapter = FakeAdapter()
        # describe_resource returning True means the bucket survived deletion.
        adapter.describe_results["labrun-test-bucket"] = True
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        # A surviving resource is reported with its manual delete command.
        assert result.status == "dirty"
        assert any("still alive" in w for w in result.warnings)
        assert any("aws s3 rb" in w for w in result.warnings)

    def test_describe_error_returns_dirty(self):
        _register_with_creds()
        adapter = FakeAdapter()
        adapter.describe_side_effects["labrun_test_db"] = RuntimeError("throttled")
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        # If we can't verify deletion, we must not claim the lab is clean.
        assert result.status == "dirty"
        assert any("VERIFICATION ERROR" in w for w in result.warnings)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class TestPhase3TagAudit:
    """Phase 3 scans by tag for resources the manifest didn't know about."""

    def test_orphan_found_returns_dirty(self):
        _register_with_creds()
        adapter = FakeAdapter()
        adapter.tag_scan_result = ["arn:aws:s3:::some-orphan-bucket"]
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        assert result.status == "dirty"
        assert len(result.orphans) == 1
        assert "ORPHAN FOUND" in result.orphans[0]
        assert "some-orphan-bucket" in result.orphans[0]

    def test_manifest_resource_in_tag_scan_not_reported_as_orphan(self):
        _register_with_creds()
        adapter = FakeAdapter()
        # The scan echoing back a manifest resource is expected, not an orphan.
        adapter.tag_scan_result = ["arn:aws:s3:::labrun-test-bucket"]
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        assert result.status == "clean"
        assert result.orphans == []

    def test_tag_scan_error_returns_dirty(self):
        _register_with_creds()
        adapter = FakeAdapter()
        adapter.tag_scan_error = RuntimeError("tagging API down")
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        # An audit we couldn't run must not be reported as clean.
        assert result.status == "dirty"
        assert any("TAG AUDIT ERROR" in w for w in result.warnings)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class TestAdapterDefault:
    """Default adapter selection when the caller passes none."""

    def test_none_adapter_imports_aws(self):
        """When adapter=None, run_cleanup should try to instantiate AwsCleanupAdapter."""
        _register_with_creds()
        from unittest.mock import patch
        with patch("labrun_checks.adapters.aws.AwsCleanupAdapter") as mock_cls:
            # Substitute the fake so no real AWS calls can happen.
            mock_instance = FakeAdapter()
            mock_cls.return_value = mock_instance
            result = labrun_checks.run_cleanup(_make_manifest())

        mock_cls.assert_called_once()
        assert result.status == "clean"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class TestCleanupResultShape:
    """CleanupResult exposes status plus list-typed warnings and orphans."""

    def test_result_has_expected_fields(self):
        _register_with_creds()
        adapter = FakeAdapter()
        # Force both a phase-2 warning and a phase-3 orphan in one run.
        adapter.describe_results["labrun-test-bucket"] = True
        adapter.tag_scan_result = ["arn:aws:s3:::orphan-bucket"]
        result = labrun_checks.run_cleanup(_make_manifest(), adapter=adapter)

        assert isinstance(result, CleanupResult)
        assert result.status == "dirty"
        assert isinstance(result.warnings, list)
        assert isinstance(result.orphans, list)
        assert len(result.warnings) >= 1
        assert len(result.orphans) >= 1
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class TestRegisterCredentials:
    """register() must stash credentials on the session, defaulting to {}."""

    def test_register_with_credentials_kwarg(self):
        from labrun_checks._state import get_session

        expected = {
            "aws_access_key_id": "TESTKEY",
            "aws_secret_access_key": "TESTSECRET",
            "region": "us-east-1",
        }
        labrun_checks.register("tok", "lab-1", credentials=expected)
        assert get_session().credentials == expected

    def test_register_without_credentials_defaults_empty_dict(self):
        from labrun_checks._state import get_session

        labrun_checks.register("tok", "lab-1")
        assert get_session().credentials == {}
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class TestIamRoleAdapter:
    """Unit tests for the AWS adapter's IAM-role helper functions."""

    def test_delete_iam_role_detaches_managed_and_deletes_inline_then_role(self):
        from unittest.mock import MagicMock, patch

        from labrun_checks._types import CleanupResource
        from labrun_checks.adapters.aws import _delete_iam_role

        iam = MagicMock()
        # Script one attached managed policy and one inline policy.
        iam.list_attached_role_policies.return_value = {
            "AttachedPolicies": [{"PolicyArn": "arn:aws:iam::123:policy/managed-1"}],
        }
        iam.list_role_policies.return_value = {"PolicyNames": ["inline-1"]}

        with patch("labrun_checks.adapters.aws.make_iam_client", return_value=iam):
            resource = CleanupResource(type="iam_role", id="my-role")
            _delete_iam_role({}, resource)

        # IAM requires detaching managed and deleting inline policies
        # before the role itself can be deleted.
        iam.detach_role_policy.assert_called_once_with(
            RoleName="my-role",
            PolicyArn="arn:aws:iam::123:policy/managed-1",
        )
        iam.delete_role_policy.assert_called_once_with(
            RoleName="my-role",
            PolicyName="inline-1",
        )
        iam.delete_role.assert_called_once_with(RoleName="my-role")

    def test_describe_iam_role_calls_get_role(self):
        from unittest.mock import MagicMock, patch

        from labrun_checks._types import CleanupResource
        from labrun_checks.adapters.aws import _describe_iam_role

        iam = MagicMock()
        with patch("labrun_checks.adapters.aws.make_iam_client", return_value=iam):
            resource = CleanupResource(type="iam_role", id="my-role")
            _describe_iam_role({}, resource)

        iam.get_role.assert_called_once_with(RoleName="my-role")

    def test_no_such_entity_is_already_gone(self):
        from botocore.exceptions import ClientError

        from labrun_checks.adapters.aws import _is_already_gone

        # NoSuchEntity means the role was already removed — treated as
        # idempotent success rather than a cleanup failure.
        exc = ClientError(
            {"Error": {"Code": "NoSuchEntity", "Message": "Role not found"}},
            "GetRole",
        )
        assert _is_already_gone(exc) is True
|