compose-runner 0.6.1__py2.py3-none-any.whl → 0.6.2rc1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compose_runner/_version.py +2 -2
- compose_runner/aws_lambda/common.py +60 -0
- compose_runner/aws_lambda/log_poll_handler.py +15 -39
- compose_runner/aws_lambda/results_handler.py +13 -38
- compose_runner/aws_lambda/run_handler.py +83 -105
- compose_runner/aws_lambda/status_handler.py +102 -0
- compose_runner/ecs_task.py +145 -0
- compose_runner/tests/test_lambda_handlers.py +87 -30
- compose_runner-0.6.2rc1.dist-info/METADATA +79 -0
- {compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/RECORD +13 -10
- compose_runner-0.6.1.dist-info/METADATA +0 -62
- {compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/WHEEL +0 -0
- {compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/entry_points.txt +0 -0
- {compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/licenses/LICENSE +0 -0
compose_runner/_version.py  CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.6.1'
-__version_tuple__ = version_tuple = (0, 6, 1)
+__version__ = version = '0.6.2rc1'
+__version_tuple__ = version_tuple = (0, 6, 2, 'rc1')
 
 __commit_id__ = commit_id = None
compose_runner/aws_lambda/common.py  ADDED
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import base64
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+
+def is_http_event(event: Any) -> bool:
+    return isinstance(event, dict) and "requestContext" in event
+
+
+def _decode_body(event: Dict[str, Any]) -> Optional[str]:
+    body = event.get("body")
+    if not body:
+        return None
+    if event.get("isBase64Encoded"):
+        return base64.b64decode(body).decode("utf-8")
+    return body
+
+
+def extract_payload(event: Dict[str, Any]) -> Dict[str, Any]:
+    if not is_http_event(event):
+        return event
+    body = _decode_body(event)
+    if not body:
+        return {}
+    return json.loads(body)
+
+
+def http_response(body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
+    return {
+        "statusCode": status_code,
+        "headers": {"Content-Type": "application/json"},
+        "body": json.dumps(body),
+    }
+
+
+@dataclass(frozen=True)
+class LambdaRequest:
+    raw_event: Any
+    payload: Dict[str, Any]
+    is_http: bool
+
+    @classmethod
+    def parse(cls, event: Any) -> "LambdaRequest":
+        payload = extract_payload(event)
+        return cls(raw_event=event, payload=payload, is_http=is_http_event(event))
+
+    def respond(self, body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
+        if self.is_http:
+            return http_response(body, status_code)
+        return body
+
+    def bad_request(self, message: str, status_code: int = 400) -> Dict[str, Any]:
+        return self.respond({"status": "FAILED", "error": message}, status_code=status_code)
+
+    def get(self, key: str, default: Any = None) -> Any:
+        return self.payload.get(key, default)
+
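The handler modules changed below all consume this new helper in the same pattern. A minimal usage sketch follows; the `example_handler` function and the `some_field` key are illustrative only and are not part of the package:

```python
from typing import Any, Dict

from compose_runner.aws_lambda.common import LambdaRequest


def example_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    # LambdaRequest.parse works for both direct invocations and Function URL (HTTP) events.
    request = LambdaRequest.parse(event)
    value = request.get("some_field")
    if not value:
        # For HTTP events this yields a 400 JSON response envelope;
        # for direct invocations it simply returns the error body.
        return request.bad_request("Request payload must include 'some_field'.")
    # respond() adds the statusCode/headers/body envelope only when the event was HTTP.
    return request.respond({"status": "OK", "some_field": value})
```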
compose_runner/aws_lambda/log_poll_handler.py  CHANGED
@@ -2,52 +2,30 @@ from __future__ import annotations
 
 import os
 import time
-import base64
-import json
 from typing import Any, Dict, List
 
 import boto3
 
+from compose_runner.aws_lambda.common import LambdaRequest
+
 _LOGS_CLIENT = boto3.client("logs", region_name=os.environ.get("AWS_REGION", "us-east-1"))
 
 LOG_GROUP_ENV = "RUNNER_LOG_GROUP"
 DEFAULT_LOOKBACK_MS_ENV = "DEFAULT_LOOKBACK_MS"
 
-def _is_http_event(event: Any) -> bool:
-    return isinstance(event, dict) and "requestContext" in event
-
-
-def _extract_payload(event: Dict[str, Any]) -> Dict[str, Any]:
-    if not _is_http_event(event):
-        return event
-    body = event.get("body")
-    if not body:
-        return {}
-    if event.get("isBase64Encoded"):
-        body = base64.b64decode(body).decode("utf-8")
-    return json.loads(body)
-
-
-def _http_response(body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
-    return {
-        "statusCode": status_code,
-        "headers": {"Content-Type": "application/json"},
-        "body": json.dumps(body),
-    }
-
 
 def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
-
-
-
-    if not …
-        message = "Request payload must include '…
-        if …
-            return …
+    request = LambdaRequest.parse(event)
+    payload = request.payload
+    artifact_prefix = payload.get("artifact_prefix")
+    if not artifact_prefix:
+        message = "Request payload must include 'artifact_prefix'."
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
         raise KeyError(message)
-    next_token = …
-    start_time = …
-    end_time = …
+    next_token = payload.get("next_token")
+    start_time = payload.get("start_time")
+    end_time = payload.get("end_time")
 
     log_group = os.environ[LOG_GROUP_ENV]
     lookback_ms = int(os.environ.get(DEFAULT_LOOKBACK_MS_ENV, "3600000"))
@@ -60,7 +38,7 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
 
     params: Dict[str, Any] = {
         "logGroupName": log_group,
-        "filterPattern": f'"{…
+        "filterPattern": f'"{artifact_prefix}"',
         "startTime": int(start_time),
     }
     if end_time is not None:
@@ -75,10 +53,8 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
     ]
 
     body = {
-        "…
+        "artifact_prefix": artifact_prefix,
         "events": events,
         "next_token": response.get("nextToken"),
     }
-
-        return _http_response(body)
-    return body
+    return request.respond(body)
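As a usage note (values below are placeholders), the poller is now keyed by `artifact_prefix` rather than a Lambda request ID; `start_time`/`end_time` are optional epoch-millisecond bounds and `next_token` continues a previous page:

```python
# Hypothetical direct invocation of the log poller (no HTTP envelope).
event = {
    "artifact_prefix": "artifact-123",  # used as the CloudWatch Logs filter pattern
    "start_time": 1700000000000,        # optional; epoch milliseconds
    "next_token": None,                 # optional; token returned by a previous call
}
# result = log_poll_handler.handler(event, context)
# result -> {"artifact_prefix": "artifact-123", "events": [...], "next_token": "..."}
```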
compose_runner/aws_lambda/results_handler.py  CHANGED
@@ -1,13 +1,13 @@
 from __future__ import annotations
 
 import os
-import base64
-import json
 from datetime import datetime, timezone
 from typing import Any, Dict, List
 
 import boto3
 
+from compose_runner.aws_lambda.common import LambdaRequest
+
 _S3 = boto3.client("s3", region_name=os.environ.get("AWS_REGION", "us-east-1"))
 
 RESULTS_BUCKET_ENV = "RESULTS_BUCKET"
@@ -21,44 +21,21 @@ def _serialize_dt(value: datetime) -> str:
     return value.astimezone(timezone.utc).isoformat()
 
 
-def _is_http_event(event: Any) -> bool:
-    return isinstance(event, dict) and "requestContext" in event
-
-
-def _extract_payload(event: Dict[str, Any]) -> Dict[str, Any]:
-    if not _is_http_event(event):
-        return event
-    body = event.get("body")
-    if not body:
-        return {}
-    if event.get("isBase64Encoded"):
-        body = base64.b64decode(body).decode("utf-8")
-    return json.loads(body)
-
-
-def _http_response(body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
-    return {
-        "statusCode": status_code,
-        "headers": {"Content-Type": "application/json"},
-        "body": json.dumps(body),
-    }
-
-
 def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
-
-
+    request = LambdaRequest.parse(event)
+    payload = request.payload
     bucket = os.environ[RESULTS_BUCKET_ENV]
     prefix = os.environ.get(RESULTS_PREFIX_ENV)
 
-
-    if not …
-        message = "Request payload must include '…
-        if …
-            return …
+    artifact_prefix = payload.get("artifact_prefix")
+    if not artifact_prefix:
+        message = "Request payload must include 'artifact_prefix'."
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
         raise KeyError(message)
-    expires_in = int(…
+    expires_in = int(payload.get("expires_in", DEFAULT_EXPIRES_IN))
 
-    key_prefix = f"{prefix.rstrip('/')}/{…
+    key_prefix = f"{prefix.rstrip('/')}/{artifact_prefix}" if prefix else artifact_prefix
 
     response = _S3.list_objects_v2(Bucket=bucket, Prefix=key_prefix)
     contents = response.get("Contents", [])
@@ -84,11 +61,9 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
     )
 
     body = {
-        "…
+        "artifact_prefix": artifact_prefix,
         "artifacts": artifacts,
         "bucket": bucket,
         "prefix": key_prefix,
     }
-
-        return _http_response(body)
-    return body
+    return request.respond(body)
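For reference, the reworked fetcher is also keyed by `artifact_prefix`. A direct invocation looks roughly like this (placeholder values; the artifact fields mirror the test expectations later in this diff):

```python
# Hypothetical direct invocation of the results fetcher.
event = {"artifact_prefix": "artifact-123", "expires_in": 3600}
# body = results_handler.handler(event, context)
# body -> {
#     "artifact_prefix": "artifact-123",
#     "artifacts": [{"filename": "file1.nii.gz", "url": "https://signed/url"}, ...],
#     "bucket": "<RESULTS_BUCKET>",
#     "prefix": "<RESULTS_PREFIX>/artifact-123",
# }
```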
compose_runner/aws_lambda/run_handler.py  CHANGED
@@ -3,45 +3,24 @@ from __future__ import annotations
 import json
 import logging
 import os
-import …
-from …
-from typing import Any, Dict, Iterable, Optional
+import uuid
+from typing import Any, Dict, Optional
 
 import boto3
+from botocore.exceptions import ClientError
 
-
-NUMBA_CACHE_DIR.mkdir(parents=True, exist_ok=True)
-os.environ["NUMBA_CACHE_DIR"] = str(NUMBA_CACHE_DIR)
-
-from compose_runner.run import run as run_compose
+from compose_runner.aws_lambda.common import LambdaRequest
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
-
-
-
-def _is_http_event(event: Any) -> bool:
-    return isinstance(event, dict) and "requestContext" in event
+_SFN_CLIENT = boto3.client("stepfunctions", region_name=os.environ.get("AWS_REGION", "us-east-1"))
 
-
-
-
-
-
-    if not body:
-        return {}
-    if event.get("isBase64Encoded"):
-        body = base64.b64decode(body).decode("utf-8")
-    return json.loads(body)
-
-
-def _http_response(body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
-    return {
-        "statusCode": status_code,
-        "headers": {"Content-Type": "application/json"},
-        "body": json.dumps(body),
-    }
+STATE_MACHINE_ARN_ENV = "STATE_MACHINE_ARN"
+RESULTS_BUCKET_ENV = "RESULTS_BUCKET"
+RESULTS_PREFIX_ENV = "RESULTS_PREFIX"
+NSC_KEY_ENV = "NSC_KEY"
+NV_KEY_ENV = "NV_KEY"
 
 
 def _log(job_id: str, message: str, **details: Any) -> None:
@@ -50,88 +29,87 @@ def _log(job_id: str, message: str, **details: Any) -> None:
     logger.info(json.dumps(payload))
 
 
-def …
-
-
-
-
-
-
-
-
-
-
+def _job_input(
+    payload: Dict[str, Any],
+    artifact_prefix: str,
+    bucket: Optional[str],
+    prefix: Optional[str],
+    nsc_key: Optional[str],
+    nv_key: Optional[str],
+) -> Dict[str, Any]:
+    no_upload_flag = bool(payload.get("no_upload", False))
+    doc: Dict[str, Any] = {
+        "artifact_prefix": artifact_prefix,
+        "meta_analysis_id": payload["meta_analysis_id"],
+        "environment": payload.get("environment", "production"),
+        "no_upload": "true" if no_upload_flag else "false",
+        "results": {"bucket": bucket or "", "prefix": prefix or ""},
+    }
+    n_cores = payload.get("n_cores")
+    doc["n_cores"] = str(n_cores) if n_cores is not None else ""
+    if nsc_key is not None:
+        doc["nsc_key"] = nsc_key
+    else:
+        doc["nsc_key"] = ""
+    if nv_key is not None:
+        doc["nv_key"] = nv_key
+    else:
+        doc["nv_key"] = ""
+    return doc
 
 
 def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
-
-    payload = …
-
+    request = LambdaRequest.parse(event)
+    payload = request.payload
+    if STATE_MACHINE_ARN_ENV not in os.environ:
+        raise RuntimeError(f"{STATE_MACHINE_ARN_ENV} environment variable must be set.")
+
     if "meta_analysis_id" not in payload:
         message = "Request payload must include 'meta_analysis_id'."
-
-
-            return _http_response(
-                {"job_id": job_id, "status": "FAILED", "error": message}, status_code=400
-            )
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
         raise KeyError(message)
-    meta_analysis_id = payload["meta_analysis_id"]
-    environment = payload.get("environment", "production")
-    nsc_key = payload.get("nsc_key") or os.environ.get("NSC_KEY")
-    nv_key = payload.get("nv_key") or os.environ.get("NV_KEY")
-    no_upload = bool(payload.get("no_upload", False))
-    n_cores = payload.get("n_cores")
 
-
-
-
-
-
+    artifact_prefix = payload.get("artifact_prefix") or str(uuid.uuid4())
+    bucket = os.environ.get(RESULTS_BUCKET_ENV)
+    prefix = os.environ.get(RESULTS_PREFIX_ENV)
+    nsc_key = payload.get("nsc_key") or os.environ.get(NSC_KEY_ENV)
+    nv_key = payload.get("nv_key") or os.environ.get(NV_KEY_ENV)
+
+    job_input = _job_input(payload, artifact_prefix, bucket, prefix, nsc_key, nv_key)
+    params = {
+        "stateMachineArn": os.environ[STATE_MACHINE_ARN_ENV],
+        "name": artifact_prefix,
+        "input": json.dumps(job_input),
+    }
 
-    _log(
-        job_id,
-        "workflow.start",
-        meta_analysis_id=meta_analysis_id,
-        environment=environment,
-        no_upload=no_upload,
-    )
     try:
-
-
-
-            result_dir=str(result_dir),
-            nsc_key=nsc_key,
-            nv_key=nv_key,
-            no_upload=no_upload,
-            n_cores=n_cores,
-        )
-        _log(job_id, "workflow.completed", result_url=url)
-
-        if bucket:
-            _upload_results(job_id, result_dir, bucket, prefix)
-            _log(job_id, "artifacts.uploaded", bucket=bucket, prefix=prefix)
-
+        response = _SFN_CLIENT.start_execution(**params)
+    except _SFN_CLIENT.exceptions.ExecutionAlreadyExists as exc:
+        _log(artifact_prefix, "workflow.duplicate", error=str(exc))
         body = {
-            "…
-            "…
-            "…
-            "artifacts_bucket": bucket,
-            "artifacts_prefix": prefix,
+            "status": "FAILED",
+            "error": "A job with the provided artifact_prefix already exists.",
+            "artifact_prefix": artifact_prefix,
         }
-        if …
-            return …
-
-    except …
-        _log(…
-
-
-
-        )
-        raise
-
-
-
-
-
-
-
+        if request.is_http:
+            return request.respond(body, status_code=409)
+        raise ValueError(body["error"]) from exc
+    except ClientError as exc:
+        _log(artifact_prefix, "workflow.failed_to_queue", error=str(exc))
+        message = "Failed to start compose-runner job."
+        body = {"status": "FAILED", "error": message}
+        if request.is_http:
+            return request.respond(body, status_code=500)
+        raise RuntimeError(message) from exc
+
+    execution_arn = response["executionArn"]
+    _log(artifact_prefix, "workflow.queued", execution_arn=execution_arn)
+
+    body = {
+        "job_id": execution_arn,
+        "artifact_prefix": artifact_prefix,
+        "status": "SUBMITTED",
+        "status_url": f"/jobs/{execution_arn}",
+    }
+    return request.respond(body, status_code=202)
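To illustrate the new submission flow (a sketch with placeholder values), a direct invocation payload and the expected response shape look roughly like this; the handler also serializes a string-valued input document via `_job_input` for the Step Functions execution:

```python
# Hypothetical submit payload (direct invocation; HTTP callers send the same JSON body).
payload = {
    "meta_analysis_id": "abc123",       # required
    "artifact_prefix": "artifact-123",  # optional; a uuid4 is generated when omitted
    "environment": "production",
    "no_upload": False,
    "n_cores": 2,
}
# response = run_handler.handler(payload, context)
# response -> {
#     "job_id": "<executionArn>",
#     "artifact_prefix": "artifact-123",
#     "status": "SUBMITTED",
#     "status_url": "/jobs/<executionArn>",
# }   # returned with HTTP status 202 for Function URL callers
```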
compose_runner/aws_lambda/status_handler.py  ADDED
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+import boto3
+from botocore.exceptions import ClientError
+
+from compose_runner.aws_lambda.common import LambdaRequest
+
+_SFN = boto3.client("stepfunctions", region_name=os.environ.get("AWS_REGION", "us-east-1"))
+_S3 = boto3.client("s3", region_name=os.environ.get("AWS_REGION", "us-east-1"))
+
+RESULTS_BUCKET_ENV = "RESULTS_BUCKET"
+RESULTS_PREFIX_ENV = "RESULTS_PREFIX"
+METADATA_FILENAME = "metadata.json"
+
+
+def _serialize_dt(value: datetime) -> str:
+    return value.astimezone().isoformat()
+
+
+def _metadata_key(prefix: Optional[str], artifact_prefix: str) -> str:
+    if prefix:
+        return f"{prefix.rstrip('/')}/{artifact_prefix}/{METADATA_FILENAME}"
+    return f"{artifact_prefix}/{METADATA_FILENAME}"
+
+
+def _load_metadata(bucket: str, prefix: Optional[str], artifact_prefix: str) -> Optional[Dict[str, Any]]:
+    key = _metadata_key(prefix, artifact_prefix)
+    try:
+        response = _S3.get_object(Bucket=bucket, Key=key)
+    except ClientError as error:
+        if error.response["Error"]["Code"] in {"NoSuchKey", "404"}:
+            return None
+        raise
+    data = response["Body"].read()
+    return json.loads(data.decode("utf-8"))
+
+
+def _parse_output(output: Optional[str]) -> Dict[str, Any]:
+    if not output:
+        return {}
+    try:
+        return json.loads(output)
+    except json.JSONDecodeError:
+        return {"raw_output": output}
+
+
+def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
+    request = LambdaRequest.parse(event)
+    payload = request.payload
+
+    job_id = payload.get("job_id")
+    if not job_id:
+        message = "Request payload must include 'job_id'."
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
+        raise KeyError(message)
+
+    try:
+        description = _SFN.describe_execution(executionArn=job_id)
+    except ClientError as error:
+        body = {"status": "FAILED", "error": error.response["Error"]["Message"]}
+        if request.is_http:
+            status_code = 404 if error.response["Error"]["Code"] == "ExecutionDoesNotExist" else 500
+            return request.respond(body, status_code=status_code)
+        raise
+
+    status = description["status"]
+    body: Dict[str, Any] = {
+        "job_id": job_id,
+        "status": status,
+        "start_time": _serialize_dt(description["startDate"]),
+    }
+    if "stopDate" in description:
+        body["stop_time"] = _serialize_dt(description["stopDate"])
+
+    output_doc = _parse_output(description.get("output"))
+    body["output"] = output_doc
+
+    artifact_prefix = description.get("name")
+    if not artifact_prefix:
+        raise ValueError("Execution does not expose a name; cannot determine artifact prefix.")
+    body["artifact_prefix"] = artifact_prefix
+
+    if status in {"SUCCEEDED", "FAILED"}:
+        results_info = output_doc.get("results") or {}
+        bucket = results_info.get("bucket") or os.environ.get(RESULTS_BUCKET_ENV)
+        prefix = results_info.get("prefix") or os.environ.get(RESULTS_PREFIX_ENV)
+
+        if bucket and artifact_prefix:
+            metadata = _load_metadata(bucket, prefix, artifact_prefix)
+            if metadata:
+                body["result"] = metadata
+
+    if status == "FAILED":
+        body["error"] = output_doc.get("error")
+
+    return request.respond(body)
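For reference, polling a finished job might look like this (a sketch; the ARN and URLs are placeholders):

```python
# Hypothetical status poll for a completed execution.
event = {"job_id": "arn:aws:states:us-east-1:123:execution:compose-runner:artifact-123"}
# body = status_handler.handler(event, context)
# body -> {
#     "job_id": "arn:aws:states:...",
#     "status": "SUCCEEDED",
#     "start_time": "...", "stop_time": "...",
#     "output": {...},                 # parsed Step Functions execution output
#     "artifact_prefix": "artifact-123",
#     "result": {...},                 # metadata.json written by the ECS task, when present
# }
```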
compose_runner/ecs_task.py  ADDED
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+import json
+import logging
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, Iterable, Optional
+
+import boto3
+
+from compose_runner.run import run as run_compose
+
+NUMBA_CACHE_DIR = Path(os.environ.get("NUMBA_CACHE_DIR", "/tmp/numba_cache"))
+NUMBA_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+os.environ["NUMBA_CACHE_DIR"] = str(NUMBA_CACHE_DIR)
+
+logger = logging.getLogger("compose_runner.ecs_task")
+handler = logging.StreamHandler(sys.stdout)
+formatter = logging.Formatter("%(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+_S3_CLIENT = boto3.client("s3", region_name=os.environ.get("AWS_REGION", "us-east-1"))
+
+RESULTS_BUCKET_ENV = "RESULTS_BUCKET"
+RESULTS_PREFIX_ENV = "RESULTS_PREFIX"
+ARTIFACT_PREFIX_ENV = "ARTIFACT_PREFIX"
+META_ANALYSIS_ENV = "META_ANALYSIS_ID"
+ENVIRONMENT_ENV = "ENVIRONMENT"
+NSC_KEY_ENV = "NSC_KEY"
+NV_KEY_ENV = "NV_KEY"
+NO_UPLOAD_ENV = "NO_UPLOAD"
+N_CORES_ENV = "N_CORES"
+DELETE_TMP_ENV = "DELETE_TMP"
+METADATA_FILENAME = "metadata.json"
+
+
+def _log(artifact_prefix: str, message: str, **details: Any) -> None:
+    payload = {"artifact_prefix": artifact_prefix, "message": message, **details}
+    logger.info(json.dumps(payload))
+
+
+def _iter_result_files(result_dir: Path) -> Iterable[Path]:
+    for path in result_dir.iterdir():
+        if path.is_file():
+            yield path
+
+
+def _upload_results(artifact_prefix: str, result_dir: Path, bucket: str, prefix: Optional[str]) -> None:
+    base_prefix = f"{prefix.rstrip('/')}/{artifact_prefix}" if prefix else artifact_prefix
+    for file_path in _iter_result_files(result_dir):
+        key = f"{base_prefix}/{file_path.name}"
+        _S3_CLIENT.upload_file(str(file_path), bucket, key)
+
+
+def _write_metadata(bucket: str, prefix: Optional[str], artifact_prefix: str, metadata: Dict[str, Any]) -> None:
+    base_prefix = f"{prefix.rstrip('/')}/{artifact_prefix}" if prefix else artifact_prefix
+    key = f"{base_prefix}/{METADATA_FILENAME}"
+    metadata["metadata_key"] = key
+    _S3_CLIENT.put_object(
+        Bucket=bucket,
+        Key=key,
+        Body=json.dumps(metadata).encode("utf-8"),
+        ContentType="application/json",
+    )
+
+
+def _bool_from_env(value: Optional[str]) -> bool:
+    if value is None:
+        return False
+    return value.lower() in {"1", "true", "t", "yes", "y"}
+
+
+def main() -> None:
+    if ARTIFACT_PREFIX_ENV not in os.environ:
+        raise RuntimeError(f"{ARTIFACT_PREFIX_ENV} environment variable must be set.")
+    if META_ANALYSIS_ENV not in os.environ:
+        raise RuntimeError(f"{META_ANALYSIS_ENV} environment variable must be set.")
+
+    artifact_prefix = os.environ[ARTIFACT_PREFIX_ENV]
+    meta_analysis_id = os.environ[META_ANALYSIS_ENV]
+    environment = os.environ.get(ENVIRONMENT_ENV, "production")
+    nsc_key = os.environ.get(NSC_KEY_ENV) or None
+    nv_key = os.environ.get(NV_KEY_ENV) or None
+    no_upload = _bool_from_env(os.environ.get(NO_UPLOAD_ENV))
+    n_cores_value = os.environ.get(N_CORES_ENV)
+    n_cores = int(n_cores_value) if n_cores_value else None
+
+    bucket = os.environ.get(RESULTS_BUCKET_ENV)
+    prefix = os.environ.get(RESULTS_PREFIX_ENV)
+
+    result_dir = Path("/tmp") / artifact_prefix
+    result_dir.mkdir(parents=True, exist_ok=True)
+
+    _log(
+        artifact_prefix,
+        "workflow.start",
+        meta_analysis_id=meta_analysis_id,
+        environment=environment,
+        no_upload=no_upload,
+    )
+    try:
+        url, _ = run_compose(
+            meta_analysis_id=meta_analysis_id,
+            environment=environment,
+            result_dir=str(result_dir),
+            nsc_key=nsc_key,
+            nv_key=nv_key,
+            no_upload=no_upload,
+            n_cores=n_cores,
+        )
+        _log(artifact_prefix, "workflow.completed", result_url=url)
+
+        metadata: Dict[str, Any] = {
+            "artifact_prefix": artifact_prefix,
+            "meta_analysis_id": meta_analysis_id,
+            "result_url": url,
+            "artifacts_bucket": bucket,
+            "artifacts_prefix": prefix,
+        }
+
+        if bucket:
+            _upload_results(artifact_prefix, result_dir, bucket, prefix)
+            _log(artifact_prefix, "artifacts.uploaded", bucket=bucket, prefix=prefix)
+            _write_metadata(bucket, prefix, artifact_prefix, metadata)
+            _log(artifact_prefix, "metadata.written", bucket=bucket, prefix=prefix)
+
+        _log(artifact_prefix, "workflow.success", result_url=url)
+    except Exception as exc:  # noqa: broad-except
+        _log(artifact_prefix, "workflow.failed", error=str(exc))
+        raise
+    finally:
+        delete_tmp = _bool_from_env(os.environ.get(DELETE_TMP_ENV, "true"))
+        if delete_tmp:
+            for path in _iter_result_files(result_dir):
+                try:
+                    path.unlink()
+                except OSError:
+                    _log(artifact_prefix, "cleanup.warning", file=str(path))
+
+
+if __name__ == "__main__":
+    main()
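Since the task is configured purely through environment variables, it can be exercised outside ECS roughly as follows (a sketch with placeholder values; the run itself still needs network access and, for uploads, S3 credentials):

```python
import os

# Minimal environment for the container entry point; values are placeholders.
os.environ.update(
    {
        "ARTIFACT_PREFIX": "artifact-123",
        "META_ANALYSIS_ID": "abc123",
        "ENVIRONMENT": "production",
        "RESULTS_BUCKET": "my-results-bucket",      # omit to skip the S3 upload/metadata steps
        "RESULTS_PREFIX": "compose-runner/results",
        "N_CORES": "2",
    }
)

from compose_runner.ecs_task import main  # noqa: E402

main()  # runs the same entry point the Fargate task invokes
```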
compose_runner/tests/test_lambda_handlers.py  CHANGED
@@ -1,11 +1,10 @@
 from __future__ import annotations
 
 import json
+from datetime import datetime, timezone
 from typing import Any, Dict
 
-import …
-
-from compose_runner.aws_lambda import log_poll_handler, results_handler, run_handler
+from compose_runner.aws_lambda import log_poll_handler, results_handler, run_handler, status_handler
 
 
 class DummyContext:
@@ -25,42 +24,49 @@ def _make_http_event(payload: Dict[str, Any]) -> Dict[str, Any]:
 
 
 def test_run_handler_http_success(monkeypatch, tmp_path):
-
-
-    def fake_run(**kwargs):
-        called.update(kwargs)
-        return "https://result/url", None
+    captured = {}
 
-
+    class FakeSFN:
+        def start_execution(self, **kwargs):
+            captured.update(kwargs)
+            return {"executionArn": "arn:aws:states:us-east-1:123:execution:state-machine:run-123"}
 
-
-
-
+        class exceptions:
+            class ExecutionAlreadyExists(Exception):
+                ...
 
-    monkeypatch.setattr(run_handler, "…
-    monkeypatch.…
+    monkeypatch.setattr(run_handler, "_SFN_CLIENT", FakeSFN())
+    monkeypatch.setenv("STATE_MACHINE_ARN", "arn:aws:states:state-machine")
     monkeypatch.setenv("RESULTS_BUCKET", "bucket")
     monkeypatch.setenv("RESULTS_PREFIX", "prefix")
     monkeypatch.setenv("NSC_KEY", "nsc")
     monkeypatch.setenv("NV_KEY", "nv")
 
-    event = _make_http_event(…
-
+    event = _make_http_event(
+        {"meta_analysis_id": "abc123", "environment": "production", "artifact_prefix": "artifact-123"}
+    )
+    context = DummyContext("unused")
 
     response = run_handler.handler(event, context)
     body = json.loads(response["body"])
 
-    assert response["statusCode"] == …
-    assert body["job_id"]…
-    assert body["…
-    assert …
-    assert …
-
-    assert …
-    assert …
+    assert response["statusCode"] == 202
+    assert body["job_id"].startswith("arn:aws:states")
+    assert body["artifact_prefix"] == "artifact-123"
+    assert body["status"] == "SUBMITTED"
+    assert captured["name"] == "artifact-123"
+    input_doc = json.loads(captured["input"])
+    assert input_doc["artifact_prefix"] == "artifact-123"
+    assert input_doc["meta_analysis_id"] == "abc123"
+    assert input_doc["environment"] == "production"
+    assert input_doc["results"]["bucket"] == "bucket"
+    assert input_doc["results"]["prefix"] == "prefix"
+    assert input_doc["nsc_key"] == "nsc"
+    assert input_doc["nv_key"] == "nv"
 
 
 def test_run_handler_missing_meta_analysis(monkeypatch):
+    monkeypatch.setenv("STATE_MACHINE_ARN", "arn:aws:states:state-machine")
     event = _make_http_event({"environment": "production"})
     response = run_handler.handler(event, DummyContext())
     body = json.loads(response["body"])
@@ -80,9 +86,9 @@ def test_log_poll_handler(monkeypatch):
     monkeypatch.setenv("DEFAULT_LOOKBACK_MS", "1000")
     monkeypatch.setattr(log_poll_handler, "_LOGS_CLIENT", FakeLogs())
 
-    event = {"…
+    event = {"artifact_prefix": "id"}
     result = log_poll_handler.handler(event, DummyContext())
-    assert result["…
+    assert result["artifact_prefix"] == "id"
     assert result["next_token"] == "token-1"
     assert result["events"][0]["message"] == events_payload[0]["message"]
 
@@ -94,7 +100,7 @@ def test_log_poll_handler_http_missing_job_id(monkeypatch):
     body = json.loads(response["body"])
     assert response["statusCode"] == 400
     assert body["status"] == "FAILED"
-    assert "…
+    assert "artifact_prefix" in body["error"]
 
 
 def test_results_handler(monkeypatch):
@@ -119,15 +125,66 @@ def test_results_handler(monkeypatch):
     monkeypatch.setenv("RESULTS_PREFIX", "prefix")
     monkeypatch.setattr(results_handler, "_S3", FakeS3())
 
-    event = _make_http_event({"…
+    event = _make_http_event({"artifact_prefix": "id"})
     response = results_handler.handler(event, DummyContext())
     body = json.loads(response["body"])
     assert response["statusCode"] == 200
-    assert body["…
+    assert body["artifact_prefix"] == "id"
     assert body["artifacts"][0]["url"] == "https://signed/url"
     assert body["artifacts"][0]["filename"] == "file1.nii.gz"
 
 
+def test_status_handler_succeeded(monkeypatch):
+    start = datetime(2024, 1, 1, tzinfo=timezone.utc)
+    stop = datetime(2024, 1, 1, 1, tzinfo=timezone.utc)
+    output_payload = {"results": {"bucket": "bucket", "prefix": "prefix"}}
+
+    class FakeBody:
+        def __init__(self, data):
+            self._data = data
+
+        def read(self):
+            return self._data
+
+    class FakeSFN:
+        def describe_execution(self, **kwargs):
+            return {
+                "status": "SUCCEEDED",
+                "name": "artifact-1",
+                "startDate": start,
+                "stopDate": stop,
+                "output": json.dumps(output_payload),
+            }
+
+    class FakeS3:
+        def get_object(self, Bucket, Key):
+            assert Bucket == "bucket"
+            assert Key == "prefix/artifact-1/metadata.json"
+            metadata = {"artifact_prefix": "artifact-1", "result_url": "https://results"}
+            return {"Body": FakeBody(json.dumps(metadata).encode("utf-8"))}
+
+    monkeypatch.setattr(status_handler, "_SFN", FakeSFN())
+    monkeypatch.setattr(status_handler, "_S3", FakeS3())
+
+    event = _make_http_event({"job_id": "arn:execution"})
+    response = status_handler.handler(event, DummyContext())
+    body = json.loads(response["body"])
+
+    assert response["statusCode"] == 200
+    assert body["status"] == "SUCCEEDED"
+    assert body["artifact_prefix"] == "artifact-1"
+    assert body["result"]["result_url"] == "https://results"
+
+
+def test_status_handler_missing_job_id(monkeypatch):
+    event = _make_http_event({})
+    response = status_handler.handler(event, DummyContext())
+    body = json.loads(response["body"])
+    assert response["statusCode"] == 400
+    assert body["status"] == "FAILED"
+    assert "job_id" in body["error"]
+
+
 def test_results_handler_missing_job_id(monkeypatch):
     monkeypatch.setenv("RESULTS_BUCKET", "bucket")
     event = _make_http_event({})
@@ -135,4 +192,4 @@ def test_results_handler_missing_job_id(monkeypatch):
     body = json.loads(response["body"])
     assert response["statusCode"] == 400
     assert body["status"] == "FAILED"
-    assert "…
+    assert "artifact_prefix" in body["error"]
compose_runner-0.6.2rc1.dist-info/METADATA  ADDED
@@ -0,0 +1,79 @@
+Metadata-Version: 2.4
+Name: compose-runner
+Version: 0.6.2rc1
+Summary: A package for running neurosynth-compose analyses
+Project-URL: Repository, https://github.com/neurostuff/compose-runner
+Author-email: James Kent <jamesdkent21@gmail.com>
+License: BSD 3-Clause License
+License-File: LICENSE
+Keywords: meta-analysis,neuroimaging,neurosynth,neurosynth-compose
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Programming Language :: Python :: 3
+Requires-Dist: click
+Requires-Dist: nimare
+Requires-Dist: numpy
+Requires-Dist: sentry-sdk
+Provides-Extra: aws
+Requires-Dist: boto3; extra == 'aws'
+Provides-Extra: tests
+Requires-Dist: pytest; extra == 'tests'
+Requires-Dist: pytest-recording; extra == 'tests'
+Requires-Dist: vcrpy; extra == 'tests'
+Description-Content-Type: text/markdown
+
+# compose-runner
+
+Python package to execute meta-analyses created using neurosynth compose and NiMARE
+as the meta-analysis execution engine.
+
+## AWS Deployment
+
+This repository includes an AWS CDK application that turns compose-runner into a
+serverless batch pipeline using Step Functions, AWS Lambda, and ECS Fargate.
+The deployed architecture works like this:
+
+- `ComposeRunnerSubmit` (Lambda Function URL) accepts HTTP requests, validates
+  the meta-analysis payload, and starts a Step Functions execution. The response
+  is immediate and returns both a durable `job_id` (the execution ARN) and the
+  `artifact_prefix` used for S3 and log correlation.
+- A Standard state machine runs a single Fargate task (`compose_runner.ecs_task`)
+  and waits for completion. The container downloads inputs, executes the
+  meta-analysis on up to 4 vCPU / 30 GiB of memory, uploads artifacts to S3, and
+  writes `metadata.json` into the same prefix.
+- `ComposeRunnerStatus` (Lambda Function URL) wraps `DescribeExecution`, merges
+  metadata from S3, and exposes a simple status endpoint suitable for polling.
+- `ComposeRunnerLogPoller` streams the ECS CloudWatch Logs for a given `artifact_prefix`,
+  while `ComposeRunnerResultsFetcher` returns presigned URLs for stored artifacts.
+
+1. Create a virtual environment and install the CDK dependencies:
+   ```bash
+   cd infra/cdk
+   python -m venv .venv
+   source .venv/bin/activate
+   pip install -r requirements.txt
+   ```
+2. (One-time per account/region) bootstrap the CDK environment:
+   ```bash
+   cdk bootstrap
+   ```
+3. Deploy the stack (supplying the compose-runner version you want baked into the images):
+   ```bash
+   cdk deploy \
+     -c composeRunnerVersion=$(hatch version) \
+     -c resultsPrefix=compose-runner/results \
+     -c taskCpu=4096 \
+     -c taskMemoryMiB=30720
+   ```
+   Pass `-c resultsBucketName=<bucket>` to use an existing S3 bucket, or omit it
+   to let the stack create and retain a dedicated bucket. Additional knobs:
+
+   - `-c stateMachineTimeoutSeconds=7200` to control the max wall clock per run
+   - `-c submitTimeoutSeconds` / `-c statusTimeoutSeconds` / `-c pollTimeoutSeconds`
+     to tune Lambda timeouts
+   - `-c taskEphemeralStorageGiB` if the default 21 GiB scratch volume is insufficient
+
+The deployment builds both the Lambda image (`aws_lambda/Dockerfile`) and the
+Fargate task image (`Dockerfile`), provisions the Step Functions state machine,
+and configures a public VPC so each task has outbound internet access.
+The CloudFormation outputs list the HTTPS endpoints for submission, status,
+logs, and artifact retrieval, alongside the Step Functions ARN.
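The README above describes a submit-then-poll workflow. Sketched as a client script it might look like this (the Function URLs are placeholders taken from the stack outputs, and JSON POST bodies are assumed):

```python
import time

import requests

# Placeholder Function URLs; take the real values from the CloudFormation outputs.
SUBMIT_URL = "https://<ComposeRunnerSubmit-function-url>"
STATUS_URL = "https://<ComposeRunnerStatus-function-url>"

job = requests.post(SUBMIT_URL, json={"meta_analysis_id": "abc123"}).json()
print("queued:", job["job_id"], job["artifact_prefix"])

while True:
    status = requests.post(STATUS_URL, json={"job_id": job["job_id"]}).json()
    if status["status"] in {"SUCCEEDED", "FAILED"}:
        break
    time.sleep(30)

print(status["status"], status.get("result"))  # "result" holds metadata.json when available
```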
{compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/RECORD  CHANGED
@@ -1,23 +1,26 @@
 compose_runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-compose_runner/_version.py,sha256=…
+compose_runner/_version.py,sha256=PQv64kDBSWybHkMaDVivQRRmALSrws0RGcr_Z0k2xNY,714
 compose_runner/cli.py,sha256=1tkxFgEe8Yk7VkzE8qxGmCGqLU7UbGin2VaP0AiZkVg,1101
+compose_runner/ecs_task.py,sha256=5-DbbcwfAqqkYRUuPADIlYatp5NK70uJYCo06O3IcdM,4997
 compose_runner/run.py,sha256=yIh8Fj8dfVKvahRl483qGOsDUoAS1FdsYrKZp_HknGo,18525
 compose_runner/sentry.py,sha256=pjqwsZrXrKB0cCy-TL-_2eYJIqUU0aV-8e0SWUk-9Xw,320
 compose_runner/aws_lambda/__init__.py,sha256=yZNXXv7gCPSrtLCEX5Qf4cnzSTS3fHPV6k-SyZwiZIA,48
-compose_runner/aws_lambda/…
-compose_runner/aws_lambda/…
-compose_runner/aws_lambda/…
+compose_runner/aws_lambda/common.py,sha256=cA2G5lO4P8uVBqJaYcU6Y3P3t3syoTmk4SpLKZhAFo8,1688
+compose_runner/aws_lambda/log_poll_handler.py,sha256=eEU-Ra_-17me3e4eqSTd2Nv_qoaOl7zi3kIxD58Tbek,1905
+compose_runner/aws_lambda/results_handler.py,sha256=vSxs4nbWyBmkFFKRGIp5-T4W2hPh9zgj7uNH-e18aW8,2107
+compose_runner/aws_lambda/run_handler.py,sha256=iZW35xqa9FBZQTxRBH0JOpYhQ3Si1eTkxFHB5VJ5drA,3981
+compose_runner/aws_lambda/status_handler.py,sha256=K_VDyPYY3ExiyalDyf35nXi3UZzqj4AenWmlxkzWNXo,3423
 compose_runner/tests/conftest.py,sha256=ijb1iw724izKMxrvclt5x7LljTGoBfHwSS-jIEUe-sQ,191
 compose_runner/tests/test_cli.py,sha256=G3Kz7Nbl2voJ_luXPL7E6slkRNF9lmcpZ-nHBAqeL-M,290
-compose_runner/tests/test_lambda_handlers.py,sha256=…
+compose_runner/tests/test_lambda_handlers.py,sha256=A13q8KEMzxFPdqQYOIEqhYvEwKN1-3SPtGN9nRvLvtU,7115
 compose_runner/tests/test_run.py,sha256=Nhx7wz8XxQuxy3kT5yoE_S1Hw0Mgmfn8TWYOZXm1_Gg,1795
 compose_runner/tests/cassettes/test_run/test_download_bundle.yaml,sha256=vgdGDqirjBHosQsspkaN5Ty6XqJbkYUAbGtdImym5xI,79304
 compose_runner/tests/cassettes/test_run/test_run_database_workflow.yaml,sha256=ay0aHtU-nmVWvbmN_EIgO9MMkC4ZeQljKU8nkTXOoDw,8724312
 compose_runner/tests/cassettes/test_run/test_run_group_comparison_workflow.yaml,sha256=FaZpMdcaM7TMgyueyZBGftm6ywUh1HhtGmCegXUmRFA,4029712
 compose_runner/tests/cassettes/test_run/test_run_string_group_comparison_workflow.yaml,sha256=pcn6tQwrimhDtP8yJ3jFlsfEOnk8FWybYQr9IQ5A_KA,3233839
 compose_runner/tests/cassettes/test_run/test_run_workflow.yaml,sha256=0Nk7eJWAmgYALG2ODrezbRhpYsc00JiuYVjXt3TUm5c,3857234
-compose_runner-0.6.1.dist-info/…
-compose_runner-0.6.1.dist-info/…
-compose_runner-0.6.1.dist-info/…
-compose_runner-0.6.1.dist-info/…
-compose_runner-0.6.1.dist-info/…
+compose_runner-0.6.2rc1.dist-info/METADATA,sha256=8_a_K_vWLHqO6Kz-KyARz2NOt0WZ2MfTBf-pHQ2MppE,3435
+compose_runner-0.6.2rc1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+compose_runner-0.6.2rc1.dist-info/entry_points.txt,sha256=TyPmB9o2tSWw8L3mcach9r2EL7inRVXE9ew3_XReMIY,55
+compose_runner-0.6.2rc1.dist-info/licenses/LICENSE,sha256=PeiWxrrRme2rIpPMV9vjgGe7UHEKCIcTb0KagYhnyqo,1313
+compose_runner-0.6.2rc1.dist-info/RECORD,,
compose_runner-0.6.1.dist-info/METADATA  REMOVED
@@ -1,62 +0,0 @@
-Metadata-Version: 2.4
-Name: compose-runner
-Version: 0.6.1
-Summary: A package for running neurosynth-compose analyses
-Project-URL: Repository, https://github.com/neurostuff/compose-runner
-Author-email: James Kent <jamesdkent21@gmail.com>
-License: BSD 3-Clause License
-License-File: LICENSE
-Keywords: meta-analysis,neuroimaging,neurosynth,neurosynth-compose
-Classifier: License :: OSI Approved :: BSD License
-Classifier: Programming Language :: Python :: 3
-Requires-Dist: click
-Requires-Dist: nimare
-Requires-Dist: numpy
-Requires-Dist: sentry-sdk
-Provides-Extra: aws
-Requires-Dist: boto3; extra == 'aws'
-Provides-Extra: tests
-Requires-Dist: pytest; extra == 'tests'
-Requires-Dist: pytest-recording; extra == 'tests'
-Requires-Dist: vcrpy; extra == 'tests'
-Description-Content-Type: text/markdown
-
-# compose-runner
-
-Python package to execute meta-analyses created using neurosynth compose and NiMARE
-as the meta-analysis execution engine.
-
-## AWS Lambda Deployment
-
-This repository includes an AWS CDK application for provisioning the Lambda-based
-execution environment and log polling function.
-
-1. Create a virtual environment and install the CDK dependencies:
-   ```bash
-   cd infra/cdk
-   python -m venv .venv
-   source .venv/bin/activate
-   pip install -r requirements.txt
-   ```
-2. (One-time per account/region) bootstrap the CDK environment:
-   ```bash
-   cdk bootstrap
-   ```
-3. Deploy the stack (supplying the compose-runner version you want baked into the Lambda image):
-   ```bash
-   cdk deploy \
-     -c composeRunnerVersion=$(hatch version) \
-     -c resultsPrefix=compose-runner/results \
-     -c runMemorySize=3008 \
-     -c runTimeoutSeconds=900
-   ```
-   The deployment output includes HTTPS endpoints for submitting runs (`ComposeRunnerFunctionUrl`), polling logs (`ComposeRunnerLogPollerFunctionUrl`), and fetching presigned S3 URLs (`ComposeRunnerResultsFunctionUrl`).
-   Omit `resultsBucketName` to let the stack create a managed bucket, or pass an
-   existing bucket name via `-c resultsBucketName=<bucket>`.
-
-The deployment builds the Lambda container image from `aws_lambda/Dockerfile`,
-creates two functions (`ComposeRunnerFunction` and `ComposeRunnerLogPoller`),
-and provisions the S3 bucket used to store generated artifacts (including
-`meta_results.pkl`). The log poller function expects clients to call it with a
-job ID (the run Lambda invocation request ID) and returns filtered CloudWatch Logs
-entries for that job.
{compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/WHEEL  (file without changes)
{compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/entry_points.txt  (file without changes)
{compose_runner-0.6.1.dist-info → compose_runner-0.6.2rc1.dist-info}/licenses/LICENSE  (file without changes)