compose-runner 0.6.1__tar.gz → 0.6.2rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/Dockerfile +2 -2
  2. compose_runner-0.6.2rc1/PKG-INFO +79 -0
  3. compose_runner-0.6.2rc1/README.md +56 -0
  4. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/_version.py +2 -2
  5. compose_runner-0.6.2rc1/compose_runner/aws_lambda/common.py +60 -0
  6. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/aws_lambda/log_poll_handler.py +15 -39
  7. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/aws_lambda/results_handler.py +13 -38
  8. compose_runner-0.6.2rc1/compose_runner/aws_lambda/run_handler.py +115 -0
  9. compose_runner-0.6.2rc1/compose_runner/aws_lambda/status_handler.py +102 -0
  10. compose_runner-0.6.2rc1/compose_runner/ecs_task.py +145 -0
  11. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/test_lambda_handlers.py +87 -30
  12. compose_runner-0.6.2rc1/infra/cdk/stacks/compose_runner_stack.py +339 -0
  13. compose_runner-0.6.1/PKG-INFO +0 -62
  14. compose_runner-0.6.1/README.md +0 -39
  15. compose_runner-0.6.1/compose_runner/aws_lambda/run_handler.py +0 -137
  16. compose_runner-0.6.1/infra/cdk/stacks/compose_runner_stack.py +0 -162
  17. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/.gitignore +0 -0
  18. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/LICENSE +0 -0
  19. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/aws_lambda/.dockerignore +0 -0
  20. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/aws_lambda/Dockerfile +0 -0
  21. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/__init__.py +0 -0
  22. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/aws_lambda/__init__.py +0 -0
  23. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/cli.py +0 -0
  24. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/run.py +0 -0
  25. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/sentry.py +0 -0
  26. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/cassettes/test_run/test_download_bundle.yaml +0 -0
  27. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/cassettes/test_run/test_run_database_workflow.yaml +0 -0
  28. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/cassettes/test_run/test_run_group_comparison_workflow.yaml +0 -0
  29. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/cassettes/test_run/test_run_string_group_comparison_workflow.yaml +0 -0
  30. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/cassettes/test_run/test_run_workflow.yaml +0 -0
  31. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/conftest.py +0 -0
  32. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/test_cli.py +0 -0
  33. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/compose_runner/tests/test_run.py +0 -0
  34. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/infra/cdk/app.py +0 -0
  35. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/infra/cdk/cdk.json +0 -0
  36. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/infra/cdk/requirements.txt +0 -0
  37. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/infra/cdk/stacks/__init__.py +0 -0
  38. {compose_runner-0.6.1 → compose_runner-0.6.2rc1}/pyproject.toml +0 -0
@@ -14,7 +14,7 @@ RUN hatch dep show requirements > requirements.txt && pip install -r requirement
 
 COPY . .
 
-# install the package (more likely to change, leverage caching!)
-RUN pip install .
+# install the package with AWS extras so the ECS task has boto3, etc.
+RUN pip install '.[aws]'
 
 ENTRYPOINT ["compose-run"]
@@ -0,0 +1,79 @@
+Metadata-Version: 2.4
+Name: compose-runner
+Version: 0.6.2rc1
+Summary: A package for running neurosynth-compose analyses
+Project-URL: Repository, https://github.com/neurostuff/compose-runner
+Author-email: James Kent <jamesdkent21@gmail.com>
+License: BSD 3-Clause License
+License-File: LICENSE
+Keywords: meta-analysis,neuroimaging,neurosynth,neurosynth-compose
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Programming Language :: Python :: 3
+Requires-Dist: click
+Requires-Dist: nimare
+Requires-Dist: numpy
+Requires-Dist: sentry-sdk
+Provides-Extra: aws
+Requires-Dist: boto3; extra == 'aws'
+Provides-Extra: tests
+Requires-Dist: pytest; extra == 'tests'
+Requires-Dist: pytest-recording; extra == 'tests'
+Requires-Dist: vcrpy; extra == 'tests'
+Description-Content-Type: text/markdown
+
+# compose-runner
+
+Python package to execute meta-analyses created using neurosynth compose and NiMARE
+as the meta-analysis execution engine.
+
+## AWS Deployment
+
+This repository includes an AWS CDK application that turns compose-runner into a
+serverless batch pipeline using Step Functions, AWS Lambda, and ECS Fargate.
+The deployed architecture works like this:
+
+- `ComposeRunnerSubmit` (Lambda Function URL) accepts HTTP requests, validates
+  the meta-analysis payload, and starts a Step Functions execution. The response
+  is immediate and returns both a durable `job_id` (the execution ARN) and the
+  `artifact_prefix` used for S3 and log correlation.
+- A Standard state machine runs a single Fargate task (`compose_runner.ecs_task`)
+  and waits for completion. The container downloads inputs, executes the
+  meta-analysis on up to 4 vCPU / 30 GiB of memory, uploads artifacts to S3, and
+  writes `metadata.json` into the same prefix.
+- `ComposeRunnerStatus` (Lambda Function URL) wraps `DescribeExecution`, merges
+  metadata from S3, and exposes a simple status endpoint suitable for polling.
+- `ComposeRunnerLogPoller` streams the ECS CloudWatch Logs for a given `artifact_prefix`,
+  while `ComposeRunnerResultsFetcher` returns presigned URLs for stored artifacts.
+
+1. Create a virtual environment and install the CDK dependencies:
+   ```bash
+   cd infra/cdk
+   python -m venv .venv
+   source .venv/bin/activate
+   pip install -r requirements.txt
+   ```
+2. (One-time per account/region) bootstrap the CDK environment:
+   ```bash
+   cdk bootstrap
+   ```
+3. Deploy the stack (supplying the compose-runner version you want baked into the images):
+   ```bash
+   cdk deploy \
+     -c composeRunnerVersion=$(hatch version) \
+     -c resultsPrefix=compose-runner/results \
+     -c taskCpu=4096 \
+     -c taskMemoryMiB=30720
+   ```
+   Pass `-c resultsBucketName=<bucket>` to use an existing S3 bucket, or omit it
+   to let the stack create and retain a dedicated bucket. Additional knobs:
+
+   - `-c stateMachineTimeoutSeconds=7200` to control the max wall clock per run
+   - `-c submitTimeoutSeconds` / `-c statusTimeoutSeconds` / `-c pollTimeoutSeconds`
+     to tune Lambda timeouts
+   - `-c taskEphemeralStorageGiB` if the default 21 GiB scratch volume is insufficient
+
+The deployment builds both the Lambda image (`aws_lambda/Dockerfile`) and the
+Fargate task image (`Dockerfile`), provisions the Step Functions state machine,
+and configures a public VPC so each task has outbound internet access.
+The CloudFormation outputs list the HTTPS endpoints for submission, status,
+logs, and artifact retrieval, alongside the Step Functions ARN.
@@ -0,0 +1,56 @@
+# compose-runner
+
+Python package to execute meta-analyses created using neurosynth compose and NiMARE
+as the meta-analysis execution engine.
+
+## AWS Deployment
+
+This repository includes an AWS CDK application that turns compose-runner into a
+serverless batch pipeline using Step Functions, AWS Lambda, and ECS Fargate.
+The deployed architecture works like this:
+
+- `ComposeRunnerSubmit` (Lambda Function URL) accepts HTTP requests, validates
+  the meta-analysis payload, and starts a Step Functions execution. The response
+  is immediate and returns both a durable `job_id` (the execution ARN) and the
+  `artifact_prefix` used for S3 and log correlation.
+- A Standard state machine runs a single Fargate task (`compose_runner.ecs_task`)
+  and waits for completion. The container downloads inputs, executes the
+  meta-analysis on up to 4 vCPU / 30 GiB of memory, uploads artifacts to S3, and
+  writes `metadata.json` into the same prefix.
+- `ComposeRunnerStatus` (Lambda Function URL) wraps `DescribeExecution`, merges
+  metadata from S3, and exposes a simple status endpoint suitable for polling.
+- `ComposeRunnerLogPoller` streams the ECS CloudWatch Logs for a given `artifact_prefix`,
+  while `ComposeRunnerResultsFetcher` returns presigned URLs for stored artifacts.
+
+1. Create a virtual environment and install the CDK dependencies:
+   ```bash
+   cd infra/cdk
+   python -m venv .venv
+   source .venv/bin/activate
+   pip install -r requirements.txt
+   ```
+2. (One-time per account/region) bootstrap the CDK environment:
+   ```bash
+   cdk bootstrap
+   ```
+3. Deploy the stack (supplying the compose-runner version you want baked into the images):
+   ```bash
+   cdk deploy \
+     -c composeRunnerVersion=$(hatch version) \
+     -c resultsPrefix=compose-runner/results \
+     -c taskCpu=4096 \
+     -c taskMemoryMiB=30720
+   ```
+   Pass `-c resultsBucketName=<bucket>` to use an existing S3 bucket, or omit it
+   to let the stack create and retain a dedicated bucket. Additional knobs:
+
+   - `-c stateMachineTimeoutSeconds=7200` to control the max wall clock per run
+   - `-c submitTimeoutSeconds` / `-c statusTimeoutSeconds` / `-c pollTimeoutSeconds`
+     to tune Lambda timeouts
+   - `-c taskEphemeralStorageGiB` if the default 21 GiB scratch volume is insufficient
+
+The deployment builds both the Lambda image (`aws_lambda/Dockerfile`) and the
+Fargate task image (`Dockerfile`), provisions the Step Functions state machine,
+and configures a public VPC so each task has outbound internet access.
+The CloudFormation outputs list the HTTPS endpoints for submission, status,
+logs, and artifact retrieval, alongside the Step Functions ARN.
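The submit/poll flow described in this README can be exercised end to end once the stack is deployed. The sketch below is illustrative only: the Function URL values are placeholders for the CloudFormation outputs and `meta_analysis_id` is a dummy ID, while the field names (`job_id`, `artifact_prefix`, `status`, `result`) follow the Lambda handlers added in this release.

```python
# Illustrative submit-and-poll round trip against the deployed Function URLs.
# SUBMIT_URL / STATUS_URL and the meta_analysis_id are placeholders.
import json
import time
import urllib.request

SUBMIT_URL = "https://<submit-function-url>.lambda-url.us-east-1.on.aws/"
STATUS_URL = "https://<status-function-url>.lambda-url.us-east-1.on.aws/"


def post_json(url: str, payload: dict) -> dict:
    """POST a JSON body and decode the JSON response."""
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read().decode("utf-8"))


# Submit: returns immediately with job_id (execution ARN) and artifact_prefix.
job = post_json(SUBMIT_URL, {"meta_analysis_id": "abc123"})
print(job["job_id"], job["artifact_prefix"], job["status"])

# Poll: the status handler reports RUNNING / SUCCEEDED / FAILED and, once the
# task finishes, merges metadata.json from S3 into "result".
while True:
    status = post_json(STATUS_URL, {"job_id": job["job_id"]})
    if status["status"] in {"SUCCEEDED", "FAILED"}:
        break
    time.sleep(30)
print(status["status"], status.get("result"))
```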
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.6.1'
-__version_tuple__ = version_tuple = (0, 6, 1)
+__version__ = version = '0.6.2rc1'
+__version_tuple__ = version_tuple = (0, 6, 2, 'rc1')
 
 __commit_id__ = commit_id = None
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import base64
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+
+def is_http_event(event: Any) -> bool:
+    return isinstance(event, dict) and "requestContext" in event
+
+
+def _decode_body(event: Dict[str, Any]) -> Optional[str]:
+    body = event.get("body")
+    if not body:
+        return None
+    if event.get("isBase64Encoded"):
+        return base64.b64decode(body).decode("utf-8")
+    return body
+
+
+def extract_payload(event: Dict[str, Any]) -> Dict[str, Any]:
+    if not is_http_event(event):
+        return event
+    body = _decode_body(event)
+    if not body:
+        return {}
+    return json.loads(body)
+
+
+def http_response(body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
+    return {
+        "statusCode": status_code,
+        "headers": {"Content-Type": "application/json"},
+        "body": json.dumps(body),
+    }
+
+
+@dataclass(frozen=True)
+class LambdaRequest:
+    raw_event: Any
+    payload: Dict[str, Any]
+    is_http: bool
+
+    @classmethod
+    def parse(cls, event: Any) -> "LambdaRequest":
+        payload = extract_payload(event)
+        return cls(raw_event=event, payload=payload, is_http=is_http_event(event))
+
+    def respond(self, body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
+        if self.is_http:
+            return http_response(body, status_code)
+        return body
+
+    def bad_request(self, message: str, status_code: int = 400) -> Dict[str, Any]:
+        return self.respond({"status": "FAILED", "error": message}, status_code=status_code)
+
+    def get(self, key: str, default: Any = None) -> Any:
+        return self.payload.get(key, default)
+
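For context, a minimal sketch of how the handlers below are expected to consume this new helper. The `echo_handler` function and its `name` field are illustrative, not part of the package:

```python
# Hypothetical handler built on compose_runner.aws_lambda.common.LambdaRequest.
from typing import Any, Dict

from compose_runner.aws_lambda.common import LambdaRequest


def echo_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    # parse() accepts both Function URL events (JSON/base64 body) and plain dicts.
    request = LambdaRequest.parse(event)
    name = request.get("name")
    if not name:
        # For HTTP events this becomes a 400 envelope; direct invokes get the body as-is.
        return request.bad_request("Request payload must include 'name'.")
    # respond() adds the statusCode/headers/body wrapper only for HTTP events.
    return request.respond({"greeting": f"hello {name}"})
```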
@@ -2,52 +2,30 @@ from __future__ import annotations
 
 import os
 import time
-import base64
-import json
 from typing import Any, Dict, List
 
 import boto3
 
+from compose_runner.aws_lambda.common import LambdaRequest
+
 _LOGS_CLIENT = boto3.client("logs", region_name=os.environ.get("AWS_REGION", "us-east-1"))
 
 LOG_GROUP_ENV = "RUNNER_LOG_GROUP"
 DEFAULT_LOOKBACK_MS_ENV = "DEFAULT_LOOKBACK_MS"
 
-def _is_http_event(event: Any) -> bool:
-    return isinstance(event, dict) and "requestContext" in event
-
-
-def _extract_payload(event: Dict[str, Any]) -> Dict[str, Any]:
-    if not _is_http_event(event):
-        return event
-    body = event.get("body")
-    if not body:
-        return {}
-    if event.get("isBase64Encoded"):
-        body = base64.b64decode(body).decode("utf-8")
-    return json.loads(body)
-
-
-def _http_response(body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
-    return {
-        "statusCode": status_code,
-        "headers": {"Content-Type": "application/json"},
-        "body": json.dumps(body),
-    }
-
 
 def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
-    raw_event = event
-    event = _extract_payload(event)
-    job_id = event.get("job_id")
-    if not job_id:
-        message = "Request payload must include 'job_id'."
-        if _is_http_event(raw_event):
-            return _http_response({"status": "FAILED", "error": message}, status_code=400)
+    request = LambdaRequest.parse(event)
+    payload = request.payload
+    artifact_prefix = payload.get("artifact_prefix")
+    if not artifact_prefix:
+        message = "Request payload must include 'artifact_prefix'."
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
         raise KeyError(message)
-    next_token = event.get("next_token")
-    start_time = event.get("start_time")
-    end_time = event.get("end_time")
+    next_token = payload.get("next_token")
+    start_time = payload.get("start_time")
+    end_time = payload.get("end_time")
 
     log_group = os.environ[LOG_GROUP_ENV]
     lookback_ms = int(os.environ.get(DEFAULT_LOOKBACK_MS_ENV, "3600000"))
@@ -60,7 +38,7 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
 
     params: Dict[str, Any] = {
         "logGroupName": log_group,
-        "filterPattern": f'"{job_id}"',
+        "filterPattern": f'"{artifact_prefix}"',
        "startTime": int(start_time),
     }
     if end_time is not None:
@@ -75,10 +53,8 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
     ]
 
     body = {
-        "job_id": job_id,
+        "artifact_prefix": artifact_prefix,
         "events": events,
         "next_token": response.get("nextToken"),
     }
-    if _is_http_event(raw_event):
-        return _http_response(body)
-    return body
+    return request.respond(body)
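A hedged example of calling the refactored log poller directly with a plain dict event (the non-HTTP path). It assumes AWS credentials and the `RUNNER_LOG_GROUP` environment variable are configured; the `artifact_prefix` value is a placeholder:

```python
# Illustrative direct invocation of the log poller (no Function URL involved).
from compose_runner.aws_lambda import log_poll_handler

event = {
    "artifact_prefix": "00000000-0000-0000-0000-000000000000",  # placeholder
    # "start_time", "end_time", and "next_token" are optional; when omitted the
    # handler falls back to the DEFAULT_LOOKBACK_MS window.
}
response = log_poll_handler.handler(event, None)
# Non-HTTP invocations receive the body directly, keyed by artifact_prefix.
for log_event in response["events"]:
    print(log_event)
print("next_token:", response["next_token"])
```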
@@ -1,13 +1,13 @@
 from __future__ import annotations
 
 import os
-import base64
-import json
 from datetime import datetime, timezone
 from typing import Any, Dict, List
 
 import boto3
 
+from compose_runner.aws_lambda.common import LambdaRequest
+
 _S3 = boto3.client("s3", region_name=os.environ.get("AWS_REGION", "us-east-1"))
 
 RESULTS_BUCKET_ENV = "RESULTS_BUCKET"
@@ -21,44 +21,21 @@ def _serialize_dt(value: datetime) -> str:
     return value.astimezone(timezone.utc).isoformat()
 
 
-def _is_http_event(event: Any) -> bool:
-    return isinstance(event, dict) and "requestContext" in event
-
-
-def _extract_payload(event: Dict[str, Any]) -> Dict[str, Any]:
-    if not _is_http_event(event):
-        return event
-    body = event.get("body")
-    if not body:
-        return {}
-    if event.get("isBase64Encoded"):
-        body = base64.b64decode(body).decode("utf-8")
-    return json.loads(body)
-
-
-def _http_response(body: Dict[str, Any], status_code: int = 200) -> Dict[str, Any]:
-    return {
-        "statusCode": status_code,
-        "headers": {"Content-Type": "application/json"},
-        "body": json.dumps(body),
-    }
-
-
 def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
-    raw_event = event
-    event = _extract_payload(event)
+    request = LambdaRequest.parse(event)
+    payload = request.payload
     bucket = os.environ[RESULTS_BUCKET_ENV]
     prefix = os.environ.get(RESULTS_PREFIX_ENV)
 
-    job_id = event.get("job_id")
-    if not job_id:
-        message = "Request payload must include 'job_id'."
-        if _is_http_event(raw_event):
-            return _http_response({"status": "FAILED", "error": message}, status_code=400)
+    artifact_prefix = payload.get("artifact_prefix")
+    if not artifact_prefix:
+        message = "Request payload must include 'artifact_prefix'."
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
         raise KeyError(message)
-    expires_in = int(event.get("expires_in", DEFAULT_EXPIRES_IN))
+    expires_in = int(payload.get("expires_in", DEFAULT_EXPIRES_IN))
 
-    key_prefix = f"{prefix.rstrip('/')}/{job_id}" if prefix else job_id
+    key_prefix = f"{prefix.rstrip('/')}/{artifact_prefix}" if prefix else artifact_prefix
 
     response = _S3.list_objects_v2(Bucket=bucket, Prefix=key_prefix)
     contents = response.get("Contents", [])
@@ -84,11 +61,9 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
         )
 
     body = {
-        "job_id": job_id,
+        "artifact_prefix": artifact_prefix,
         "artifacts": artifacts,
         "bucket": bucket,
         "prefix": key_prefix,
     }
-    if _is_http_event(raw_event):
-        return _http_response(body)
-    return body
+    return request.respond(body)
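Similarly, a sketch of a direct invocation of the results fetcher. It assumes `RESULTS_BUCKET` (and optionally `RESULTS_PREFIX`) are set in the environment; the prefix value and expiry are placeholders:

```python
# Illustrative direct invocation of the results fetcher.
from compose_runner.aws_lambda import results_handler

event = {
    "artifact_prefix": "00000000-0000-0000-0000-000000000000",  # placeholder
    "expires_in": 900,  # presigned URL lifetime in seconds; defaults to DEFAULT_EXPIRES_IN
}
response = results_handler.handler(event, None)
print(response["bucket"], response["prefix"])
# One entry per object stored under <RESULTS_PREFIX>/<artifact_prefix>.
for artifact in response["artifacts"]:
    print(artifact)
```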
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+import json
+import logging
+import os
+import uuid
+from typing import Any, Dict, Optional
+
+import boto3
+from botocore.exceptions import ClientError
+
+from compose_runner.aws_lambda.common import LambdaRequest
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+_SFN_CLIENT = boto3.client("stepfunctions", region_name=os.environ.get("AWS_REGION", "us-east-1"))
+
+STATE_MACHINE_ARN_ENV = "STATE_MACHINE_ARN"
+RESULTS_BUCKET_ENV = "RESULTS_BUCKET"
+RESULTS_PREFIX_ENV = "RESULTS_PREFIX"
+NSC_KEY_ENV = "NSC_KEY"
+NV_KEY_ENV = "NV_KEY"
+
+
+def _log(job_id: str, message: str, **details: Any) -> None:
+    payload = {"job_id": job_id, "message": message, **details}
+    # Ensure consistent JSON logging for ingestion/filtering.
+    logger.info(json.dumps(payload))
+
+
+def _job_input(
+    payload: Dict[str, Any],
+    artifact_prefix: str,
+    bucket: Optional[str],
+    prefix: Optional[str],
+    nsc_key: Optional[str],
+    nv_key: Optional[str],
+) -> Dict[str, Any]:
+    no_upload_flag = bool(payload.get("no_upload", False))
+    doc: Dict[str, Any] = {
+        "artifact_prefix": artifact_prefix,
+        "meta_analysis_id": payload["meta_analysis_id"],
+        "environment": payload.get("environment", "production"),
+        "no_upload": "true" if no_upload_flag else "false",
+        "results": {"bucket": bucket or "", "prefix": prefix or ""},
+    }
+    n_cores = payload.get("n_cores")
+    doc["n_cores"] = str(n_cores) if n_cores is not None else ""
+    if nsc_key is not None:
+        doc["nsc_key"] = nsc_key
+    else:
+        doc["nsc_key"] = ""
+    if nv_key is not None:
+        doc["nv_key"] = nv_key
+    else:
+        doc["nv_key"] = ""
+    return doc
+
+
+def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
+    request = LambdaRequest.parse(event)
+    payload = request.payload
+    if STATE_MACHINE_ARN_ENV not in os.environ:
+        raise RuntimeError(f"{STATE_MACHINE_ARN_ENV} environment variable must be set.")
+
+    if "meta_analysis_id" not in payload:
+        message = "Request payload must include 'meta_analysis_id'."
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
+        raise KeyError(message)
+
+    artifact_prefix = payload.get("artifact_prefix") or str(uuid.uuid4())
+    bucket = os.environ.get(RESULTS_BUCKET_ENV)
+    prefix = os.environ.get(RESULTS_PREFIX_ENV)
+    nsc_key = payload.get("nsc_key") or os.environ.get(NSC_KEY_ENV)
+    nv_key = payload.get("nv_key") or os.environ.get(NV_KEY_ENV)
+
+    job_input = _job_input(payload, artifact_prefix, bucket, prefix, nsc_key, nv_key)
+    params = {
+        "stateMachineArn": os.environ[STATE_MACHINE_ARN_ENV],
+        "name": artifact_prefix,
+        "input": json.dumps(job_input),
+    }
+
+    try:
+        response = _SFN_CLIENT.start_execution(**params)
+    except _SFN_CLIENT.exceptions.ExecutionAlreadyExists as exc:
+        _log(artifact_prefix, "workflow.duplicate", error=str(exc))
+        body = {
+            "status": "FAILED",
+            "error": "A job with the provided artifact_prefix already exists.",
+            "artifact_prefix": artifact_prefix,
+        }
+        if request.is_http:
+            return request.respond(body, status_code=409)
+        raise ValueError(body["error"]) from exc
+    except ClientError as exc:
+        _log(artifact_prefix, "workflow.failed_to_queue", error=str(exc))
+        message = "Failed to start compose-runner job."
+        body = {"status": "FAILED", "error": message}
+        if request.is_http:
+            return request.respond(body, status_code=500)
+        raise RuntimeError(message) from exc
+
+    execution_arn = response["executionArn"]
+    _log(artifact_prefix, "workflow.queued", execution_arn=execution_arn)
+
+    body = {
+        "job_id": execution_arn,
+        "artifact_prefix": artifact_prefix,
+        "status": "SUBMITTED",
+        "status_url": f"/jobs/{execution_arn}",
+    }
+    return request.respond(body, status_code=202)
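A hedged sketch of invoking the new submit handler directly with a plain dict event (the non-HTTP path). It assumes `STATE_MACHINE_ARN` points at a deployed state machine and AWS credentials are available; the `meta_analysis_id` is a placeholder:

```python
# Illustrative direct invocation of the submit handler.
from compose_runner.aws_lambda import run_handler

event = {
    "meta_analysis_id": "abc123",  # placeholder; required, otherwise 400 / KeyError
    "environment": "staging",      # optional; defaults to "production"
    "no_upload": True,             # forwarded to the ECS task as the string "true"
    "n_cores": 4,                  # forwarded as the string "4"
}
response = run_handler.handler(event, None)
# Non-HTTP invocations get the body directly: job_id is the execution ARN and
# artifact_prefix is a generated UUID when not supplied in the payload.
print(response["job_id"], response["artifact_prefix"], response["status"])
```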
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+import boto3
+from botocore.exceptions import ClientError
+
+from compose_runner.aws_lambda.common import LambdaRequest
+
+_SFN = boto3.client("stepfunctions", region_name=os.environ.get("AWS_REGION", "us-east-1"))
+_S3 = boto3.client("s3", region_name=os.environ.get("AWS_REGION", "us-east-1"))
+
+RESULTS_BUCKET_ENV = "RESULTS_BUCKET"
+RESULTS_PREFIX_ENV = "RESULTS_PREFIX"
+METADATA_FILENAME = "metadata.json"
+
+
+def _serialize_dt(value: datetime) -> str:
+    return value.astimezone().isoformat()
+
+
+def _metadata_key(prefix: Optional[str], artifact_prefix: str) -> str:
+    if prefix:
+        return f"{prefix.rstrip('/')}/{artifact_prefix}/{METADATA_FILENAME}"
+    return f"{artifact_prefix}/{METADATA_FILENAME}"
+
+
+def _load_metadata(bucket: str, prefix: Optional[str], artifact_prefix: str) -> Optional[Dict[str, Any]]:
+    key = _metadata_key(prefix, artifact_prefix)
+    try:
+        response = _S3.get_object(Bucket=bucket, Key=key)
+    except ClientError as error:
+        if error.response["Error"]["Code"] in {"NoSuchKey", "404"}:
+            return None
+        raise
+    data = response["Body"].read()
+    return json.loads(data.decode("utf-8"))
+
+
+def _parse_output(output: Optional[str]) -> Dict[str, Any]:
+    if not output:
+        return {}
+    try:
+        return json.loads(output)
+    except json.JSONDecodeError:
+        return {"raw_output": output}
+
+
+def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
+    request = LambdaRequest.parse(event)
+    payload = request.payload
+
+    job_id = payload.get("job_id")
+    if not job_id:
+        message = "Request payload must include 'job_id'."
+        if request.is_http:
+            return request.bad_request(message, status_code=400)
+        raise KeyError(message)
+
+    try:
+        description = _SFN.describe_execution(executionArn=job_id)
+    except ClientError as error:
+        body = {"status": "FAILED", "error": error.response["Error"]["Message"]}
+        if request.is_http:
+            status_code = 404 if error.response["Error"]["Code"] == "ExecutionDoesNotExist" else 500
+            return request.respond(body, status_code=status_code)
+        raise
+
+    status = description["status"]
+    body: Dict[str, Any] = {
+        "job_id": job_id,
+        "status": status,
+        "start_time": _serialize_dt(description["startDate"]),
+    }
+    if "stopDate" in description:
+        body["stop_time"] = _serialize_dt(description["stopDate"])
+
+    output_doc = _parse_output(description.get("output"))
+    body["output"] = output_doc
+
+    artifact_prefix = description.get("name")
+    if not artifact_prefix:
+        raise ValueError("Execution does not expose a name; cannot determine artifact prefix.")
+    body["artifact_prefix"] = artifact_prefix
+
+    if status in {"SUCCEEDED", "FAILED"}:
+        results_info = output_doc.get("results") or {}
+        bucket = results_info.get("bucket") or os.environ.get(RESULTS_BUCKET_ENV)
+        prefix = results_info.get("prefix") or os.environ.get(RESULTS_PREFIX_ENV)
+
+        if bucket and artifact_prefix:
+            metadata = _load_metadata(bucket, prefix, artifact_prefix)
+            if metadata:
+                body["result"] = metadata
+
+    if status == "FAILED":
+        body["error"] = output_doc.get("error")
+
+    return request.respond(body)
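Finally, a sketch of checking a submitted job with the new status handler via direct invocation. The execution ARN is a placeholder for the `job_id` returned by the submit handler:

```python
# Illustrative direct invocation of the status handler.
from compose_runner.aws_lambda import status_handler

job_id = "arn:aws:states:us-east-1:123456789012:execution:ComposeRunner:example"  # placeholder
body = status_handler.handler({"job_id": job_id}, None)
# The body always carries job_id, status, start_time, output, and artifact_prefix;
# stop_time, result (the uploaded metadata.json), and error appear once the
# execution has finished.
print(body["status"], body.get("result"))
```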