nebelung 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nebelung-1.0.0/PKG-INFO +220 -0
- nebelung-1.0.0/README.md +199 -0
- nebelung-1.0.0/nebelung/__init__.py +11 -0
- nebelung-1.0.0/nebelung/terra_workflow.py +147 -0
- nebelung-1.0.0/nebelung/terra_workspace.py +387 -0
- nebelung-1.0.0/nebelung/types.py +60 -0
- nebelung-1.0.0/nebelung/utils.py +295 -0
- nebelung-1.0.0/nebelung/wdl.py +130 -0
- nebelung-1.0.0/pyproject.toml +33 -0
nebelung-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: nebelung
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Firecloud API Wrapper
|
|
5
|
+
Home-page: https://github.com/broadinstitute/nebelung
|
|
6
|
+
Keywords: terra,firecloud
|
|
7
|
+
Author: Devin McCabe
|
|
8
|
+
Author-email: dmccabe@broadinstitute.org
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: firecloud (>=0.16)
|
|
14
|
+
Requires-Dist: pandas (>=1.5)
|
|
15
|
+
Requires-Dist: pandera (>=0.20)
|
|
16
|
+
Requires-Dist: pydantic (>=2.8)
|
|
17
|
+
Requires-Dist: pygithub (>=2.3)
|
|
18
|
+
Project-URL: Repository, https://github.com/broadinstitute/nebelung
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
Nebelung: Python wrapper for the Firecloud API
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+

|
|
25
|
+
|
|
26
|
+
This package provides a wrapper around the [Firecloud](https://pypi.org/project/firecloud/) package and performs a similar, though cat-themed, function to [dalmatian](https://github.com/getzlab/dalmatian).
|
|
27
|
+
|
|
28
|
+
# Installation
|
|
29
|
+
|
|
30
|
+
Nebelung requires Python 3.11 or later.
|
|
31
|
+
|
|
32
|
+
```shell
|
|
33
|
+
poetry add nebelung
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
# Usage
|
|
37
|
+
|
|
38
|
+
The package has two classes, `TerraWorkspace` and `TerraWorkflow`, and a variety of utility functions that wrap a subset of Firecloud API functionality.
|
|
39
|
+
|
|
40
|
+
## Workspaces
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from nebelung.terra_workspace import TerraWorkspace
|
|
44
|
+
|
|
45
|
+
terra_workspace = TerraWorkspace(
|
|
46
|
+
workspace_namespace="terra_workspace_namespace",
|
|
47
|
+
workspace_name="terra_workspace_name",
|
|
48
|
+
owners=["user1@example.com", "group@firecloud.org"],
|
|
49
|
+
)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Entities
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
# get a workspace data table as a Pandas data frame
|
|
56
|
+
df = terra_workspace.get_entities("sample")
|
|
57
|
+
|
|
58
|
+
# get a workspace data table as a Pandas data frame typed with Pandera
|
|
59
|
+
# (`YourPanderaSchema` should subclass `nebelung.types.CoercedDataFrame`)
|
|
60
|
+
df = terra_workspace.get_entities("sample", YourPanderaSchema)
|
|
61
|
+
|
|
62
|
+
# upsert a data frame to a workspace data table
|
|
63
|
+
terra_workspace.upload_entities(df) # first column of `df` should be, e.g., `sample_id`
|
|
64
|
+
|
|
65
|
+
# create a sample set named, e.g., `samples_2024-08-21T17-24-19_call_cnvs`
|
|
66
|
+
sample_set_id = terra_workspace.create_sample_set(
|
|
67
|
+
["sample_id1", "sample_id2"],
|
|
68
|
+
suffix="call_cnvs",
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Workflow outputs
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
# collect workflow outputs from successful jobs as a list of `nebelung.types.TaskResult` objects
|
|
76
|
+
outputs = terra_workspace.collect_workflow_outputs()
|
|
77
|
+
|
|
78
|
+
# collect workflow outputs from successful jobs submitted in the last week
|
|
79
|
+
import datetime
|
|
80
|
+
a_week_ago = datetime.datetime.now() - datetime.timedelta(days=7)
|
|
81
|
+
outputs = terra_workspace.collect_workflow_outputs(since=a_week_ago)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Workflow
|
|
85
|
+
|
|
86
|
+
Here, a "workflow" (standard data pipeline terminology) comprises a "method" and "method config" (Terra terminology).
|
|
87
|
+
|
|
88
|
+
The standard method for making a WDL-based workflow available in a Terra workspace is to configure the git repo to push to [Dockstore](https://dockstore.org/). Although this would be the recommended technique to make a workflow available publicly, there are several drawbacks:
|
|
89
|
+
|
|
90
|
+
- The git repo must be public (for GCP-backed Terra workspaces at least).
|
|
91
|
+
- Every change to the method (WDL) or method config (JSON) requires creating and pushing a git commit.
|
|
92
|
+
- The workflow isn't updated on Dockstore immediately, since it depends on continuous deployment (CD).
|
|
93
|
+
- The Dockstore UI doesn't provide great visibility into CD build failures and their causes.
|
|
94
|
+
|
|
95
|
+
An alternative to Dockstore is to push the WDL directly to Firecloud. However, [that API endpoint](https://api.firecloud.org/#/Method%20Repository/post_api_methods) doesn't support uploading a WDL script that imports other local WDL scripts, nor a zip file of cross-referenced WDL scripts (like Cromwell does). The endpoint will accept WDL that imports other scripts via URLs, but currently only from the `githubusercontent.com` domain.
|
|
96
|
+
|
|
97
|
+
### Method persistence with GitHub gists
|
|
98
|
+
|
|
99
|
+
Thus, Nebelung (ab)uses [GitHub gists](https://gist.github.com/) to persist all the WDL scripts for a workflow as multiple files belonging to a single gist, then uploads the top-level WDL script's code to Firecloud. Any `import "./path/to/included/script.wdl" as other_script` statement is rewritten so that the imported script is persisted in the gist and thus imported from a `https://gist.githubusercontent.com` URL. This happens recursively, so local imports can have their own local imports.
|
|
100
|
+
|
|
101
|
+
### Method config
|
|
102
|
+
|
|
103
|
+
To aid in automation and make it easier to submit jobs manually without filling out many fields in the job submission UI, a JSON-formatted method config is also required, e.g.:
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
{
|
|
107
|
+
"deleted": false,
|
|
108
|
+
"inputs": {
|
|
109
|
+
"call_cnvs.sample_id": "this.sample_id"
|
|
110
|
+
},
|
|
111
|
+
"methodConfigVersion": 1,
|
|
112
|
+
"methodRepoMethod": {
|
|
113
|
+
"methodNamespace": "omics_pipelines",
|
|
114
|
+
"methodName": "call_cnvs",
|
|
115
|
+
"methodVersion": 1
|
|
116
|
+
},
|
|
117
|
+
"namespace": "omics_pipelines",
|
|
118
|
+
"name": "call_cnvs",
|
|
119
|
+
"outputs": {
|
|
120
|
+
"call_cnvs.segs": "this.segments"
|
|
121
|
+
},
|
|
122
|
+
"rootEntityType": "sample"
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
- Both methods and method configs have their own namespaces. To simplify things, the above example uses the same sets of values for both. This approach might not be ideal if your methods and their configs are not one-to-one.
|
|
127
|
+
- The `TerraWorkspace.update_workflow` method will replace the `methodVersion` with an auto-incrementing version number based on the latest method's "snapshot ID" each time the method is updated. The `methodConfigVersion` should be incremented manually if desired.
|
|
128
|
+
|
|
129
|
+
### Versioning
|
|
130
|
+
|
|
131
|
+
Some information about a submitted job's method isn't easily recovered via the Firecloud API later on. Both `update_workflow` and `collect_workflow_outputs` are written to make it easier to connect workflow outputs to method versions for use in object (workflow output files and values) versioning. Include these workflow inputs in the WDL to enable this feature:
|
|
132
|
+
|
|
133
|
+
```wdl
|
|
134
|
+
version 1.0
|
|
135
|
+
|
|
136
|
+
workflow call_cnvs {
|
|
137
|
+
input {
|
|
138
|
+
String workflow_version = "1.0" # internal semver not tied to WARP releases
|
|
139
|
+
String workflow_source_url # populated automatically with URL of this script
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
The `update_workflow` method will automatically include these workflow inputs in the new method config's inputs, with `workflow_source_url` being set dynamically to the URL of the GitHub gist of that WDL script and `workflow_version` available for explicitly versioning the WDL.
|
|
145
|
+
|
|
146
|
+
Because GitHub gist has its own built-in versioning, a `workflow_source_url` stored in a job submission's inputs will always resolve to the exact WDL script that was used in the job, even if that method is updated later.
|
|
147
|
+
|
|
148
|
+
### Validation
|
|
149
|
+
|
|
150
|
+
To avoid persisting potentially invalid WDL, `update_workflow` also validates all the WDL scripts with [WOMtool](https://cromwell.readthedocs.io/en/stable/WOMtool) first.
|
|
151
|
+
|
|
152
|
+
### Example
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
import os
|
|
156
|
+
from pathlib import Path
|
|
157
|
+
from nebelung.terra_workflow import TerraWorkflow
|
|
158
|
+
|
|
159
|
+
# download the latest WOMtool from https://github.com/broadinstitute/cromwell/releases
|
|
160
|
+
os.environ["WOMTOOL_JAR"] = "/path/to/womtool.jar"
|
|
161
|
+
|
|
162
|
+
# generate a Github personal access token (fine-grained) at
|
|
163
|
+
# https://github.com/settings/tokens?type=beta
|
|
164
|
+
# with the "Read and Write access to gists" permission
|
|
165
|
+
os.environ["GITHUB_PAT"] = "github_pat_..."
|
|
166
|
+
|
|
167
|
+
terra_workflow = TerraWorkflow(
|
|
168
|
+
repo_namespace="omics_pipelines", # should match `methodRepoMethod` from method config
|
|
169
|
+
repo_method_name="call_cnvs", # should match `methodRepoMethod` from method config
|
|
170
|
+
method_config_name="call_cnvs", # should match `name` from method config
|
|
171
|
+
method_synopsis="This method calls CNVs.",
|
|
172
|
+
workflow_wdl_path=Path("/path/to/call_cnvs.wdl").resolve(),
|
|
173
|
+
method_config_json_path=Path("/path/to/call_cnvs.json").resolve(),
|
|
174
|
+
github_pat="github_pat_...", # (if not using the GITHUB_PAT ENV variable)
|
|
175
|
+
womtool_jar="/path/to/womtool.jar", # (if not using the WOMTOOL_JAR ENV variable)
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# create or update a workflow (i.e. method and method config) directly in Firecloud
|
|
179
|
+
terra_workspace.update_workflow(terra_workflow, n_snapshots_to_keep=20)
|
|
180
|
+
|
|
181
|
+
# submit a job
|
|
182
|
+
terra_workspace.submit_workflow_run(
|
|
183
|
+
terra_workflow,
|
|
184
|
+
# any arguments below are passed to `firecloud_api.create_submission`
|
|
185
|
+
entity="samples_2024-08-21T17-24-19_call_cnvs", # from `create_sample_set`
|
|
186
|
+
etype="sample_set", # data type of the `entity` arg
|
|
187
|
+
expression="this.samples", # the root entity (i.e. the WDL expects a single sample)
|
|
188
|
+
use_callcache=True,
|
|
189
|
+
use_reference_disks=False,
|
|
190
|
+
memory_retry_multiplier=1.2,
|
|
191
|
+
)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Call Firecloud API directly
|
|
195
|
+
|
|
196
|
+
All calls to the Firecloud API made internally by Nebelung are retried automatically (with a backoff function) in the case of a networking-related error. This function also detects other errors returned by the API and parses the JSON response if the call was successful.
|
|
197
|
+
|
|
198
|
+
To use this functionality in the cases where Nebelung doesn't provide an endpoint wrapper, import the Firecloud API and the `call_firecloud_api` function:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from firecloud import api as firecloud_api
|
|
202
|
+
from nebelung.utils import call_firecloud_api
|
|
203
|
+
|
|
204
|
+
# get a job submission
|
|
205
|
+
result = call_firecloud_api(
|
|
206
|
+
firecloud_api.get_submission,
|
|
207
|
+
namespace="terra_workspace_namespace",
|
|
208
|
+
workspace="terra_workspace_name",
|
|
209
|
+
max_retries=1,
|
|
210
|
+
# kwargs for `get_submission`
|
|
211
|
+
submission_id="<uuid>",
|
|
212
|
+
)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
# Development
|
|
216
|
+
|
|
217
|
+
Run `pre-commit run --all-files` to automatically format your code with [Ruff](https://docs.astral.sh/ruff/) and check static types with [Pyright](https://microsoft.github.io/pyright).
|
|
218
|
+
|
|
219
|
+
To update the [package on pypi.org](https://pypi.org/project/nebelung), update the `version` in `pyproject.toml` and run `poetry publish --build`.
|
|
220
|
+
|
nebelung-1.0.0/README.md
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
Nebelung: Python wrapper for the Firecloud API
|
|
2
|
+
---
|
|
3
|
+
|
|
4
|
+

|
|
5
|
+
|
|
6
|
+
This package provides a wrapper around the [Firecloud](https://pypi.org/project/firecloud/) package and performs a similar, though cat-themed, function to [dalmatian](https://github.com/getzlab/dalmatian).
|
|
7
|
+
|
|
8
|
+
# Installation
|
|
9
|
+
|
|
10
|
+
Nebelung requires Python 3.11 or later.
|
|
11
|
+
|
|
12
|
+
```shell
|
|
13
|
+
poetry add nebelung
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
# Usage
|
|
17
|
+
|
|
18
|
+
The package has two classes, `TerraWorkspace` and `TerraWorkflow`, and a variety of utility functions that wrap a subset of Firecloud API functionality.
|
|
19
|
+
|
|
20
|
+
## Workspaces
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from nebelung.terra_workspace import TerraWorkspace
|
|
24
|
+
|
|
25
|
+
terra_workspace = TerraWorkspace(
|
|
26
|
+
workspace_namespace="terra_workspace_namespace",
|
|
27
|
+
workspace_name="terra_workspace_name",
|
|
28
|
+
owners=["user1@example.com", "group@firecloud.org"],
|
|
29
|
+
)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Entities
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
# get a workspace data table as a Pandas data frame
|
|
36
|
+
df = terra_workspace.get_entities("sample")
|
|
37
|
+
|
|
38
|
+
# get a workspace data table as a Pandas data frame typed with Pandera
|
|
39
|
+
# (`YourPanderaSchema` should subclass `nebelung.types.CoercedDataFrame`)
|
|
40
|
+
df = terra_workspace.get_entities("sample", YourPanderaSchema)
|
|
41
|
+
|
|
42
|
+
# upsert a data frame to a workspace data table
|
|
43
|
+
terra_workspace.upload_entities(df) # first column of `df` should be, e.g., `sample_id`
|
|
44
|
+
|
|
45
|
+
# create a sample set named, e.g., `samples_2024-08-21T17-24-19_call_cnvs`
|
|
46
|
+
sample_set_id = terra_workspace.create_sample_set(
|
|
47
|
+
["sample_id1", "sample_id2"],
|
|
48
|
+
suffix="call_cnvs",
|
|
49
|
+
)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Workflow outputs
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
# collect workflow outputs from successful jobs as a list of `nebelung.types.TaskResult` objects
|
|
56
|
+
outputs = terra_workspace.collect_workflow_outputs()
|
|
57
|
+
|
|
58
|
+
# collect workflow outputs from successful jobs submitted in the last week
|
|
59
|
+
import datetime
|
|
60
|
+
a_week_ago = datetime.datetime.now() - datetime.timedelta(days=7)
|
|
61
|
+
outputs = terra_workspace.collect_workflow_outputs(since=a_week_ago)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Workflow
|
|
65
|
+
|
|
66
|
+
Here, a "workflow" (standard data pipeline terminology) comprises a "method" and "method config" (Terra terminology).
|
|
67
|
+
|
|
68
|
+
The standard method for making a WDL-based workflow available in a Terra workspace is to configure the git repo to push to [Dockstore](https://dockstore.org/). Although this would be the recommended technique to make a workflow available publicly, there are several drawbacks:
|
|
69
|
+
|
|
70
|
+
- The git repo must be public (for GCP-backed Terra workspaces at least).
|
|
71
|
+
- Every change to the method (WDL) or method config (JSON) requires creating and pushing a git commit.
|
|
72
|
+
- The workflow isn't updated on Dockstore immediately, since it depends on continuous deployment (CD).
|
|
73
|
+
- The Dockstore UI doesn't provide great visibility into CD build failures and their causes.
|
|
74
|
+
|
|
75
|
+
An alternative to Dockstore is to push the WDL directly to Firecloud. However, [that API endpoint](https://api.firecloud.org/#/Method%20Repository/post_api_methods) doesn't support uploading a WDL script that imports other local WDL scripts, nor a zip file of cross-referenced WDL scripts (like Cromwell does). The endpoint will accept WDL that imports other scripts via URLs, but currently only from the `githubusercontent.com` domain.
|
|
76
|
+
|
|
77
|
+
### Method persistence with GitHub gists
|
|
78
|
+
|
|
79
|
+
Thus, Nebelung (ab)uses [GitHub gists](https://gist.github.com/) to persist all the WDL scripts for a workflow as multiple files belonging to a single gist, then uploads the top-level WDL script's code to Firecloud. Any `import "./path/to/included/script.wdl" as other_script` statement is rewritten so that the imported script is persisted in the gist and thus imported from a `https://gist.githubusercontent.com` URL. This happens recursively, so local imports can have their own local imports.
|
|
80
|
+
|
|
81
|
+
### Method config
|
|
82
|
+
|
|
83
|
+
To aid in automation and make it easier to submit jobs manually without filling out many fields in the job submission UI, a JSON-formatted method config is also required, e.g.:
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"deleted": false,
|
|
88
|
+
"inputs": {
|
|
89
|
+
"call_cnvs.sample_id": "this.sample_id"
|
|
90
|
+
},
|
|
91
|
+
"methodConfigVersion": 1,
|
|
92
|
+
"methodRepoMethod": {
|
|
93
|
+
"methodNamespace": "omics_pipelines",
|
|
94
|
+
"methodName": "call_cnvs",
|
|
95
|
+
"methodVersion": 1
|
|
96
|
+
},
|
|
97
|
+
"namespace": "omics_pipelines",
|
|
98
|
+
"name": "call_cnvs",
|
|
99
|
+
"outputs": {
|
|
100
|
+
"call_cnvs.segs": "this.segments"
|
|
101
|
+
},
|
|
102
|
+
"rootEntityType": "sample"
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
- Both methods and method configs have their own namespaces. To simplify things, the above example uses the same sets of values for both. This approach might not be ideal if your methods and their configs are not one-to-one.
|
|
107
|
+
- The `TerraWorkspace.update_workflow` method will replace the `methodVersion` with an auto-incrementing version number based on the latest method's "snapshot ID" each time the method is updated. The `methodConfigVersion` should be incremented manually if desired.
|
|
108
|
+
|
|
109
|
+
### Versioning
|
|
110
|
+
|
|
111
|
+
Some information about a submitted job's method isn't easily recovered via the Firecloud API later on. Both `update_workflow` and `collect_workflow_outputs` are written to make it easier to connect workflow outputs to method versions for use in object (workflow output files and values) versioning. Include these workflow inputs in the WDL to enable this feature:
|
|
112
|
+
|
|
113
|
+
```wdl
|
|
114
|
+
version 1.0
|
|
115
|
+
|
|
116
|
+
workflow call_cnvs {
|
|
117
|
+
input {
|
|
118
|
+
String workflow_version = "1.0" # internal semver not tied to WARP releases
|
|
119
|
+
String workflow_source_url # populated automatically with URL of this script
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
The `update_workflow` method will automatically include these workflow inputs in the new method config's inputs, with `workflow_source_url` being set dynamically to the URL of the GitHub gist of that WDL script and `workflow_version` available for explicitly versioning the WDL.
|
|
125
|
+
|
|
126
|
+
Because GitHub gist has its own built-in versioning, a `workflow_source_url` stored in a job submission's inputs will always resolve to the exact WDL script that was used in the job, even if that method is updated later.
|
|
127
|
+
|
|
128
|
+
### Validation
|
|
129
|
+
|
|
130
|
+
To avoid persisting potentially invalid WDL, `update_workflow` also validates all the WDL scripts with [WOMtool](https://cromwell.readthedocs.io/en/stable/WOMtool) first.
|
|
131
|
+
|
|
132
|
+
### Example
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
import os
|
|
136
|
+
from pathlib import Path
|
|
137
|
+
from nebelung.terra_workflow import TerraWorkflow
|
|
138
|
+
|
|
139
|
+
# download the latest WOMtool from https://github.com/broadinstitute/cromwell/releases
|
|
140
|
+
os.environ["WOMTOOL_JAR"] = "/path/to/womtool.jar"
|
|
141
|
+
|
|
142
|
+
# generate a Github personal access token (fine-grained) at
|
|
143
|
+
# https://github.com/settings/tokens?type=beta
|
|
144
|
+
# with the "Read and Write access to gists" permission
|
|
145
|
+
os.environ["GITHUB_PAT"] = "github_pat_..."
|
|
146
|
+
|
|
147
|
+
terra_workflow = TerraWorkflow(
|
|
148
|
+
repo_namespace="omics_pipelines", # should match `methodRepoMethod` from method config
|
|
149
|
+
repo_method_name="call_cnvs", # should match `methodRepoMethod` from method config
|
|
150
|
+
method_config_name="call_cnvs", # should match `name` from method config
|
|
151
|
+
method_synopsis="This method calls CNVs.",
|
|
152
|
+
workflow_wdl_path=Path("/path/to/call_cnvs.wdl").resolve(),
|
|
153
|
+
method_config_json_path=Path("/path/to/call_cnvs.json").resolve(),
|
|
154
|
+
github_pat="github_pat_...", # (if not using the GITHUB_PAT ENV variable)
|
|
155
|
+
womtool_jar="/path/to/womtool.jar", # (if not using the WOMTOOL_JAR ENV variable)
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
# create or update a workflow (i.e. method and method config) directly in Firecloud
|
|
159
|
+
terra_workspace.update_workflow(terra_workflow, n_snapshots_to_keep=20)
|
|
160
|
+
|
|
161
|
+
# submit a job
|
|
162
|
+
terra_workspace.submit_workflow_run(
|
|
163
|
+
terra_workflow,
|
|
164
|
+
# any arguments below are passed to `firecloud_api.create_submission`
|
|
165
|
+
entity="samples_2024-08-21T17-24-19_call_cnvs", # from `create_sample_set`
|
|
166
|
+
etype="sample_set", # data type of the `entity` arg
|
|
167
|
+
expression="this.samples", # the root entity (i.e. the WDL expects a single sample)
|
|
168
|
+
use_callcache=True,
|
|
169
|
+
use_reference_disks=False,
|
|
170
|
+
memory_retry_multiplier=1.2,
|
|
171
|
+
)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Call Firecloud API directly
|
|
175
|
+
|
|
176
|
+
All calls to the Firecloud API made internally by Nebelung are retried automatically (with a backoff function) in the case of a networking-related error. This function also detects other errors returned by the API and parses the JSON response if the call was successful.
|
|
177
|
+
|
|
178
|
+
To use this functionality in the cases where Nebelung doesn't provide an endpoint wrapper, import the Firecloud API and the `call_firecloud_api` function:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from firecloud import api as firecloud_api
|
|
182
|
+
from nebelung.utils import call_firecloud_api
|
|
183
|
+
|
|
184
|
+
# get a job submission
|
|
185
|
+
result = call_firecloud_api(
|
|
186
|
+
firecloud_api.get_submission,
|
|
187
|
+
namespace="terra_workspace_namespace",
|
|
188
|
+
workspace="terra_workspace_name",
|
|
189
|
+
max_retries=1,
|
|
190
|
+
# kwargs for `get_submission`
|
|
191
|
+
submission_id="<uuid>",
|
|
192
|
+
)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
# Development
|
|
196
|
+
|
|
197
|
+
Run `pre-commit run --all-files` to automatically format your code with [Ruff](https://docs.astral.sh/ruff/) and check static types with [Pyright](https://microsoft.github.io/pyright).
|
|
198
|
+
|
|
199
|
+
To update the [package on pypi.org](https://pypi.org/project/nebelung), update the `version` in `pyproject.toml` and run `poetry publish --build`.
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from firecloud import api as firecloud_api
|
|
8
|
+
from firecloud.api import __post as firecloud_post
|
|
9
|
+
|
|
10
|
+
from nebelung.types import PersistedWdl
|
|
11
|
+
from nebelung.utils import call_firecloud_api
|
|
12
|
+
from nebelung.wdl import GistedWdl
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TerraWorkflow:
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
repo_namespace: str,
|
|
19
|
+
repo_method_name: str,
|
|
20
|
+
method_config_name: str,
|
|
21
|
+
method_synopsis: str,
|
|
22
|
+
workflow_wdl_path: Path,
|
|
23
|
+
method_config_json_path: Path,
|
|
24
|
+
github_pat: str | None = None,
|
|
25
|
+
womtool_jar: str | None = None,
|
|
26
|
+
) -> None:
|
|
27
|
+
self.repo_namespace = repo_namespace
|
|
28
|
+
self.repo_method_name = repo_method_name
|
|
29
|
+
self.method_config_name = method_config_name
|
|
30
|
+
self.method_synopsis = method_synopsis
|
|
31
|
+
self.workflow_wdl_path = workflow_wdl_path
|
|
32
|
+
self.method_config_json_path = method_config_json_path
|
|
33
|
+
self.github_pat = os.getenv("GITHUB_PAT", github_pat)
|
|
34
|
+
self.womtool_jar = os.getenv("WOMTOOL_JAR", womtool_jar)
|
|
35
|
+
|
|
36
|
+
self.method_config = json.load(open(self.method_config_json_path, "r"))
|
|
37
|
+
self.persisted_wdl_script: PersistedWdl | None = None
|
|
38
|
+
|
|
39
|
+
def persist_method_on_github(self) -> None:
|
|
40
|
+
"""
|
|
41
|
+
Upload the method's WDL script to GitHub, rewriting import statements for
|
|
42
|
+
dependent WDL scripts as needed.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
assert self.github_pat is not None, (
|
|
46
|
+
"A GitHub personal access token must be defined to persist this method on "
|
|
47
|
+
"GitHub. Set the GITHUB_PAT environment variable or the `github_pat` "
|
|
48
|
+
"argument when instantiating this `TerraWorkflow` instance."
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
assert self.womtool_jar is not None, (
|
|
52
|
+
"A path to a WOMTool .jar file is required to validate the WDL script. Set "
|
|
53
|
+
"the WOMTOOL_JAR environment variable or the `womtool_jar` argument when "
|
|
54
|
+
"instantiating this `TerraWorkflow` instance."
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
if self.persisted_wdl_script is None:
|
|
58
|
+
logging.info(f"Persisting {self.workflow_wdl_path} on GitHub")
|
|
59
|
+
gisted_wdl = GistedWdl(
|
|
60
|
+
method_name=self.repo_method_name,
|
|
61
|
+
github_pat=self.github_pat,
|
|
62
|
+
womtool_jar=self.womtool_jar,
|
|
63
|
+
)
|
|
64
|
+
self.persisted_wdl_script = gisted_wdl.persist_wdl_script(
|
|
65
|
+
wdl_path=self.workflow_wdl_path
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
def update_method(self, owners: list[str]) -> dict:
|
|
69
|
+
"""
|
|
70
|
+
Update a Firecloud method.
|
|
71
|
+
|
|
72
|
+
:param owners: a list of Firecloud users/groups to set as owners
|
|
73
|
+
:return: the latest method's snapshot
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
# get contents of WDL uploaded to GCS
|
|
77
|
+
self.persist_method_on_github()
|
|
78
|
+
assert self.persisted_wdl_script is not None
|
|
79
|
+
|
|
80
|
+
logging.info("Setting method repository config ACL")
|
|
81
|
+
# the firecloud package doesn't have a wrapper for this endpoint
|
|
82
|
+
call_firecloud_api(
|
|
83
|
+
firecloud_post,
|
|
84
|
+
methcall=f"configurations/{self.repo_namespace}/permissions",
|
|
85
|
+
json=[{"user": x, "role": "OWNER"} for x in owners],
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
with tempfile.NamedTemporaryFile("w") as f:
|
|
89
|
+
f.write(self.persisted_wdl_script["wdl"])
|
|
90
|
+
f.flush()
|
|
91
|
+
|
|
92
|
+
logging.info("Updating method")
|
|
93
|
+
snapshot = call_firecloud_api(
|
|
94
|
+
firecloud_api.update_repository_method,
|
|
95
|
+
namespace=self.repo_namespace,
|
|
96
|
+
method=self.repo_method_name,
|
|
97
|
+
synopsis=self.method_synopsis,
|
|
98
|
+
wdl=f.name,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
logging.info("Setting method ACL")
|
|
102
|
+
call_firecloud_api(
|
|
103
|
+
firecloud_api.update_repository_method_acl,
|
|
104
|
+
namespace=self.repo_namespace,
|
|
105
|
+
method=self.repo_method_name,
|
|
106
|
+
snapshot_id=snapshot["snapshotId"],
|
|
107
|
+
acl_updates=[{"user": x, "role": "OWNER"} for x in owners],
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
return snapshot
|
|
111
|
+
|
|
112
|
+
def get_method_snapshots(self) -> list[dict]:
|
|
113
|
+
"""
|
|
114
|
+
Get all of the snapshots of the method.
|
|
115
|
+
|
|
116
|
+
:return: list of snapshot information, most recent first
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
logging.info(f"Getting {self.repo_method_name} method snapshots")
|
|
120
|
+
snapshots = call_firecloud_api(
|
|
121
|
+
firecloud_api.list_repository_methods,
|
|
122
|
+
namespace=self.repo_namespace,
|
|
123
|
+
name=self.repo_method_name,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
snapshots.sort(key=lambda x: x["snapshotId"], reverse=True)
|
|
127
|
+
return snapshots
|
|
128
|
+
|
|
129
|
+
def delete_old_method_snapshots(self, n_snapshots_to_keep: int) -> None:
|
|
130
|
+
"""
|
|
131
|
+
Delete all but `n_snapshots_to_keep` of the most recent snapshots of the method.
|
|
132
|
+
|
|
133
|
+
:param n_snapshots_to_keep: the number of snapshots to keep
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
snapshots = self.get_method_snapshots()
|
|
137
|
+
|
|
138
|
+
to_delete = snapshots[n_snapshots_to_keep:]
|
|
139
|
+
logging.info(f"Deleting {len(to_delete)} old snapshot(s)")
|
|
140
|
+
|
|
141
|
+
for x in to_delete:
|
|
142
|
+
call_firecloud_api(
|
|
143
|
+
firecloud_api.delete_repository_method,
|
|
144
|
+
namespace=self.repo_namespace,
|
|
145
|
+
name=self.repo_method_name,
|
|
146
|
+
snapshot_id=x["snapshotId"],
|
|
147
|
+
)
|