cpg-utils 5.3.0__tar.gz → 5.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/PKG-INFO +1 -1
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/cromwell.py +5 -7
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/hail_batch.py +23 -2
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils.egg-info/PKG-INFO +1 -1
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/setup.py +1 -1
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/LICENSE +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/README.md +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/__init__.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/cloud.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/cloudpath_hail_az.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/config.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/constants.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/cromwell_model.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/dataproc.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/git.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/membership.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/py.typed +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils/slack.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils.egg-info/SOURCES.txt +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils.egg-info/dependency_links.txt +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils.egg-info/requires.txt +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/cpg_utils.egg-info/top_level.txt +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/pyproject.toml +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/setup.cfg +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/test/__init__.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/test/test_config.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/test/test_cromwell.py +0 -0
- {cpg_utils-5.3.0 → cpg_utils-5.4.0}/test/test_doctests.py +0 -0
|
@@ -42,7 +42,7 @@ class CromwellBackend(Enum):
|
|
|
42
42
|
pipelines_api = 'papi'
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
DEFAULT_BACKEND = CromwellBackend.
|
|
45
|
+
DEFAULT_BACKEND = CromwellBackend.batch
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
class CromwellOutputType:
|
|
@@ -643,7 +643,8 @@ def watch_workflow_and_get_output(
|
|
|
643
643
|
array_length = output.array_length
|
|
644
644
|
if array_length is None:
|
|
645
645
|
# is single
|
|
646
|
-
j = b.
|
|
646
|
+
j = b.new_bash_job(f'{job_prefix}_collect_{output_name}')
|
|
647
|
+
j.image(driver_image)
|
|
647
648
|
if output.resource_group:
|
|
648
649
|
# is single resource group
|
|
649
650
|
out_file_map[oname] = _copy_resource_group_into_batch(
|
|
@@ -660,13 +661,13 @@ def watch_workflow_and_get_output(
|
|
|
660
661
|
output_name=output_name,
|
|
661
662
|
idx=None,
|
|
662
663
|
copy_file_into_batch=output.copy_file_into_batch,
|
|
663
|
-
driver_image=driver_image,
|
|
664
664
|
)
|
|
665
665
|
else:
|
|
666
666
|
# is array
|
|
667
667
|
outs: list[Resource] = []
|
|
668
668
|
for idx in range(array_length):
|
|
669
|
-
j = b.
|
|
669
|
+
j = b.new_bash_job(f'{job_prefix}_collect_{output_name}[{idx}]')
|
|
670
|
+
j.image(driver_image)
|
|
670
671
|
if output.resource_group:
|
|
671
672
|
# is array output group
|
|
672
673
|
outs.append(
|
|
@@ -685,7 +686,6 @@ def watch_workflow_and_get_output(
|
|
|
685
686
|
output_name=output_name,
|
|
686
687
|
idx=idx,
|
|
687
688
|
copy_file_into_batch=output.copy_file_into_batch,
|
|
688
|
-
driver_image=driver_image,
|
|
689
689
|
),
|
|
690
690
|
)
|
|
691
691
|
|
|
@@ -701,7 +701,6 @@ def _copy_basic_file_into_batch(
|
|
|
701
701
|
output_name: str,
|
|
702
702
|
idx: int | None,
|
|
703
703
|
copy_file_into_batch: bool,
|
|
704
|
-
driver_image: str,
|
|
705
704
|
) -> Resource:
|
|
706
705
|
"""
|
|
707
706
|
1. Take the file-pointer to the dictionary `rdict`,
|
|
@@ -725,7 +724,6 @@ def _copy_basic_file_into_batch(
|
|
|
725
724
|
jq_el = f'"{output_name}"[{idx}]'
|
|
726
725
|
|
|
727
726
|
# activate to gcloud storage cp
|
|
728
|
-
j.image(driver_image)
|
|
729
727
|
j.env('GOOGLE_APPLICATION_CREDENTIALS', '/gsa-key/key.json')
|
|
730
728
|
j.command(GCLOUD_ACTIVATE_AUTH)
|
|
731
729
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Convenience functions related to Hail."""
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import base64
|
|
5
|
+
import gzip
|
|
4
6
|
import inspect
|
|
5
7
|
import logging
|
|
6
8
|
import os
|
|
@@ -156,6 +158,23 @@ class Batch(hb.Batch):
|
|
|
156
158
|
toml.dump(dict(get_config()), f)
|
|
157
159
|
set_config_paths([str(config_path)])
|
|
158
160
|
|
|
161
|
+
def _pack_attribute(self, key: str, value: str) -> dict[str, str]:
|
|
162
|
+
"""
|
|
163
|
+
Attributes are stored in a TEXT database field, which is limited to 64K.
|
|
164
|
+
If necessary, compress the value and annotate the key accordingly.
|
|
165
|
+
Eventually this may no longer suffice and we will need to split the value
|
|
166
|
+
across several attributes or similar.
|
|
167
|
+
"""
|
|
168
|
+
if len(value) <= 10000: # noqa: PLR2004
|
|
169
|
+
return {key: value} # Store short values verbatim
|
|
170
|
+
|
|
171
|
+
raw = value.encode()
|
|
172
|
+
compressed_b64 = base64.standard_b64encode(gzip.compress(raw, compresslevel=9))
|
|
173
|
+
if len(compressed_b64) > 65535: # noqa: PLR2004
|
|
174
|
+
raise ValueError(f'Job attribute {key!r} value is too large')
|
|
175
|
+
|
|
176
|
+
return {f'{key}_gzip': compressed_b64.decode('ascii')}
|
|
177
|
+
|
|
159
178
|
def _process_job_attributes(
|
|
160
179
|
self,
|
|
161
180
|
name: str | None = None,
|
|
@@ -175,7 +194,7 @@ class Batch(hb.Batch):
|
|
|
175
194
|
dataset = attributes.get('dataset')
|
|
176
195
|
sequencing_group = attributes.get('sequencing_group')
|
|
177
196
|
participant_id = attributes.get('participant_id')
|
|
178
|
-
sequencing_groups: set[str] = set(attributes.
|
|
197
|
+
sequencing_groups: set[str] = set(attributes.pop('sequencing_groups', []) or [])
|
|
179
198
|
if sequencing_group:
|
|
180
199
|
sequencing_groups.add(sequencing_group)
|
|
181
200
|
part = attributes.get('part')
|
|
@@ -215,7 +234,9 @@ class Batch(hb.Batch):
|
|
|
215
234
|
self.job_by_tool[tool]['job_n'] += 1
|
|
216
235
|
self.job_by_tool[tool]['sequencing_groups'] |= sequencing_groups
|
|
217
236
|
|
|
218
|
-
|
|
237
|
+
seqgroups_str = str(sorted(sequencing_groups))
|
|
238
|
+
attributes.update(self._pack_attribute('sequencing_groups', seqgroups_str))
|
|
239
|
+
|
|
219
240
|
fixed_attrs = {k: str(v) for k, v in attributes.items()}
|
|
220
241
|
return name, fixed_attrs
|
|
221
242
|
|
|
@@ -8,7 +8,7 @@ with open('README.md') as f:
|
|
|
8
8
|
setup(
|
|
9
9
|
name='cpg-utils',
|
|
10
10
|
# This tag is automatically updated by bumpversion
|
|
11
|
-
version='5.3.0',
|
|
11
|
+
version='5.4.0',
|
|
12
12
|
description='Library of convenience functions specific to the CPG',
|
|
13
13
|
long_description=long_description,
|
|
14
14
|
long_description_content_type='text/markdown',
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|