cpg-utils 5.2.0__tar.gz → 5.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/PKG-INFO +22 -2
  2. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/cloud.py +54 -2
  3. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/config.py +32 -12
  4. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/cromwell.py +7 -7
  5. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/hail_batch.py +1 -1
  6. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils.egg-info/PKG-INFO +22 -2
  7. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils.egg-info/requires.txt +1 -0
  8. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/setup.py +2 -1
  9. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/LICENSE +0 -0
  10. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/README.md +0 -0
  11. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/__init__.py +0 -0
  12. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/cloudpath_hail_az.py +0 -0
  13. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/constants.py +0 -0
  14. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/cromwell_model.py +0 -0
  15. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/dataproc.py +0 -0
  16. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/git.py +0 -0
  17. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/membership.py +0 -0
  18. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/py.typed +0 -0
  19. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils/slack.py +0 -0
  20. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils.egg-info/SOURCES.txt +0 -0
  21. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils.egg-info/dependency_links.txt +0 -0
  22. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/cpg_utils.egg-info/top_level.txt +0 -0
  23. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/pyproject.toml +0 -0
  24. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/setup.cfg +0 -0
  25. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/test/__init__.py +0 -0
  26. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/test/test_config.py +0 -0
  27. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/test/test_cromwell.py +0 -0
  28. {cpg-utils-5.2.0 → cpg_utils-5.3.0}/test/test_doctests.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: cpg-utils
3
- Version: 5.2.0
3
+ Version: 5.3.0
4
4
  Summary: Library of convenience functions specific to the CPG
5
5
  Home-page: https://github.com/populationgenomics/cpg-utils
6
6
  License: MIT
@@ -17,6 +17,26 @@ Classifier: Topic :: Scientific/Engineering
17
17
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
+ Requires-Dist: boto3>=1.28.56
21
+ Requires-Dist: botocore>=1.31.56
22
+ Requires-Dist: cloudpathlib[all]
23
+ Requires-Dist: frozendict
24
+ Requires-Dist: google-auth>=1.27.0
25
+ Requires-Dist: google-cloud-artifact-registry
26
+ Requires-Dist: google-cloud-secret-manager
27
+ Requires-Dist: requests
28
+ Requires-Dist: tabulate
29
+ Requires-Dist: toml
30
+ Requires-Dist: deprecated
31
+ Dynamic: classifier
32
+ Dynamic: description
33
+ Dynamic: description-content-type
34
+ Dynamic: home-page
35
+ Dynamic: keywords
36
+ Dynamic: license
37
+ Dynamic: license-file
38
+ Dynamic: requires-dist
39
+ Dynamic: summary
20
40
 
21
41
  # cpg-utils
22
42
 
@@ -5,7 +5,9 @@ import os
5
5
  import re
6
6
  import subprocess
7
7
  import traceback
8
- from typing import Any
8
+ import urllib.parse
9
+ from collections import defaultdict
10
+ from typing import Any, NamedTuple
9
11
 
10
12
  # pylint: disable=no-name-in-module
11
13
  import google.api_core.exceptions
@@ -26,7 +28,7 @@ from google.auth._default import (
26
28
  _SERVICE_ACCOUNT_TYPE,
27
29
  )
28
30
  from google.auth.transport import requests
29
- from google.cloud import secretmanager
31
+ from google.cloud import artifactregistry, secretmanager
30
32
  from google.oauth2 import credentials as oauth2_credentials
31
33
  from google.oauth2 import service_account
32
34
 
@@ -123,6 +125,56 @@ def write_secret(project_id: str, secret_name: str, secret_value: str) -> None:
123
125
  secret_manager.disable_secret_version(request={'name': version.name})
124
126
 
125
127
 
128
+ class DockerImage(NamedTuple):
129
+ name: str
130
+ uri: str
131
+ tag_uri: str
132
+ size: str
133
+ build_time: str
134
+
135
+
136
+ _repo_image_tags: dict[str, defaultdict[str, dict[str, DockerImage]]] = {}
137
+
138
+
139
+ def _ensure_image_tags_loaded(project: str, location: str, repository: str) -> None:
140
+ """Populate _repo_image_tags as a map-of-map-of-maps of 'repository' -> 'imagename' -> 'tag' -> image."""
141
+ if repository in _repo_image_tags:
142
+ return
143
+
144
+ image_tags: defaultdict[str, dict[str, DockerImage]] = defaultdict(dict)
145
+
146
+ request = artifactregistry.ListDockerImagesRequest(
147
+ parent=f'projects/{project}/locations/{location}/repositories/{repository}',
148
+ page_size=500, # Increase efficiency by making fewer requests
149
+ )
150
+ for image in artifactregistry.ArtifactRegistryClient().list_docker_images(request):
151
+ name_and_checksum = image.name.rpartition('/dockerImages/')[2]
152
+ name = urllib.parse.unquote(name_and_checksum).rpartition('@')[0]
153
+ base_uri = image.uri.rpartition('@')[0]
154
+ for tag in image.tags:
155
+ image_tags[name][tag] = DockerImage(
156
+ image.name,
157
+ image.uri,
158
+ f'{base_uri}@{tag}',
159
+ image.image_size_bytes,
160
+ image.build_time,
161
+ )
162
+
163
+ image_tags.default_factory = None
164
+ _repo_image_tags[repository] = image_tags
165
+
166
+
167
+ def find_image(repository: str | None, name: str, version: str) -> DockerImage:
168
+ """Returns image details or raises ValueError if the image or tag does not exist."""
169
+ repository = f'images-{repository}' if repository is not None else 'images'
170
+ _ensure_image_tags_loaded('cpg-common', 'australia-southeast1', repository)
171
+ try:
172
+ return _repo_image_tags[repository][name][version]
173
+ except KeyError as e:
174
+ message = f'Image {name}:{version} not found in {repository} repository ({e} not found)'
175
+ raise ValueError(message) from None
176
+
177
+
126
178
  def get_google_identity_token(
127
179
  target_audience: str | None,
128
180
  request: google.auth.transport.Request | None = None,
@@ -9,6 +9,7 @@ import toml
9
9
  from frozendict import frozendict
10
10
 
11
11
  from cpg_utils import Path, to_path
12
+ from cpg_utils.cloud import find_image
12
13
 
13
14
  AR_GUID_NAME = 'ar-guid'
14
15
 
@@ -474,33 +475,52 @@ def output_path(
474
475
  )
475
476
 
476
477
 
477
- def image_path(key: str) -> str:
478
+ def image_path(
479
+ key: str,
480
+ version: str | list[str] | None = None,
481
+ repository: str | None = None,
482
+ ) -> str:
478
483
  """
479
- Returns a path to a container image using key in config's "images" section.
484
+ Returns a path to a container image for the given key (i.e., image name)
485
+ and version.
480
486
 
481
487
  Examples
482
488
  --------
483
- >> image_path('bcftools')
484
- 'australia-southeast1-docker.pkg.dev/cpg-common/images/bcftools:1.10.2'
485
-
486
- Assuming config structure as follows:
487
-
488
- ```toml
489
- [images]
490
- bcftools = 'australia-southeast1-docker.pkg.dev/cpg-common/images/bcftools:1.10.2'
491
- ```
489
+ >> image_path('bcftools', '1.16-1')
490
+ 'australia-southeast1-docker.pkg.dev/cpg-common/images/bcftools:1.16-1'
492
491
 
493
492
  Parameters
494
493
  ----------
495
494
  key : str
495
+ Specifies the image name.
496
+ When `version` is not specified:
496
497
  Describes the key within the `images` config section. Can list sections
497
498
  separated with '/'.
498
499
 
500
+ version : str or list[str], optional
501
+ Specifies the desired image version, e.g., '1.18-1', either directly as
502
+ a version number string or indirectly via a config key list which will
503
+ be used to retrieve a version number string via `config_retrieve`.
504
+
505
+ repository : str, optional
506
+ The suffix (e.g., 'dev' for images-dev) of an artifact registry repository
507
+ to be used instead of the default production images repository.
508
+
509
+ Using `image_path(key)` without giving `version` is deprecated. In future,
510
+ specifying it will be required.
511
+
499
512
  Returns
500
513
  -------
501
514
  str
502
515
  """
503
- return config_retrieve(['images', *key.strip('/').split('/')])
516
+ if version is None:
517
+ return config_retrieve(['images', *key.strip('/').split('/')])
518
+
519
+ if isinstance(version, list):
520
+ version = config_retrieve(version)
521
+
522
+ assert isinstance(version, str)
523
+ return find_image(repository, key, version).tag_uri
504
524
 
505
525
 
506
526
  def reference_path(key: str) -> str:
@@ -708,7 +708,7 @@ def _copy_basic_file_into_batch(
708
708
  2. the output name `output`,
709
709
  3. check that the value we select is a string,
710
710
  4. either:
711
- (a) gsutil cp it into `output_filename`
711
+ (a) gcloud storage cp it into `output_filename`
712
712
  (b) write the value into `output_filename`
713
713
  """
714
714
  output_filename = j.out
@@ -724,14 +724,14 @@ def _copy_basic_file_into_batch(
724
724
  # wrap this in quotes, because output often contains a '.', which has to be escaped in jq
725
725
  jq_el = f'"{output_name}"[{idx}]'
726
726
 
727
- # activate to gsutil cp
727
+ # activate to gcloud storage cp
728
728
  j.image(driver_image)
729
729
  j.env('GOOGLE_APPLICATION_CREDENTIALS', '/gsa-key/key.json')
730
730
  j.command(GCLOUD_ACTIVATE_AUTH)
731
731
 
732
732
  # this has to be in bash unfortunately :(
733
733
  # we want to check that the output we get is a string
734
- # if it starts with gs://, then we'll `gsutil cp` it into output_filename
734
+ # if it starts with gs://, then we'll `gcloud storage cp` it into output_filename
735
735
  # otherwise write the value into output_filename.
736
736
 
737
737
  # in future, add s3://* or AWS handling here
@@ -751,7 +751,7 @@ fi
751
751
  OUTPUT_VALUE=$(cat {rdict} | jq -r '.{jq_el}')
752
752
  if [[ "$OUTPUT_VALUE" == gs://* ]]; then
753
753
  echo "Copying file from $OUTPUT_VALUE";
754
- gsutil cp $OUTPUT_VALUE {output_filename};
754
+ gcloud storage cp $OUTPUT_VALUE {output_filename};
755
755
  else
756
756
  # cleaner to directly pipe into file
757
757
  cat {rdict} | jq -r '.{jq_el}' > {output_filename}
@@ -798,13 +798,13 @@ def _copy_resource_group_into_batch(
798
798
  # wrap this in quotes, because output often contains a '.', which has to be escaped in jq
799
799
  jq_els = [f'"{output_source}"[{idx}]' for output_source in rg.values()]
800
800
 
801
- # activate to gsutil cp
801
+ # activate to use a gcloud cp
802
802
  j.env('GOOGLE_APPLICATION_CREDENTIALS', '/gsa-key/key.json')
803
803
  j.command(GCLOUD_ACTIVATE_AUTH)
804
804
 
805
805
  # this has to be in bash unfortunately :(
806
806
  # we want to check that the output we get is a string
807
- # if it starts with gs://, then we'll `gsutil cp` it into output_filename
807
+ # if it starts with gs://, then we'll `gcloud storage cp` it into output_filename
808
808
  # otherwise write the value into output_filename.
809
809
 
810
810
  # in future, add s3://* or AWS handling here
@@ -825,7 +825,7 @@ def _copy_resource_group_into_batch(
825
825
  OUTPUT_VALUE=$(cat {rdict} | jq -r '.{jq_el}')
826
826
  if [[ "$OUTPUT_VALUE" == gs://* ]]; then
827
827
  echo "Copying file from $OUTPUT_VALUE";
828
- gsutil cp $OUTPUT_VALUE {output_filename}.{output_name};
828
+ gcloud storage cp $OUTPUT_VALUE {output_filename}.{output_name};
829
829
  else
830
830
  # cleaner to directly pipe into file
831
831
  cat {rdict} | jq -r '.{jq_el}' > {output_filename}.{output_name};
@@ -555,7 +555,7 @@ function retry_gs_cp {
555
555
  dst=/io/batch/${basename $src}
556
556
  fi
557
557
 
558
- retry gsutil -o GSUtil:check_hashes=never cp $src $dst
558
+ retry gcloud storage cp $src $dst
559
559
  }
560
560
  """
561
561
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: cpg-utils
3
- Version: 5.2.0
3
+ Version: 5.3.0
4
4
  Summary: Library of convenience functions specific to the CPG
5
5
  Home-page: https://github.com/populationgenomics/cpg-utils
6
6
  License: MIT
@@ -17,6 +17,26 @@ Classifier: Topic :: Scientific/Engineering
17
17
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
+ Requires-Dist: boto3>=1.28.56
21
+ Requires-Dist: botocore>=1.31.56
22
+ Requires-Dist: cloudpathlib[all]
23
+ Requires-Dist: frozendict
24
+ Requires-Dist: google-auth>=1.27.0
25
+ Requires-Dist: google-cloud-artifact-registry
26
+ Requires-Dist: google-cloud-secret-manager
27
+ Requires-Dist: requests
28
+ Requires-Dist: tabulate
29
+ Requires-Dist: toml
30
+ Requires-Dist: deprecated
31
+ Dynamic: classifier
32
+ Dynamic: description
33
+ Dynamic: description-content-type
34
+ Dynamic: home-page
35
+ Dynamic: keywords
36
+ Dynamic: license
37
+ Dynamic: license-file
38
+ Dynamic: requires-dist
39
+ Dynamic: summary
20
40
 
21
41
  # cpg-utils
22
42
 
@@ -3,6 +3,7 @@ botocore>=1.31.56
3
3
  cloudpathlib[all]
4
4
  frozendict
5
5
  google-auth>=1.27.0
6
+ google-cloud-artifact-registry
6
7
  google-cloud-secret-manager
7
8
  requests
8
9
  tabulate
@@ -8,7 +8,7 @@ with open('README.md') as f:
8
8
  setup(
9
9
  name='cpg-utils',
10
10
  # This tag is automatically updated by bumpversion
11
- version='5.2.0',
11
+ version='5.3.0',
12
12
  description='Library of convenience functions specific to the CPG',
13
13
  long_description=long_description,
14
14
  long_description_content_type='text/markdown',
@@ -24,6 +24,7 @@ setup(
24
24
  'cloudpathlib[all]',
25
25
  'frozendict',
26
26
  'google-auth>=1.27.0',
27
+ 'google-cloud-artifact-registry',
27
28
  'google-cloud-secret-manager',
28
29
  'requests',
29
30
  'tabulate',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes