dayhoff-tools 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,11 +6,14 @@ import shutil
6
6
  import subprocess
7
7
  import sys
8
8
  from pathlib import Path
9
+ from typing import Optional
9
10
 
10
11
  import toml
11
12
  import typer
12
13
  import yaml
13
14
 
15
+ # Import cloud helper lazily inside functions to avoid heavy deps at module load
16
+
14
17
 
15
18
  def test_github_actions_locally():
16
19
  """Run the script test_pytest_in_github_actions_container.sh.sh."""
@@ -52,9 +55,16 @@ def get_ancestry(filepath: str) -> None:
52
55
 
53
56
  def import_from_warehouse_typer() -> None:
54
57
  """Import a file from warehouse.
55
- This is a thin wrapper around `cli.utils.import_from_warehouse`,
56
- with interactive prompts using questionary.
58
+
59
+ Emits an early warning if the active GCP credentials are the *default VM
60
+ service account* because this will prevent DVC/gsutil from accessing the
61
+ warehouse bucket. The user can abort the command when running
62
+ interactively.
57
63
  """
64
+
65
+ # Early-exit guard for wrong GCP credentials
66
+ _warn_if_gcp_default_sa(force_prompt=True)
67
+
58
68
  # Import only when the function is called
59
69
  import questionary
60
70
  from dayhoff_tools.warehouse import import_from_warehouse
@@ -91,8 +101,16 @@ def import_from_warehouse_typer() -> None:
91
101
 
92
102
 
93
103
  def add_to_warehouse_typer() -> None:
94
- """Add a new data file to warehouse, and expand its .dvc file with
95
- metadata, including ancestor files."""
104
+ """Add a new data file to warehouse and enrich its generated .dvc file.
105
+
106
+ As with *dh wimport*, this command fails when the user is logged in with
107
+ the default VM service account. A guard therefore warns the user first
108
+ and allows them to abort interactively.
109
+ """
110
+
111
+ # Early-exit guard for wrong GCP credentials
112
+ _warn_if_gcp_default_sa(force_prompt=True)
113
+
96
114
  # Import only when the function is called
97
115
  import questionary
98
116
  from dayhoff_tools.warehouse import add_to_warehouse
@@ -548,3 +566,68 @@ def update_dependencies(
548
566
  except Exception as e:
549
567
  print(f"An unexpected error occurred: {e}")
550
568
  sys.exit(1)
569
+
570
+
571
+ # ----------------------
572
+ # Cloud credential guard
573
+ # ----------------------
574
+
575
+
576
+ def _warn_if_gcp_default_sa(force_prompt: bool = False) -> None:
577
+ """Warn the user when the active gcloud principal is the default VM service
578
+ account.
579
+
580
+ This situation prevents gsutil/DVC from accessing private buckets and is
581
+ almost never what we want when interacting with the warehouse. The
582
+ function prints a coloured warning and, when *force_prompt* is *True* and
583
+ the session is interactive, asks the user whether to continue or abort the
584
+ command.
585
+
586
+ Parameters
587
+ ----------
588
+ force_prompt : bool, default False
589
+ When *True* and running in an interactive shell (stdin & stdout are
590
+ TTYs), display a yes/no prompt (via *questionary.confirm*) asking the
591
+ user whether to proceed. If the user declines, ``SystemExit(1)`` is
592
+ raised to stop the CLI command immediately. In non-interactive
593
+ contexts the function merely prints the warning without prompting.
594
+ """
595
+
596
+ # Delay heavy import until the function is actually needed
597
+ from dayhoff_tools.cli import cloud_commands as _cc # local import to ease testing
598
+
599
+ try:
600
+ impersonation = _cc._get_current_gcp_impersonation()
601
+ user = _cc._get_current_gcp_user()
602
+ active = impersonation if impersonation != "None" else user
603
+ short_name = _cc._get_short_name(active)
604
+ except Exception:
605
+ # If gcloud is missing or some other unexpected error occurs, fail
606
+ # gracefully by doing nothing.
607
+ return
608
+
609
+ if short_name != "default VM service account":
610
+ # All good – nothing to warn about
611
+ return
612
+
613
+ # Colour constants – fall back to empty strings if not available
614
+ RED = getattr(_cc, "RED", "\033[0;31m")
615
+ YELLOW = getattr(_cc, "YELLOW", "\033[0;33m")
616
+ BLUE = getattr(_cc, "BLUE", "\033[0;36m")
617
+ NC = getattr(_cc, "NC", "\033[0m")
618
+
619
+ warning_msg = (
620
+ f"{YELLOW}⚠ You are currently authenticated as the *default VM service account*.{NC}\n"
621
+ f"{YELLOW} This will block gsutil/DVC access to private buckets (e.g. warehouse).{NC}\n"
622
+ f"{YELLOW} Run {BLUE}dh gcp login{YELLOW} or {BLUE}dh gcp use-devcon{YELLOW} before retrying.{NC}"
623
+ )
624
+ print(warning_msg, file=sys.stderr)
625
+
626
+ interactive = sys.stdin.isatty() and sys.stdout.isatty()
627
+ if force_prompt and interactive:
628
+ import questionary
629
+
630
+ proceed = questionary.confirm("Proceed anyway?", default=False).ask()
631
+ if not proceed:
632
+ print(f"{RED}Aborted due to unsafe GCP credentials.{NC}", file=sys.stderr)
633
+ raise SystemExit(1)
@@ -283,7 +283,6 @@ class MMSeqsProfileProcessor(Processor):
283
283
  aln_db = mmseqs_temp_dir / "alnDB"
284
284
  profile_db = mmseqs_temp_dir / "profileDB"
285
285
  result_db = mmseqs_temp_dir / "resultDB"
286
- hits_db = mmseqs_temp_dir / "hitsDB"
287
286
 
288
287
  try:
289
288
  # 1. Create query database
@@ -406,32 +405,19 @@ class MMSeqsProfileProcessor(Processor):
406
405
  run_base_dir,
407
406
  )
408
407
 
409
- # 9. Create subdatabase of hits from original target_db
408
+ # 9. Extract hit sequences directly to FASTA using result2flat
410
409
  self._run_mmseqs_command(
411
410
  [
412
411
  "mmseqs",
413
- "createsubdb",
414
- str(
415
- target_db
416
- ), # inputDB: database to filter (all target sequences)
417
- str(result_db), # filterDB: IDs appearing in result_db (2nd column)
418
- str(hits_db),
419
- "--subdb-mode",
420
- "1", # Use mode 1 to extract target sequences (2nd column)
421
- ],
422
- "Create hits subDB from target_db",
423
- run_base_dir,
424
- )
425
-
426
- # 10. Convert hit sequences to FASTA -> to intermediate file
427
- self._run_mmseqs_command(
428
- [
429
- "mmseqs",
430
- "convert2fasta",
431
- str(hits_db),
432
- str(intermediate_hits_fasta_file),
412
+ "result2flat",
413
+ str(profile_db), # queryDB used in search
414
+ str(target_db), # targetDB
415
+ str(result_db), # resultDB
416
+ str(intermediate_hits_fasta_file), # output FASTA
417
+ "--use-fasta-header",
418
+ "1",
433
419
  ],
434
- "Convert hits to FASTA",
420
+ "Extract hit sequences to FASTA via result2flat",
435
421
  run_base_dir,
436
422
  )
437
423
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.1.12
3
+ Version: 1.1.14
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -5,13 +5,13 @@ dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
5
5
  dayhoff_tools/cli/cloud_commands.py,sha256=KiYEuD3nSg8QPWBYfrhdze2La_CJe4iqK-8uOAHyS8U,35827
6
6
  dayhoff_tools/cli/main.py,sha256=Ae0Bee2VjRzUge1I2DJoDVqoXpQnKfxGhdiMSmIWJwo,3788
7
7
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
8
- dayhoff_tools/cli/utility_commands.py,sha256=KCcywhwwDAFnDcts4f_RrgikX9EpUsqdJgFgu59RNnk,20863
8
+ dayhoff_tools/cli/utility_commands.py,sha256=3cLL4821liLsKRWkEP4VZZ60rMjdvGWoyejHS-TxnnU,24093
9
9
  dayhoff_tools/deployment/base.py,sha256=u-AjbtHnFLoLt33dhYXHIpV-6jcieMEHHGBGN_U9Hm0,15626
10
10
  dayhoff_tools/deployment/deploy_aws.py,sha256=O0gQxHioSU_sNU8T8MD4wSOPvWc--V8eRRZzlRu035I,16446
11
11
  dayhoff_tools/deployment/deploy_gcp.py,sha256=DxBM4sUzwPK9RWLP9bSfr38n1HHl-TVrp4TsbdN8pUA,5795
12
12
  dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqqDkpaIOaAn_4,26027
13
13
  dayhoff_tools/deployment/job_runner.py,sha256=4tmdplpvqSE9bVxRWHo2U5kwkYrYod0Uwzpg2Q7qG5o,4850
14
- dayhoff_tools/deployment/processors.py,sha256=1MjsXh9FJnp_8BC8bmn_OTo_P6HEom1eanvBhcMadTI,19859
14
+ dayhoff_tools/deployment/processors.py,sha256=sfSnhDH0H0Sn7eZMHlmFsToYaAKjbuxbt__i6iDSohw,19367
15
15
  dayhoff_tools/deployment/swarm.py,sha256=MGcS2_x4RNFtnVjWlU_SwNfhICz8NlGYr9cYBK4ZKDA,21688
16
16
  dayhoff_tools/embedders.py,sha256=CRgcb2z7KeeFrRQawyUZuJ4Yi0-J5jSr0hwuRhjG_FI,36513
17
17
  dayhoff_tools/fasta.py,sha256=e7xw3pInoupqCGE0-fJTOzmW_earL1M7qPyoqIPfUT4,46269
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
26
26
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
27
27
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
28
28
  dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
29
- dayhoff_tools-1.1.12.dist-info/METADATA,sha256=h-fxcltAUMl48kcW27BMzLNMEl1vsJYe4ROIwAMFh7I,2225
30
- dayhoff_tools-1.1.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
31
- dayhoff_tools-1.1.12.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
32
- dayhoff_tools-1.1.12.dist-info/RECORD,,
29
+ dayhoff_tools-1.1.14.dist-info/METADATA,sha256=qaH1l5HdQMoQq4SAcXucEKpOe8ajX5Ng0isGrdmkiFg,2225
30
+ dayhoff_tools-1.1.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
31
+ dayhoff_tools-1.1.14.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
32
+ dayhoff_tools-1.1.14.dist-info/RECORD,,