dayhoff-tools 1.1.12__tar.gz → 1.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/PKG-INFO +1 -1
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/cli/utility_commands.py +87 -4
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/deployment/processors.py +9 -23
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/pyproject.toml +1 -1
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/README.md +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/__init__.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/chemistry/standardizer.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/chemistry/utils.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/cli/__init__.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/cli/cloud_commands.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/cli/main.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/cli/swarm_commands.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/deployment/base.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/deployment/deploy_aws.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/deployment/deploy_utils.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/deployment/job_runner.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/deployment/swarm.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/embedders.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/fasta.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/file_ops.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/h5.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/intake/gcp.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/intake/gtdb.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/intake/kegg.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/intake/mmseqs.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/intake/structure.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/intake/uniprot.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/logs.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/sqlite.py +0 -0
- {dayhoff_tools-1.1.12 → dayhoff_tools-1.1.14}/dayhoff_tools/warehouse.py +0 -0
@@ -6,11 +6,14 @@ import shutil
|
|
6
6
|
import subprocess
|
7
7
|
import sys
|
8
8
|
from pathlib import Path
|
9
|
+
from typing import Optional
|
9
10
|
|
10
11
|
import toml
|
11
12
|
import typer
|
12
13
|
import yaml
|
13
14
|
|
15
|
+
# Import cloud helper lazily inside functions to avoid heavy deps at module load
|
16
|
+
|
14
17
|
|
15
18
|
def test_github_actions_locally():
|
16
19
|
"""Run the script test_pytest_in_github_actions_container.sh.sh."""
|
@@ -52,9 +55,16 @@ def get_ancestry(filepath: str) -> None:
|
|
52
55
|
|
53
56
|
def import_from_warehouse_typer() -> None:
|
54
57
|
"""Import a file from warehouse.
|
55
|
-
|
56
|
-
|
58
|
+
|
59
|
+
Emits an early warning if the active GCP credentials are the *default VM
|
60
|
+
service account* because this will prevent DVC/gsutil from accessing the
|
61
|
+
warehouse bucket. The user can abort the command when running
|
62
|
+
interactively.
|
57
63
|
"""
|
64
|
+
|
65
|
+
# Early-exit guard for wrong GCP credentials
|
66
|
+
_warn_if_gcp_default_sa(force_prompt=True)
|
67
|
+
|
58
68
|
# Import only when the function is called
|
59
69
|
import questionary
|
60
70
|
from dayhoff_tools.warehouse import import_from_warehouse
|
@@ -91,8 +101,16 @@ def import_from_warehouse_typer() -> None:
|
|
91
101
|
|
92
102
|
|
93
103
|
def add_to_warehouse_typer() -> None:
|
94
|
-
"""Add a new data file to warehouse
|
95
|
-
|
104
|
+
"""Add a new data file to warehouse and enrich its generated .dvc file.
|
105
|
+
|
106
|
+
As with *dh wimport*, this command fails when the user is logged in with
|
107
|
+
the default VM service account. A guard therefore warns the user first
|
108
|
+
and allows them to abort interactively.
|
109
|
+
"""
|
110
|
+
|
111
|
+
# Early-exit guard for wrong GCP credentials
|
112
|
+
_warn_if_gcp_default_sa(force_prompt=True)
|
113
|
+
|
96
114
|
# Import only when the function is called
|
97
115
|
import questionary
|
98
116
|
from dayhoff_tools.warehouse import add_to_warehouse
|
@@ -548,3 +566,68 @@ def update_dependencies(
|
|
548
566
|
except Exception as e:
|
549
567
|
print(f"An unexpected error occurred: {e}")
|
550
568
|
sys.exit(1)
|
569
|
+
|
570
|
+
|
571
|
+
# ----------------------
|
572
|
+
# Cloud credential guard
|
573
|
+
# ----------------------
|
574
|
+
|
575
|
+
|
576
|
+
def _warn_if_gcp_default_sa(force_prompt: bool = False) -> None:
|
577
|
+
"""Warn the user when the active gcloud principal is the default VM service
|
578
|
+
account.
|
579
|
+
|
580
|
+
This situation prevents gsutil/DVC from accessing private buckets and is
|
581
|
+
almost never what we want when interacting with the warehouse. The
|
582
|
+
function prints a coloured warning and, when *force_prompt* is *True* and
|
583
|
+
the session is interactive, asks the user whether to continue or abort the
|
584
|
+
command.
|
585
|
+
|
586
|
+
Parameters
|
587
|
+
----------
|
588
|
+
force_prompt : bool, default False
|
589
|
+
When *True* and running in an interactive shell (stdin & stdout are
|
590
|
+
TTYs), display a yes/no prompt (via *questionary.confirm*) asking the
|
591
|
+
user whether to proceed. If the user declines, ``SystemExit(1)`` is
|
592
|
+
raised to stop the CLI command immediately. In non-interactive
|
593
|
+
contexts the function merely prints the warning without prompting.
|
594
|
+
"""
|
595
|
+
|
596
|
+
# Delay heavy import until the function is actually needed
|
597
|
+
from dayhoff_tools.cli import cloud_commands as _cc # local import to ease testing
|
598
|
+
|
599
|
+
try:
|
600
|
+
impersonation = _cc._get_current_gcp_impersonation()
|
601
|
+
user = _cc._get_current_gcp_user()
|
602
|
+
active = impersonation if impersonation != "None" else user
|
603
|
+
short_name = _cc._get_short_name(active)
|
604
|
+
except Exception:
|
605
|
+
# If gcloud is missing or some other unexpected error occurs, fail
|
606
|
+
# gracefully by doing nothing.
|
607
|
+
return
|
608
|
+
|
609
|
+
if short_name != "default VM service account":
|
610
|
+
# All good – nothing to warn about
|
611
|
+
return
|
612
|
+
|
613
|
+
# Colour constants – fall back to standard ANSI escape codes if not available
|
614
|
+
RED = getattr(_cc, "RED", "\033[0;31m")
|
615
|
+
YELLOW = getattr(_cc, "YELLOW", "\033[0;33m")
|
616
|
+
BLUE = getattr(_cc, "BLUE", "\033[0;36m")
|
617
|
+
NC = getattr(_cc, "NC", "\033[0m")
|
618
|
+
|
619
|
+
warning_msg = (
|
620
|
+
f"{YELLOW}⚠ You are currently authenticated as the *default VM service account*.{NC}\n"
|
621
|
+
f"{YELLOW} This will block gsutil/DVC access to private buckets (e.g. warehouse).{NC}\n"
|
622
|
+
f"{YELLOW} Run {BLUE}dh gcp login{YELLOW} or {BLUE}dh gcp use-devcon{YELLOW} before retrying.{NC}"
|
623
|
+
)
|
624
|
+
print(warning_msg, file=sys.stderr)
|
625
|
+
|
626
|
+
interactive = sys.stdin.isatty() and sys.stdout.isatty()
|
627
|
+
if force_prompt and interactive:
|
628
|
+
import questionary
|
629
|
+
|
630
|
+
proceed = questionary.confirm("Proceed anyway?", default=False).ask()
|
631
|
+
if not proceed:
|
632
|
+
print(f"{RED}Aborted due to unsafe GCP credentials.{NC}", file=sys.stderr)
|
633
|
+
raise SystemExit(1)
|
@@ -283,7 +283,6 @@ class MMSeqsProfileProcessor(Processor):
|
|
283
283
|
aln_db = mmseqs_temp_dir / "alnDB"
|
284
284
|
profile_db = mmseqs_temp_dir / "profileDB"
|
285
285
|
result_db = mmseqs_temp_dir / "resultDB"
|
286
|
-
hits_db = mmseqs_temp_dir / "hitsDB"
|
287
286
|
|
288
287
|
try:
|
289
288
|
# 1. Create query database
|
@@ -406,32 +405,19 @@ class MMSeqsProfileProcessor(Processor):
|
|
406
405
|
run_base_dir,
|
407
406
|
)
|
408
407
|
|
409
|
-
# 9.
|
408
|
+
# 9. Extract hit sequences directly to FASTA using result2flat
|
410
409
|
self._run_mmseqs_command(
|
411
410
|
[
|
412
411
|
"mmseqs",
|
413
|
-
"
|
414
|
-
str(
|
415
|
-
|
416
|
-
), #
|
417
|
-
str(
|
418
|
-
|
419
|
-
"
|
420
|
-
"1", # Use mode 1 to extract target sequences (2nd column)
|
421
|
-
],
|
422
|
-
"Create hits subDB from target_db",
|
423
|
-
run_base_dir,
|
424
|
-
)
|
425
|
-
|
426
|
-
# 10. Convert hit sequences to FASTA -> to intermediate file
|
427
|
-
self._run_mmseqs_command(
|
428
|
-
[
|
429
|
-
"mmseqs",
|
430
|
-
"convert2fasta",
|
431
|
-
str(hits_db),
|
432
|
-
str(intermediate_hits_fasta_file),
|
412
|
+
"result2flat",
|
413
|
+
str(profile_db), # queryDB used in search
|
414
|
+
str(target_db), # targetDB
|
415
|
+
str(result_db), # resultDB
|
416
|
+
str(intermediate_hits_fasta_file), # output FASTA
|
417
|
+
"--use-fasta-header",
|
418
|
+
"1",
|
433
419
|
],
|
434
|
-
"
|
420
|
+
"Extract hit sequences to FASTA via result2flat",
|
435
421
|
run_base_dir,
|
436
422
|
)
|
437
423
|
|
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
|
|
5
5
|
|
6
6
|
[project]
|
7
7
|
name = "dayhoff-tools"
|
8
|
-
version = "1.1.12"
|
8
|
+
version = "1.1.14"
|
9
9
|
description = "Common tools for all the repos at Dayhoff Labs"
|
10
10
|
authors = [
|
11
11
|
{name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|