dayhoff-tools 1.1.14__tar.gz → 1.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/cli/cloud_commands.py +96 -37
  3. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/cli/utility_commands.py +58 -61
  4. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/deployment/processors.py +6 -7
  5. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/pyproject.toml +1 -1
  6. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/README.md +0 -0
  7. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/__init__.py +0 -0
  8. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/chemistry/standardizer.py +0 -0
  9. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/chemistry/utils.py +0 -0
  10. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/cli/__init__.py +0 -0
  11. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/cli/main.py +0 -0
  12. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/cli/swarm_commands.py +0 -0
  13. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/deployment/base.py +0 -0
  14. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  15. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  16. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  17. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/deployment/job_runner.py +0 -0
  18. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/deployment/swarm.py +0 -0
  19. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/embedders.py +0 -0
  20. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/fasta.py +0 -0
  21. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/file_ops.py +0 -0
  22. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/h5.py +0 -0
  23. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/intake/gcp.py +0 -0
  24. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/intake/gtdb.py +0 -0
  25. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/intake/kegg.py +0 -0
  26. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/intake/mmseqs.py +0 -0
  27. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/intake/structure.py +0 -0
  28. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/intake/uniprot.py +0 -0
  29. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/logs.py +0 -0
  30. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/sqlite.py +0 -0
  31. {dayhoff_tools-1.1.14 → dayhoff_tools-1.1.16}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.1.14
3
+ Version: 1.1.16
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -256,6 +256,32 @@ def _get_adc_status() -> str:
256
256
  return "Not configured"
257
257
 
258
258
 
259
+ def _is_adc_authenticated() -> bool:
260
+ """Check if Application Default Credentials (ADC) are valid.
261
+
262
+ Returns:
263
+ True if `gcloud auth application-default print-access-token --quiet` succeeds,
264
+ False otherwise.
265
+ """
266
+ try:
267
+ gcloud_path = _find_executable("gcloud")
268
+ returncode, _, _ = _run_command(
269
+ [
270
+ gcloud_path,
271
+ "auth",
272
+ "application-default",
273
+ "print-access-token",
274
+ "--quiet",
275
+ ],
276
+ capture=True,
277
+ check=False,
278
+ suppress_output=True,
279
+ )
280
+ return returncode == 0
281
+ except FileNotFoundError:
282
+ return False
283
+
284
+
259
285
  def _is_gcp_user_authenticated() -> bool:
260
286
  """Check if the current gcloud user authentication is valid and non-interactive.
261
287
 
@@ -347,7 +373,7 @@ def _run_gcloud_login() -> None:
347
373
 
348
374
 
349
375
  def _test_gcp_credentials(user: str, impersonation_sa: str) -> None:
350
- """Test GCP credentials. Only prints output on failure (to stderr)."""
376
+ """Test GCP credentials. Prints output on failure (to stderr) and success (to stdout)."""
351
377
  gcloud_path = _find_executable("gcloud")
352
378
  user_short = _get_short_name(user)
353
379
  impersonation_short = _get_short_name(impersonation_sa)
@@ -363,64 +389,73 @@ def _test_gcp_credentials(user: str, impersonation_sa: str) -> None:
363
389
  ]
364
390
 
365
391
  if impersonation_sa != "None":
392
+ # Test 1: Access as the user directly (temporarily disable impersonation)
393
+ print(f" Testing direct access as user ({user_short})...")
366
394
  orig_sa = impersonation_sa
367
395
  unset_rc, _, unset_err = _gcloud_unset_config(
368
396
  "auth/impersonate_service_account"
369
397
  )
370
398
  if unset_rc != 0:
371
- # Failure to unset is an error state
372
399
  print(
373
- f"{RED}✗ Test Error: Failed to temporarily disable impersonation: {unset_err}{NC}",
400
+ f" {RED}✗ Test Error: Failed to temporarily disable impersonation: {unset_err}{NC}",
374
401
  file=sys.stderr,
375
402
  )
376
-
377
- user_returncode, _, _ = _run_command(cmd, suppress_output=True, check=False)
378
- if user_returncode != 0:
379
- # Failure to access as user
380
- print(
381
- f"{RED}✗ Test Failure: Cannot access resources directly as user '{user_short}'. Check roles/project.{NC}",
382
- file=sys.stderr,
403
+ # Even if unsetting fails, attempt to restore and continue with impersonation test
404
+ else:
405
+ user_returncode, _, _ = _run_command(
406
+ cmd, suppress_output=True, check=False
383
407
  )
408
+ if user_returncode != 0:
409
+ print(
410
+ f" {RED}✗ User Test Failure: Cannot access resources directly as user '{user_short}'. Check roles/project.{NC}",
411
+ file=sys.stderr,
412
+ )
413
+ else:
414
+ print(
415
+ f" {GREEN}✓ User Test ({user_short}): Direct access OK{NC}"
416
+ )
384
417
 
418
+ # Restore impersonation setting
385
419
  set_rc, _, set_err = _gcloud_set_config(
386
420
  "auth/impersonate_service_account", orig_sa
387
421
  )
388
422
  if set_rc != 0:
389
- # Failure to restore is an error state
390
423
  print(
391
- f"{RED}✗ Test Error: Failed to restore impersonation config for {impersonation_short}: {set_err}{NC}",
424
+ f" {RED}✗ Test Error: Failed to restore impersonation config for {impersonation_short}: {set_err}{NC}",
392
425
  file=sys.stderr,
393
426
  )
427
+ # If restoring fails, it's a significant issue for the next test
394
428
 
429
+ # Test 2: Access while impersonating the SA
430
+ print(f" Testing access while impersonating SA ({impersonation_short})...")
395
431
  impersonation_returncode, _, _ = _run_command(
396
432
  cmd, suppress_output=True, check=False
397
433
  )
398
434
  if impersonation_returncode != 0:
399
- # Failure to access while impersonating
400
435
  print(
401
- f"{RED}✗ Test Failure: Cannot access resources impersonating '{impersonation_short}'. Check permissions/config.{NC}",
436
+ f" {RED}✗ Impersonation Test Failure: Cannot access resources impersonating '{impersonation_short}'. Check permissions/config.{NC}",
402
437
  file=sys.stderr,
403
438
  )
439
+ else:
440
+ print(
441
+ f" {GREEN}✓ Impersonation Test ({impersonation_short}): Access OK{NC}"
442
+ )
404
443
 
405
444
  else:
406
445
  # Test user account directly (no impersonation config)
446
+ print(f" Testing direct access as user ({user_short})...")
407
447
  returncode, _, _ = _run_command(cmd, suppress_output=True, check=False)
408
448
  if returncode != 0:
409
- # Failure to access as user
410
449
  print(
411
- f"{RED}✗ Test Failure: Cannot access resources directly as user '{user_short}'. Check roles/project.{NC}",
450
+ f" {RED}✗ User Test Failure: Cannot access resources directly as user '{user_short}'. Check roles/project.{NC}",
412
451
  file=sys.stderr,
413
452
  )
414
- # Success: No output
415
- # else:
416
- # print(f"{GREEN}✓ Direct access as user {user_short}: OK{NC}")
417
- # Correctly indented pass statement if no action needed on success
418
- pass
453
+ else:
454
+ print(f" {GREEN}✓ User Test ({user_short}): Direct access OK{NC}")
419
455
  else:
420
- # If user isn't authenticated at all, maybe print a warning?
421
- # print(f"{YELLOW}User not authenticated, skipping credential test.{NC}")
422
- # Decided against this to keep output minimal unless actual test fails.
423
- pass # Explicit pass for the outer else
456
+ print(
457
+ f" {YELLOW}User not authenticated, skipping credential access tests.{NC}"
458
+ )
424
459
 
425
460
 
426
461
  # --- AWS Functions ---
@@ -516,30 +551,54 @@ aws_app = typer.Typer(help="Manage AWS SSO authentication using RC files.")
516
551
  # --- GCP Commands ---
517
552
  @gcp_app.command("status")
518
553
  def gcp_status():
519
- """Show active GCP credentials for CLI and Libraries/ADC."""
554
+ """Show active GCP credentials for CLI and Libraries/ADC, including staleness."""
520
555
  cli_user = _get_current_gcp_user()
521
556
  cli_impersonation = _get_current_gcp_impersonation()
522
- adc_principal = _get_adc_status()
557
+ adc_principal_raw = _get_adc_status() # Raw status string, potentially complex
558
+
559
+ user_auth_valid = _is_gcp_user_authenticated()
560
+ adc_auth_valid = _is_adc_authenticated()
523
561
 
524
562
  # Determine active principal for CLI
525
563
  if cli_impersonation != "None":
526
564
  cli_active_short = _get_short_name(cli_impersonation)
565
+ cli_is_impersonating = True
527
566
  else:
528
567
  cli_active_short = _get_short_name(cli_user)
568
+ cli_is_impersonating = False
529
569
 
530
- # Get short name for ADC principal
531
- adc_active_short = _get_short_name(adc_principal)
570
+ adc_active_short = _get_short_name(adc_principal_raw)
532
571
 
533
- # Define a fixed width for the principal name field
534
- name_width = 10
572
+ print(f"{BLUE}--- GCP CLI Credentials ---{NC}")
573
+ print(f" Effective Principal: {GREEN}{cli_active_short}{NC}")
574
+ print(f" User Account ({_get_short_name(cli_user)}):")
575
+ if user_auth_valid:
576
+ print(f" └─ Authentication: {GREEN}VALID{NC}")
577
+ else:
578
+ print(
579
+ f" └─ Authentication: {RED}STALE/EXPIRED{NC} (Hint: run 'dh gcp login')"
580
+ )
535
581
 
536
- print(
537
- f"Using {GREEN}{cli_active_short:<{name_width}}{NC} for {BLUE}gcloud CLI{NC} (gcloud, gsutil)"
538
- )
539
- print(
540
- f"Using {GREEN}{adc_active_short:<{name_width}}{NC} for {BLUE}Libraries/Tools{NC} (warehouse, Terraform, Python clients)\n"
541
- )
582
+ if cli_is_impersonating:
583
+ print(
584
+ f" Impersonation ({_get_short_name(cli_impersonation)}): {GREEN}Active{NC}"
585
+ )
586
+ print(f" └─ Access Test: (see results below)")
587
+ else:
588
+ print(f" Impersonation: {YELLOW}Not Active{NC}")
589
+
590
+ print(f"\n{BLUE}--- GCP Library/ADC Credentials ---{NC}")
591
+ print(f" Effective Principal: {GREEN}{adc_active_short}{NC}")
592
+ if adc_principal_raw in ["Not configured", "Error reading", "Invalid format"]:
593
+ print(f" └─ Status: {RED}{adc_principal_raw}{NC}")
594
+ elif adc_auth_valid:
595
+ print(f" └─ Authentication: {GREEN}VALID{NC}")
596
+ else:
597
+ print(
598
+ f" └─ Authentication: {RED}STALE/EXPIRED{NC} (Hint: run 'dh gcp use-...-adc' or 'gcloud auth application-default login ...')"
599
+ )
542
600
 
601
+ print(f"\n{BLUE}--- GCP Access Tests (for CLI configuration) ---{NC}")
543
602
  # Run tests silently, they will print to stderr only on failure
544
603
  _test_gcp_credentials(cli_user, cli_impersonation)
545
604
 
@@ -6,7 +6,6 @@ import shutil
6
6
  import subprocess
7
7
  import sys
8
8
  from pathlib import Path
9
- from typing import Optional
10
9
 
11
10
  import toml
12
11
  import typer
@@ -15,6 +14,64 @@ import yaml
15
14
  # Import cloud helper lazily inside functions to avoid heavy deps at module load
16
15
 
17
16
 
17
+ def _warn_if_gcp_default_sa(force_prompt: bool = False) -> None:
18
+ """Warn the user when the active gcloud principal is the default VM service
19
+ account. See detailed docstring later in file (duplicate for early
20
+ availability)."""
21
+
22
+ from dayhoff_tools.cli import cloud_commands as _cc
23
+
24
+ try:
25
+ impersonation = _cc._get_current_gcp_impersonation()
26
+ user = _cc._get_current_gcp_user()
27
+ active = impersonation if impersonation != "None" else user
28
+ short = _cc._get_short_name(active)
29
+
30
+ # Determine if user creds are valid
31
+ auth_valid = _cc._is_gcp_user_authenticated()
32
+ except Exception:
33
+ # If any helper errors out, don't block execution
34
+ return
35
+
36
+ problem_type = None # "default_sa" | "stale"
37
+ if short == "default VM service account":
38
+ problem_type = "default_sa"
39
+ elif not auth_valid:
40
+ problem_type = "stale"
41
+
42
+ if problem_type is None:
43
+ return # Everything looks good
44
+
45
+ YELLOW = getattr(_cc, "YELLOW", "\033[0;33m")
46
+ BLUE = getattr(_cc, "BLUE", "\033[0;36m")
47
+ RED = getattr(_cc, "RED", "\033[0;31m")
48
+ NC = getattr(_cc, "NC", "\033[0m")
49
+
50
+ if problem_type == "default_sa":
51
+ msg_body = (
52
+ f"You are currently authenticated as the *default VM service account*.\n"
53
+ f" This will block gsutil/DVC access to private buckets (e.g. warehouse)."
54
+ )
55
+ else: # stale creds
56
+ msg_body = (
57
+ f"Your GCP credentials appear to be *expired/stale*.\n"
58
+ f" Re-authenticate to refresh the access token."
59
+ )
60
+
61
+ print(
62
+ f"{YELLOW}⚠ {msg_body}{NC}\n"
63
+ f"{YELLOW} Run {BLUE}dh gcp login{YELLOW} or {BLUE}dh gcp use-devcon{YELLOW} before retrying.{NC}",
64
+ file=sys.stderr,
65
+ )
66
+
67
+ if force_prompt and sys.stdin.isatty() and sys.stdout.isatty():
68
+ import questionary
69
+
70
+ if not questionary.confirm("Proceed anyway?", default=False).ask():
71
+ print(f"{RED}Aborted due to unsafe GCP credentials.{NC}", file=sys.stderr)
72
+ raise SystemExit(1)
73
+
74
+
18
75
  def test_github_actions_locally():
19
76
  """Run the script test_pytest_in_github_actions_container.sh.sh."""
20
77
  script_path = ".devcontainer/scripts/test_pytest_in_github_actions_container.sh"
@@ -571,63 +628,3 @@ def update_dependencies(
571
628
  # ----------------------
572
629
  # Cloud credential guard
573
630
  # ----------------------
574
-
575
-
576
- def _warn_if_gcp_default_sa(force_prompt: bool = False) -> None:
577
- """Warn the user when the active gcloud principal is the default VM service
578
- account.
579
-
580
- This situation prevents gsutil/DVC from accessing private buckets and is
581
- almost never what we want when interacting with the warehouse. The
582
- function prints a coloured warning and, when *force_prompt* is *True* and
583
- the session is interactive, asks the user whether to continue or abort the
584
- command.
585
-
586
- Parameters
587
- ----------
588
- force_prompt : bool, default False
589
- When *True* and running in an interactive shell (stdin & stdout are
590
- TTYs), display a yes/no prompt (via *questionary.confirm*) asking the
591
- user whether to proceed. If the user declines, ``SystemExit(1)`` is
592
- raised to stop the CLI command immediately. In non-interactive
593
- contexts the function merely prints the warning without prompting.
594
- """
595
-
596
- # Delay heavy import until the function is actually needed
597
- from dayhoff_tools.cli import cloud_commands as _cc # local import to ease testing
598
-
599
- try:
600
- impersonation = _cc._get_current_gcp_impersonation()
601
- user = _cc._get_current_gcp_user()
602
- active = impersonation if impersonation != "None" else user
603
- short_name = _cc._get_short_name(active)
604
- except Exception:
605
- # If gcloud is missing or some other unexpected error occurs, fail
606
- # gracefully by doing nothing.
607
- return
608
-
609
- if short_name != "default VM service account":
610
- # All good – nothing to warn about
611
- return
612
-
613
- # Colour constants – fall back to empty strings if not available
614
- RED = getattr(_cc, "RED", "\033[0;31m")
615
- YELLOW = getattr(_cc, "YELLOW", "\033[0;33m")
616
- BLUE = getattr(_cc, "BLUE", "\033[0;36m")
617
- NC = getattr(_cc, "NC", "\033[0m")
618
-
619
- warning_msg = (
620
- f"{YELLOW}⚠ You are currently authenticated as the *default VM service account*.{NC}\n"
621
- f"{YELLOW} This will block gsutil/DVC access to private buckets (e.g. warehouse).{NC}\n"
622
- f"{YELLOW} Run {BLUE}dh gcp login{YELLOW} or {BLUE}dh gcp use-devcon{YELLOW} before retrying.{NC}"
623
- )
624
- print(warning_msg, file=sys.stderr)
625
-
626
- interactive = sys.stdin.isatty() and sys.stdout.isatty()
627
- if force_prompt and interactive:
628
- import questionary
629
-
630
- proceed = questionary.confirm("Proceed anyway?", default=False).ask()
631
- if not proceed:
632
- print(f"{RED}Aborted due to unsafe GCP credentials.{NC}", file=sys.stderr)
633
- raise SystemExit(1)
@@ -409,15 +409,14 @@ class MMSeqsProfileProcessor(Processor):
409
409
  self._run_mmseqs_command(
410
410
  [
411
411
  "mmseqs",
412
- "result2flat",
413
- str(profile_db), # queryDB used in search
414
- str(target_db), # targetDB
415
- str(result_db), # resultDB
412
+ "createseqfiledb",
413
+ str(result_db), # resultDB containing target hits
414
+ str(target_db), # targetDB containing actual sequences
416
415
  str(intermediate_hits_fasta_file), # output FASTA
417
- "--use-fasta-header",
418
- "1",
416
+ "--db-output",
417
+ "1", # Output in FASTA format
419
418
  ],
420
- "Extract hit sequences to FASTA via result2flat",
419
+ "Extract hit sequences to FASTA via createseqfiledb",
421
420
  run_base_dir,
422
421
  )
423
422
 
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.1.14"
8
+ version = "1.1.16"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
File without changes