dayhoff-tools 1.14.2__tar.gz → 1.14.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/PKG-INFO +1 -1
  2. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/embed_t5.py +64 -26
  3. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/studio_commands.py +6 -3
  4. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/pyproject.toml +1 -1
  5. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/README.md +0 -0
  6. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/__init__.py +0 -0
  7. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/batch/__init__.py +0 -0
  8. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/batch/workers/__init__.py +0 -0
  9. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/batch/workers/base.py +0 -0
  10. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/batch/workers/boltz.py +0 -0
  11. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/batch/workers/embed_t5.py +0 -0
  12. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/chemistry/standardizer.py +0 -0
  13. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/chemistry/utils.py +0 -0
  14. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/__init__.py +0 -0
  15. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/__init__.py +0 -0
  16. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/aws_batch.py +0 -0
  17. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/__init__.py +0 -0
  18. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/boltz.py +0 -0
  19. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/cancel.py +0 -0
  20. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/finalize.py +0 -0
  21. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/list_jobs.py +0 -0
  22. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/local.py +0 -0
  23. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/logs.py +0 -0
  24. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/retry.py +0 -0
  25. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/status.py +0 -0
  26. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/commands/submit.py +0 -0
  27. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/job_id.py +0 -0
  28. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/batch/manifest.py +0 -0
  29. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/cloud_commands.py +0 -0
  30. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engine1/__init__.py +0 -0
  31. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engine1/engine_core.py +0 -0
  32. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engine1/engine_lifecycle.py +0 -0
  33. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engine1/engine_maintenance.py +0 -0
  34. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engine1/engine_management.py +0 -0
  35. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engine1/shared.py +0 -0
  36. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engine1/studio_commands.py +0 -0
  37. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/__init__.py +0 -0
  38. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/api_client.py +0 -0
  39. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/auth.py +0 -0
  40. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/engine-studio-cli.md +0 -0
  41. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/engine_commands.py +0 -0
  42. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/progress.py +0 -0
  43. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +0 -0
  44. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/demo.sh +0 -0
  45. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +0 -0
  46. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +0 -0
  47. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +0 -0
  48. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +0 -0
  49. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +0 -0
  50. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +0 -0
  51. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/engines_studios/ssh_config.py +0 -0
  52. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/github_commands.py +0 -0
  53. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/main.py +0 -0
  54. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/swarm_commands.py +0 -0
  55. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/cli/utility_commands.py +0 -0
  56. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/deployment/base.py +0 -0
  57. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  58. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  59. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  60. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/deployment/job_runner.py +0 -0
  61. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/deployment/processors.py +0 -0
  62. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/deployment/swarm.py +0 -0
  63. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/embedders.py +0 -0
  64. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/fasta.py +0 -0
  65. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/file_ops.py +0 -0
  66. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/h5.py +0 -0
  67. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/intake/gcp.py +0 -0
  68. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/intake/gtdb.py +0 -0
  69. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/intake/kegg.py +0 -0
  70. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/intake/mmseqs.py +0 -0
  71. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/intake/structure.py +0 -0
  72. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/intake/uniprot.py +0 -0
  73. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/logs.py +0 -0
  74. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/sqlite.py +0 -0
  75. {dayhoff_tools-1.14.2 → dayhoff_tools-1.14.4}/dayhoff_tools/warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dayhoff-tools
3
- Version: 1.14.2
3
+ Version: 1.14.4
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -1,6 +1,8 @@
1
1
  """T5 embedding pipeline command."""
2
2
 
3
3
  import os
4
+ import shutil
5
+ import subprocess
4
6
  from pathlib import Path
5
7
 
6
8
  import click
@@ -232,38 +234,76 @@ def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk
232
234
 
233
235
 
234
236
  def _run_local_mode(input_path: Path):
235
- """Run embedding locally for a single chunk."""
236
- import subprocess
237
+ """Run embedding locally in a Docker container.
237
238
 
238
- click.echo("Running T5 embedding locally...")
239
+ This runs the embed container with the normal entrypoint, processing
240
+ a single chunk (index 0) for testing purposes.
241
+ """
242
+ click.echo("Running T5 embedding locally in container...")
239
243
  click.echo(f"Input: {input_path}")
240
244
 
241
- # Check if we have the embedder available
242
- try:
243
- from dayhoff_tools.embedders import T5Embedder
245
+ input_dir = input_path.parent
244
246
 
245
- output_file = input_path.with_suffix(".h5")
246
- click.echo(f"Output: {output_file}")
247
- click.echo()
247
+ # Create a temporary job directory structure in the input directory
248
+ # The worker expects JOB_DIR/input/chunk_000.fasta format
249
+ temp_job_dir = input_dir / ".local_embed_job"
250
+ temp_input_dir = temp_job_dir / "input"
251
+ temp_output_dir = temp_job_dir / "output"
248
252
 
249
- embedder = T5Embedder(
250
- max_seq_length=4500,
251
- large_protein_threshold=2500,
252
- batch_residue_limit=4500,
253
- )
254
- embedder.run(str(input_path), str(output_file))
253
+ # Clean up any previous run
254
+ if temp_job_dir.exists():
255
+ shutil.rmtree(temp_job_dir)
255
256
 
256
- click.echo()
257
- click.echo(click.style("✓ Embedding complete!", fg="green"))
258
- click.echo(f"Output: {output_file}")
257
+ temp_input_dir.mkdir(parents=True)
258
+ temp_output_dir.mkdir(parents=True)
259
+
260
+ # Symlink or copy the input file as chunk_000.fasta
261
+ chunk_path = temp_input_dir / "chunk_000.fasta"
262
+ chunk_path.symlink_to(input_path.resolve())
263
+
264
+ click.echo(f"Output will be at: {temp_output_dir}/embed_000.h5")
265
+ click.echo()
266
+
267
+ cmd = [
268
+ "docker", "run", "--rm",
269
+ "--gpus", "all",
270
+ "-v", "/primordial:/primordial",
271
+ "-v", f"{temp_job_dir}:{temp_job_dir}",
272
+ "-e", f"JOB_DIR={temp_job_dir}",
273
+ "-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
274
+ DEFAULT_IMAGE_URI,
275
+ ]
276
+
277
+ click.echo(f"Running: {' '.join(cmd)}")
278
+ click.echo()
279
+
280
+ try:
281
+ result = subprocess.run(cmd)
282
+ if result.returncode != 0:
283
+ click.echo(
284
+ click.style(f"Container exited with code {result.returncode}", fg="red"),
285
+ err=True,
286
+ )
287
+ raise SystemExit(result.returncode)
288
+
289
+ # Check for output
290
+ output_file = temp_output_dir / "embed_000.h5"
291
+ if output_file.exists():
292
+ # Move output to final location
293
+ final_output = input_path.with_suffix(".h5")
294
+ shutil.move(str(output_file), str(final_output))
295
+ click.echo()
296
+ click.echo(click.style("✓ Embedding complete!", fg="green"))
297
+ click.echo(f"Output: {final_output}")
298
+ else:
299
+ click.echo(click.style("Warning: No output file found", fg="yellow"))
300
+
301
+ # Clean up temp directory
302
+ shutil.rmtree(temp_job_dir, ignore_errors=True)
259
303
 
260
- except ImportError:
304
+ except FileNotFoundError:
261
305
  click.echo(
262
- click.style(
263
- "Error: T5Embedder requires 'embedders' extra. "
264
- "Install with: pip install 'dayhoff-tools[embedders]'",
265
- fg="red",
266
- ),
306
+ click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
267
307
  err=True,
268
308
  )
269
309
  raise SystemExit(1)
@@ -271,8 +311,6 @@ def _run_local_mode(input_path: Path):
271
311
 
272
312
  def _run_shell_mode(input_path: Path):
273
313
  """Drop into container shell for debugging."""
274
- import subprocess
275
-
276
314
  click.echo("Dropping into container shell...")
277
315
  click.echo(f"Input will be available at: /input/{input_path.name}")
278
316
  click.echo()
@@ -434,7 +434,10 @@ def list_studios(env: Optional[str]):
434
434
  help="Environment (dev, sand, prod) - auto-detected if not specified",
435
435
  )
436
436
  def attach_studio(
437
- engine_name_or_id: str, skip_ssh_config: bool, user: Optional[str], env: Optional[str]
437
+ engine_name_or_id: str,
438
+ skip_ssh_config: bool,
439
+ user: Optional[str],
440
+ env: Optional[str],
438
441
  ):
439
442
  """Attach your studio to an engine with progress tracking."""
440
443
 
@@ -517,7 +520,7 @@ def attach_studio(
517
520
  click.echo(f"\nConnect with:")
518
521
  click.echo(f" ssh {engine_name}")
519
522
 
520
- except Exception as e:
523
+ except Exception:
521
524
  # Get final status to show error details
522
525
  try:
523
526
  final_status = client.get_attachment_progress(operation_id)
@@ -540,7 +543,7 @@ def attach_studio(
540
543
  click.echo(f"Failed at step: {failed_step['name']}")
541
544
  if failed_step.get("error"):
542
545
  click.echo(f"Error: {failed_step['error']}")
543
- except:
546
+ except Exception:
544
547
  pass
545
548
 
546
549
  raise
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.14.2"
8
+ version = "1.14.4"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
File without changes