dayhoff-tools 1.14.3__py3-none-any.whl → 1.14.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  """T5 embedding pipeline command."""
2
2
 
3
3
  import os
4
+ import shutil
5
+ import subprocess
4
6
  from pathlib import Path
5
7
 
6
8
  import click
@@ -232,38 +234,76 @@ def _submit_batch_job(input_path: Path, workers: int, queue: str, seqs_per_chunk
232
234
 
233
235
 
234
236
  def _run_local_mode(input_path: Path):
235
- """Run embedding locally for a single chunk."""
236
- import subprocess
237
+ """Run embedding locally in a Docker container.
237
238
 
238
- click.echo("Running T5 embedding locally...")
239
+ This runs the embed container with the normal entrypoint, processing
240
+ a single chunk (index 0) for testing purposes.
241
+ """
242
+ click.echo("Running T5 embedding locally in container...")
239
243
  click.echo(f"Input: {input_path}")
240
244
 
241
- # Check if we have the embedder available
242
- try:
243
- from dayhoff_tools.embedders import T5Embedder
245
+ input_dir = input_path.parent
244
246
 
245
- output_file = input_path.with_suffix(".h5")
246
- click.echo(f"Output: {output_file}")
247
- click.echo()
247
+ # Create a temporary job directory structure in the input directory
248
+ # The worker expects JOB_DIR/input/chunk_000.fasta format
249
+ temp_job_dir = input_dir / ".local_embed_job"
250
+ temp_input_dir = temp_job_dir / "input"
251
+ temp_output_dir = temp_job_dir / "output"
248
252
 
249
- embedder = T5Embedder(
250
- max_seq_length=4500,
251
- large_protein_threshold=2500,
252
- batch_residue_limit=4500,
253
- )
254
- embedder.run(str(input_path), str(output_file))
253
+ # Clean up any previous run
254
+ if temp_job_dir.exists():
255
+ shutil.rmtree(temp_job_dir)
255
256
 
256
- click.echo()
257
- click.echo(click.style("✓ Embedding complete!", fg="green"))
258
- click.echo(f"Output: {output_file}")
257
+ temp_input_dir.mkdir(parents=True)
258
+ temp_output_dir.mkdir(parents=True)
259
+
260
+ # Symlink or copy the input file as chunk_000.fasta
261
+ chunk_path = temp_input_dir / "chunk_000.fasta"
262
+ chunk_path.symlink_to(input_path.resolve())
263
+
264
+ click.echo(f"Output will be at: {temp_output_dir}/embed_000.h5")
265
+ click.echo()
266
+
267
+ cmd = [
268
+ "docker", "run", "--rm",
269
+ "--gpus", "all",
270
+ "-v", "/primordial:/primordial",
271
+ "-v", f"{temp_job_dir}:{temp_job_dir}",
272
+ "-e", f"JOB_DIR={temp_job_dir}",
273
+ "-e", "AWS_BATCH_JOB_ARRAY_INDEX=0",
274
+ DEFAULT_IMAGE_URI,
275
+ ]
276
+
277
+ click.echo(f"Running: {' '.join(cmd)}")
278
+ click.echo()
279
+
280
+ try:
281
+ result = subprocess.run(cmd)
282
+ if result.returncode != 0:
283
+ click.echo(
284
+ click.style(f"Container exited with code {result.returncode}", fg="red"),
285
+ err=True,
286
+ )
287
+ raise SystemExit(result.returncode)
288
+
289
+ # Check for output
290
+ output_file = temp_output_dir / "embed_000.h5"
291
+ if output_file.exists():
292
+ # Move output to final location
293
+ final_output = input_path.with_suffix(".h5")
294
+ shutil.move(str(output_file), str(final_output))
295
+ click.echo()
296
+ click.echo(click.style("✓ Embedding complete!", fg="green"))
297
+ click.echo(f"Output: {final_output}")
298
+ else:
299
+ click.echo(click.style("Warning: No output file found", fg="yellow"))
300
+
301
+ # Clean up temp directory
302
+ shutil.rmtree(temp_job_dir, ignore_errors=True)
259
303
 
260
- except ImportError:
304
+ except FileNotFoundError:
261
305
  click.echo(
262
- click.style(
263
- "Error: T5Embedder requires 'embedders' extra. "
264
- "Install with: pip install 'dayhoff-tools[embedders]'",
265
- fg="red",
266
- ),
306
+ click.style("Error: Docker not found. Is Docker installed and running?", fg="red"),
267
307
  err=True,
268
308
  )
269
309
  raise SystemExit(1)
@@ -271,8 +311,6 @@ def _run_local_mode(input_path: Path):
271
311
 
272
312
  def _run_shell_mode(input_path: Path):
273
313
  """Drop into container shell for debugging."""
274
- import subprocess
275
-
276
314
  click.echo("Dropping into container shell...")
277
315
  click.echo(f"Input will be available at: /input/{input_path.name}")
278
316
  click.echo()
@@ -434,7 +434,10 @@ def list_studios(env: Optional[str]):
434
434
  help="Environment (dev, sand, prod) - auto-detected if not specified",
435
435
  )
436
436
  def attach_studio(
437
- engine_name_or_id: str, skip_ssh_config: bool, user: Optional[str], env: Optional[str]
437
+ engine_name_or_id: str,
438
+ skip_ssh_config: bool,
439
+ user: Optional[str],
440
+ env: Optional[str],
438
441
  ):
439
442
  """Attach your studio to an engine with progress tracking."""
440
443
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dayhoff-tools
3
- Version: 1.14.3
3
+ Version: 1.14.4
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
@@ -12,7 +12,7 @@ dayhoff_tools/cli/batch/aws_batch.py,sha256=L6lNpUaxyhMvLcM3dF8yEi2yCgTNdviaNMmG
12
12
  dayhoff_tools/cli/batch/commands/__init__.py,sha256=1xRzzL_mc1hz1Pv0OWNr-g6fkL5XbEsOTGHzrqddLCA,458
13
13
  dayhoff_tools/cli/batch/commands/boltz.py,sha256=M7UNebaV7EAoHY_Fhml7JGnTZjM5o1nkYL22pSmqnWA,10422
14
14
  dayhoff_tools/cli/batch/commands/cancel.py,sha256=5FHLRoq8nvOr8HuKxXUZqAyAdjQP8seaNSaeAdhOoE8,2890
15
- dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=D56Hyamzm-peVEiA3tDzotfpHd2bVsKfMprpoj_ADR8,9627
15
+ dayhoff_tools/cli/batch/commands/embed_t5.py,sha256=UtF8ulJcz15sdd1QzvPVPeioN_AMks1DZ7c6vKtwhDE,11052
16
16
  dayhoff_tools/cli/batch/commands/finalize.py,sha256=2kQTtHxdW52s0lrLUlc7tx7Ab092sxs8uSc9-f2utUg,7198
17
17
  dayhoff_tools/cli/batch/commands/list_jobs.py,sha256=y90_XqFI2qcvV2XUC01P3P2aIdtl5hXP3cBDEofsRtU,2432
18
18
  dayhoff_tools/cli/batch/commands/local.py,sha256=vOTojTAp4YZXIMLKNqc76oIE0oWMjB1mtTe6r19LrEc,3075
@@ -45,7 +45,7 @@ dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py,sha256=HA08pIMJW
45
45
  dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py,sha256=ntizeR0BJLdJOwCRBKPajc2xT-BL7SNnONxfgxXDgr8,11609
46
46
  dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py,sha256=6WvpnRawJVaQf_H81zuR1_66igRRVxPxjAt8e69xjp4,5394
47
47
  dayhoff_tools/cli/engines_studios/ssh_config.py,sha256=UCv-jf_zSuk7FUStkCQBAJz1QkxiSEwZbdSrwt_9SMU,2932
48
- dayhoff_tools/cli/engines_studios/studio_commands.py,sha256=kOoSfbhufNpNrqEsWu_Ohq1tu8XQajvJZnIcQgiyUTY,25971
48
+ dayhoff_tools/cli/engines_studios/studio_commands.py,sha256=KGSNZQS8MmM_DfQzT9SRZvuR3OK6NdIdOrqI2wJFyes,25984
49
49
  dayhoff_tools/cli/github_commands.py,sha256=pfrxI68LObGm_gtPlQN-gHPahHV4l9k9T4GqO99NNL0,8948
50
50
  dayhoff_tools/cli/main.py,sha256=6W-fdRfjkPGx4fnfAhmEKyFl6Ou1VOxy4IYFpcFWkrA,8489
51
51
  dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
@@ -70,7 +70,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
70
70
  dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
71
71
  dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
72
72
  dayhoff_tools/warehouse.py,sha256=UETBtZD3r7WgvURqfGbyHlT7cxoiVq8isjzMuerKw8I,24475
73
- dayhoff_tools-1.14.3.dist-info/METADATA,sha256=PflDa-XxwtgN4RplNJvPemoK9-muyPlmpWaEW5sqvhk,3184
74
- dayhoff_tools-1.14.3.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
75
- dayhoff_tools-1.14.3.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
76
- dayhoff_tools-1.14.3.dist-info/RECORD,,
73
+ dayhoff_tools-1.14.4.dist-info/METADATA,sha256=DO3OUdkIlcX9vFR1LbE6BUB7uO-Qsmhi2EjhzA3LkXE,3184
74
+ dayhoff_tools-1.14.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
75
+ dayhoff_tools-1.14.4.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
76
+ dayhoff_tools-1.14.4.dist-info/RECORD,,