boltz-vsynthes 1.0.37__py3-none-any.whl → 1.0.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
boltz/main.py CHANGED
@@ -272,7 +272,7 @@ def get_cache_path() -> str:
272
272
 
273
273
 
274
274
  def check_inputs(data: Path) -> list[Path]:
275
- """Check the input data and output directory.
275
+ """Check the input data.
276
276
 
277
277
  Parameters
278
278
  ----------
@@ -282,18 +282,21 @@ def check_inputs(data: Path) -> list[Path]:
282
282
  Returns
283
283
  -------
284
284
  list[Path]
285
- The list of input data.
285
+ The list of input files.
286
286
 
287
287
  """
288
- click.echo("Checking input data.")
289
-
290
288
  # Check if data is a directory
291
289
  if data.is_dir():
292
290
  data: list[Path] = list(data.glob("*"))
293
291
 
294
292
  # Filter out non .fasta or .yaml files, raise
295
293
  # an error on directory and other file types
294
+ filtered_data = []
296
295
  for d in data:
296
+ # Skip hidden files and directories
297
+ if d.name.startswith('.') or any(part.startswith('.') for part in d.parts):
298
+ continue
299
+
297
300
  if d.is_dir():
298
301
  msg = f"Found directory {d} instead of .fasta or .yaml."
299
302
  raise RuntimeError(msg)
@@ -303,6 +306,8 @@ def check_inputs(data: Path) -> list[Path]:
303
306
  "please provide a .fasta or .yaml file."
304
307
  )
305
308
  raise RuntimeError(msg)
309
+ filtered_data.append(d)
310
+ data = filtered_data
306
311
  else:
307
312
  data = [data]
308
313
 
@@ -493,13 +498,25 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
493
498
  ) -> None:
494
499
  try:
495
500
  # Parse data
496
- if path.suffix in (".fa", ".fas", ".fasta"):
501
+ if path.is_dir():
502
+ # Process all YAML and FASTA files in the directory
503
+ targets = []
504
+ for file_path in path.glob("*"):
505
+ if file_path.suffix in (".fa", ".fas", ".fasta"):
506
+ target = parse_fasta(file_path, ccd, mol_dir, boltz2)
507
+ targets.append(target)
508
+ elif file_path.suffix in (".yml", ".yaml"):
509
+ target = parse_yaml(file_path, ccd, mol_dir, boltz2)
510
+ if not isinstance(target, list):
511
+ target = [target]
512
+ targets.extend(target)
513
+ elif path.suffix in (".fa", ".fas", ".fasta"):
497
514
  target = parse_fasta(path, ccd, mol_dir, boltz2)
515
+ targets = [target]
498
516
  elif path.suffix in (".yml", ".yaml"):
499
- target = parse_yaml(path, ccd, mol_dir, boltz2)
500
- elif path.is_dir():
501
- msg = f"Found directory {path} instead of .fasta or .yaml, skipping."
502
- raise RuntimeError(msg) # noqa: TRY301
517
+ targets = parse_yaml(path, ccd, mol_dir, boltz2)
518
+ if not isinstance(targets, list):
519
+ targets = [targets]
503
520
  else:
504
521
  msg = (
505
522
  f"Unable to parse filetype {path.suffix}, "
@@ -507,96 +524,98 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
507
524
  )
508
525
  raise RuntimeError(msg) # noqa: TRY301
509
526
 
510
- # Get target id
511
- target_id = target.record.id
512
-
513
- # Get all MSA ids and decide whether to generate MSA
514
- to_generate = {}
515
- prot_id = const.chain_type_ids["PROTEIN"]
516
- for chain in target.record.chains:
517
- # Add to generate list, assigning entity id
518
- if (chain.mol_type == prot_id) and (chain.msa_id == 0):
519
- entity_id = chain.entity_id
520
- msa_id = f"{target_id}_{entity_id}"
521
- to_generate[msa_id] = target.sequences[entity_id]
522
- chain.msa_id = msa_dir / f"{msa_id}.csv"
523
-
524
- # We do not support msa generation for non-protein chains
525
- elif chain.msa_id == 0:
526
- chain.msa_id = -1
527
-
528
- # Generate MSA
529
- if to_generate and not use_msa_server:
530
- msg = "Missing MSA's in input and --use_msa_server flag not set."
531
- raise RuntimeError(msg) # noqa: TRY301
532
-
533
- if to_generate:
534
- msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
535
- click.echo(msg)
536
- compute_msa(
537
- data=to_generate,
538
- target_id=target_id,
539
- msa_dir=msa_dir,
540
- msa_server_url=msa_server_url,
541
- msa_pairing_strategy=msa_pairing_strategy,
542
- )
527
+ # Process each target
528
+ for target in targets:
529
+ # Get target id
530
+ target_id = target.record.id
531
+
532
+ # Get all MSA ids and decide whether to generate MSA
533
+ to_generate = {}
534
+ prot_id = const.chain_type_ids["PROTEIN"]
535
+ for chain in target.record.chains:
536
+ # Add to generate list, assigning entity id
537
+ if (chain.mol_type == prot_id) and (chain.msa_id == 0):
538
+ entity_id = chain.entity_id
539
+ msa_id = f"{target_id}_{entity_id}"
540
+ to_generate[msa_id] = target.sequences[entity_id]
541
+ chain.msa_id = msa_dir / f"{msa_id}.csv"
542
+
543
+ # We do not support msa generation for non-protein chains
544
+ elif chain.msa_id == 0:
545
+ chain.msa_id = -1
546
+
547
+ # Generate MSA
548
+ if to_generate and not use_msa_server:
549
+ msg = "Missing MSA's in input and --use_msa_server flag not set."
550
+ raise RuntimeError(msg) # noqa: TRY301
551
+
552
+ if to_generate:
553
+ msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
554
+ click.echo(msg)
555
+ compute_msa(
556
+ data=to_generate,
557
+ target_id=target_id,
558
+ msa_dir=msa_dir,
559
+ msa_server_url=msa_server_url,
560
+ msa_pairing_strategy=msa_pairing_strategy,
561
+ )
543
562
 
544
- # Parse MSA data
545
- msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
546
- msa_id_map = {}
547
- for msa_idx, msa_id in enumerate(msas):
548
- # Check that raw MSA exists
549
- msa_path = Path(msa_id)
550
- if not msa_path.exists():
551
- msg = f"MSA file {msa_path} not found."
552
- raise FileNotFoundError(msg) # noqa: TRY301
553
-
554
- # Dump processed MSA
555
- processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
556
- msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
557
- if not processed.exists():
558
- # Parse A3M
559
- if msa_path.suffix == ".a3m":
560
- msa: MSA = parse_a3m(
561
- msa_path,
562
- taxonomy=None,
563
- max_seqs=max_msa_seqs,
564
- )
565
- elif msa_path.suffix == ".csv":
566
- msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
567
- else:
568
- msg = f"MSA file {msa_path} not supported, only a3m or csv."
569
- raise RuntimeError(msg) # noqa: TRY301
570
-
571
- msa.dump(processed)
572
-
573
- # Modify records to point to processed MSA
574
- for c in target.record.chains:
575
- if (c.msa_id != -1) and (c.msa_id in msa_id_map):
576
- c.msa_id = msa_id_map[c.msa_id]
577
-
578
- # Dump templates
579
- for template_id, template in target.templates.items():
580
- name = f"{target.record.id}_{template_id}.npz"
581
- template_path = processed_templates_dir / name
582
- template.dump(template_path)
583
-
584
- # Dump constraints
585
- constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
586
- target.residue_constraints.dump(constraints_path)
587
-
588
- # Dump extra molecules
589
- Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
590
- with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
591
- pickle.dump(target.extra_mols, f)
592
-
593
- # Dump structure
594
- struct_path = structure_dir / f"{target.record.id}.npz"
595
- target.structure.dump(struct_path)
596
-
597
- # Dump record
598
- record_path = records_dir / f"{target.record.id}.json"
599
- target.record.dump(record_path)
563
+ # Parse MSA data
564
+ msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
565
+ msa_id_map = {}
566
+ for msa_idx, msa_id in enumerate(msas):
567
+ # Check that raw MSA exists
568
+ msa_path = Path(msa_id)
569
+ if not msa_path.exists():
570
+ msg = f"MSA file {msa_path} not found."
571
+ raise FileNotFoundError(msg) # noqa: TRY301
572
+
573
+ # Dump processed MSA
574
+ processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
575
+ msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
576
+ if not processed.exists():
577
+ # Parse A3M
578
+ if msa_path.suffix == ".a3m":
579
+ msa: MSA = parse_a3m(
580
+ msa_path,
581
+ taxonomy=None,
582
+ max_seqs=max_msa_seqs,
583
+ )
584
+ elif msa_path.suffix == ".csv":
585
+ msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
586
+ else:
587
+ msg = f"MSA file {msa_path} not supported, only a3m or csv."
588
+ raise RuntimeError(msg) # noqa: TRY301
589
+
590
+ msa.dump(processed)
591
+
592
+ # Modify records to point to processed MSA
593
+ for c in target.record.chains:
594
+ if (c.msa_id != -1) and (c.msa_id in msa_id_map):
595
+ c.msa_id = msa_id_map[c.msa_id]
596
+
597
+ # Dump templates
598
+ for template_id, template in target.templates.items():
599
+ name = f"{target.record.id}_{template_id}.npz"
600
+ template_path = processed_templates_dir / name
601
+ template.dump(template_path)
602
+
603
+ # Dump constraints
604
+ constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
605
+ target.residue_constraints.dump(constraints_path)
606
+
607
+ # Dump extra molecules
608
+ Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
609
+ with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
610
+ pickle.dump(target.extra_mols, f)
611
+
612
+ # Dump structure
613
+ struct_path = structure_dir / f"{target.record.id}.npz"
614
+ target.structure.dump(struct_path)
615
+
616
+ # Dump record
617
+ record_path = records_dir / f"{target.record.id}.json"
618
+ target.record.dump(record_path)
600
619
 
601
620
  except Exception as e: # noqa: BLE001
602
621
  import traceback
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 1.0.37
3
+ Version: 1.0.39
4
4
  Summary: Boltz for VSYNTHES
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
@@ -1,5 +1,5 @@
1
1
  boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
2
- boltz/main.py,sha256=AMYdcqTLOL5Mbo8P2ix1KeNwTijH5fWNzKUnLHBNtn0,39735
2
+ boltz/main.py,sha256=SHM-t-9wjwjTJmWR4N5SrAHxk2vgz7fTruz5shiixVc,40882
3
3
  boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
5
5
  boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
107
107
  boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
108
  boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
109
109
  boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
110
- boltz_vsynthes-1.0.37.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
111
- boltz_vsynthes-1.0.37.dist-info/METADATA,sha256=WhICTabdnBWjYVatPqvnCo9EutL801DxXi4tT5LcP2A,7171
112
- boltz_vsynthes-1.0.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
113
- boltz_vsynthes-1.0.37.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
114
- boltz_vsynthes-1.0.37.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
115
- boltz_vsynthes-1.0.37.dist-info/RECORD,,
110
+ boltz_vsynthes-1.0.39.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
111
+ boltz_vsynthes-1.0.39.dist-info/METADATA,sha256=s4YTm6DLYVooEmxNB0gz1_0aFspJeAqR5KWVZDyTEv4,7171
112
+ boltz_vsynthes-1.0.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
113
+ boltz_vsynthes-1.0.39.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
114
+ boltz_vsynthes-1.0.39.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
115
+ boltz_vsynthes-1.0.39.dist-info/RECORD,,