boltz-vsynthes 1.0.37__py3-none-any.whl → 1.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltz/main.py +117 -98
- {boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/METADATA +1 -1
- {boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/RECORD +7 -7
- {boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/top_level.txt +0 -0
boltz/main.py
CHANGED
@@ -272,7 +272,7 @@ def get_cache_path() -> str:
|
|
272
272
|
|
273
273
|
|
274
274
|
def check_inputs(data: Path) -> list[Path]:
|
275
|
-
"""Check the input data
|
275
|
+
"""Check the input data.
|
276
276
|
|
277
277
|
Parameters
|
278
278
|
----------
|
@@ -282,18 +282,21 @@ def check_inputs(data: Path) -> list[Path]:
|
|
282
282
|
Returns
|
283
283
|
-------
|
284
284
|
list[Path]
|
285
|
-
The list of input
|
285
|
+
The list of input files.
|
286
286
|
|
287
287
|
"""
|
288
|
-
click.echo("Checking input data.")
|
289
|
-
|
290
288
|
# Check if data is a directory
|
291
289
|
if data.is_dir():
|
292
290
|
data: list[Path] = list(data.glob("*"))
|
293
291
|
|
294
292
|
# Filter out non .fasta or .yaml files, raise
|
295
293
|
# an error on directory and other file types
|
294
|
+
filtered_data = []
|
296
295
|
for d in data:
|
296
|
+
# Skip hidden files and directories
|
297
|
+
if d.name.startswith('.') or any(part.startswith('.') for part in d.parts):
|
298
|
+
continue
|
299
|
+
|
297
300
|
if d.is_dir():
|
298
301
|
msg = f"Found directory {d} instead of .fasta or .yaml."
|
299
302
|
raise RuntimeError(msg)
|
@@ -303,6 +306,8 @@ def check_inputs(data: Path) -> list[Path]:
|
|
303
306
|
"please provide a .fasta or .yaml file."
|
304
307
|
)
|
305
308
|
raise RuntimeError(msg)
|
309
|
+
filtered_data.append(d)
|
310
|
+
data = filtered_data
|
306
311
|
else:
|
307
312
|
data = [data]
|
308
313
|
|
@@ -493,13 +498,25 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
|
|
493
498
|
) -> None:
|
494
499
|
try:
|
495
500
|
# Parse data
|
496
|
-
if path.suffix in (".fa", ".fas", ".fasta"):
|
501
|
+
if path.is_dir():
|
502
|
+
# Process all YAML and FASTA files in the directory
|
503
|
+
targets = []
|
504
|
+
for file_path in path.glob("*"):
|
505
|
+
if file_path.suffix in (".fa", ".fas", ".fasta"):
|
506
|
+
target = parse_fasta(file_path, ccd, mol_dir, boltz2)
|
507
|
+
targets.append(target)
|
508
|
+
elif file_path.suffix in (".yml", ".yaml"):
|
509
|
+
target = parse_yaml(file_path, ccd, mol_dir, boltz2)
|
510
|
+
if not isinstance(target, list):
|
511
|
+
target = [target]
|
512
|
+
targets.extend(target)
|
513
|
+
elif path.suffix in (".fa", ".fas", ".fasta"):
|
497
514
|
target = parse_fasta(path, ccd, mol_dir, boltz2)
|
515
|
+
targets = [target]
|
498
516
|
elif path.suffix in (".yml", ".yaml"):
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
raise RuntimeError(msg) # noqa: TRY301
|
517
|
+
targets = parse_yaml(path, ccd, mol_dir, boltz2)
|
518
|
+
if not isinstance(targets, list):
|
519
|
+
targets = [targets]
|
503
520
|
else:
|
504
521
|
msg = (
|
505
522
|
f"Unable to parse filetype {path.suffix}, "
|
@@ -507,96 +524,98 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
|
|
507
524
|
)
|
508
525
|
raise RuntimeError(msg) # noqa: TRY301
|
509
526
|
|
510
|
-
#
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
msa_id
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
527
|
+
# Process each target
|
528
|
+
for target in targets:
|
529
|
+
# Get target id
|
530
|
+
target_id = target.record.id
|
531
|
+
|
532
|
+
# Get all MSA ids and decide whether to generate MSA
|
533
|
+
to_generate = {}
|
534
|
+
prot_id = const.chain_type_ids["PROTEIN"]
|
535
|
+
for chain in target.record.chains:
|
536
|
+
# Add to generate list, assigning entity id
|
537
|
+
if (chain.mol_type == prot_id) and (chain.msa_id == 0):
|
538
|
+
entity_id = chain.entity_id
|
539
|
+
msa_id = f"{target_id}_{entity_id}"
|
540
|
+
to_generate[msa_id] = target.sequences[entity_id]
|
541
|
+
chain.msa_id = msa_dir / f"{msa_id}.csv"
|
542
|
+
|
543
|
+
# We do not support msa generation for non-protein chains
|
544
|
+
elif chain.msa_id == 0:
|
545
|
+
chain.msa_id = -1
|
546
|
+
|
547
|
+
# Generate MSA
|
548
|
+
if to_generate and not use_msa_server:
|
549
|
+
msg = "Missing MSA's in input and --use_msa_server flag not set."
|
550
|
+
raise RuntimeError(msg) # noqa: TRY301
|
551
|
+
|
552
|
+
if to_generate:
|
553
|
+
msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
|
554
|
+
click.echo(msg)
|
555
|
+
compute_msa(
|
556
|
+
data=to_generate,
|
557
|
+
target_id=target_id,
|
558
|
+
msa_dir=msa_dir,
|
559
|
+
msa_server_url=msa_server_url,
|
560
|
+
msa_pairing_strategy=msa_pairing_strategy,
|
561
|
+
)
|
543
562
|
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
563
|
+
# Parse MSA data
|
564
|
+
msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
|
565
|
+
msa_id_map = {}
|
566
|
+
for msa_idx, msa_id in enumerate(msas):
|
567
|
+
# Check that raw MSA exists
|
568
|
+
msa_path = Path(msa_id)
|
569
|
+
if not msa_path.exists():
|
570
|
+
msg = f"MSA file {msa_path} not found."
|
571
|
+
raise FileNotFoundError(msg) # noqa: TRY301
|
572
|
+
|
573
|
+
# Dump processed MSA
|
574
|
+
processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
|
575
|
+
msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
|
576
|
+
if not processed.exists():
|
577
|
+
# Parse A3M
|
578
|
+
if msa_path.suffix == ".a3m":
|
579
|
+
msa: MSA = parse_a3m(
|
580
|
+
msa_path,
|
581
|
+
taxonomy=None,
|
582
|
+
max_seqs=max_msa_seqs,
|
583
|
+
)
|
584
|
+
elif msa_path.suffix == ".csv":
|
585
|
+
msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
|
586
|
+
else:
|
587
|
+
msg = f"MSA file {msa_path} not supported, only a3m or csv."
|
588
|
+
raise RuntimeError(msg) # noqa: TRY301
|
589
|
+
|
590
|
+
msa.dump(processed)
|
591
|
+
|
592
|
+
# Modify records to point to processed MSA
|
593
|
+
for c in target.record.chains:
|
594
|
+
if (c.msa_id != -1) and (c.msa_id in msa_id_map):
|
595
|
+
c.msa_id = msa_id_map[c.msa_id]
|
596
|
+
|
597
|
+
# Dump templates
|
598
|
+
for template_id, template in target.templates.items():
|
599
|
+
name = f"{target.record.id}_{template_id}.npz"
|
600
|
+
template_path = processed_templates_dir / name
|
601
|
+
template.dump(template_path)
|
602
|
+
|
603
|
+
# Dump constraints
|
604
|
+
constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
|
605
|
+
target.residue_constraints.dump(constraints_path)
|
606
|
+
|
607
|
+
# Dump extra molecules
|
608
|
+
Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
|
609
|
+
with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
|
610
|
+
pickle.dump(target.extra_mols, f)
|
611
|
+
|
612
|
+
# Dump structure
|
613
|
+
struct_path = structure_dir / f"{target.record.id}.npz"
|
614
|
+
target.structure.dump(struct_path)
|
615
|
+
|
616
|
+
# Dump record
|
617
|
+
record_path = records_dir / f"{target.record.id}.json"
|
618
|
+
target.record.dump(record_path)
|
600
619
|
|
601
620
|
except Exception as e: # noqa: BLE001
|
602
621
|
import traceback
|
@@ -1,5 +1,5 @@
|
|
1
1
|
boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
|
2
|
-
boltz/main.py,sha256=
|
2
|
+
boltz/main.py,sha256=SHM-t-9wjwjTJmWR4N5SrAHxk2vgz7fTruz5shiixVc,40882
|
3
3
|
boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
|
5
5
|
boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
|
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
|
|
107
107
|
boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
108
108
|
boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
|
109
109
|
boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
|
110
|
-
boltz_vsynthes-1.0.
|
111
|
-
boltz_vsynthes-1.0.
|
112
|
-
boltz_vsynthes-1.0.
|
113
|
-
boltz_vsynthes-1.0.
|
114
|
-
boltz_vsynthes-1.0.
|
115
|
-
boltz_vsynthes-1.0.
|
110
|
+
boltz_vsynthes-1.0.39.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
|
111
|
+
boltz_vsynthes-1.0.39.dist-info/METADATA,sha256=s4YTm6DLYVooEmxNB0gz1_0aFspJeAqR5KWVZDyTEv4,7171
|
112
|
+
boltz_vsynthes-1.0.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
113
|
+
boltz_vsynthes-1.0.39.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
|
114
|
+
boltz_vsynthes-1.0.39.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
|
115
|
+
boltz_vsynthes-1.0.39.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|