rc-foundry 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry/inference_engines/checkpoint_registry.py +58 -11
- foundry/utils/alignment.py +10 -2
- foundry/version.py +2 -2
- foundry_cli/download_checkpoints.py +66 -66
- {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/METADATA +25 -20
- rc_foundry-0.1.7.dist-info/RECORD +311 -0
- rf3/configs/callbacks/default.yaml +5 -0
- rf3/configs/callbacks/dump_validation_structures.yaml +6 -0
- rf3/configs/callbacks/metrics_logging.yaml +10 -0
- rf3/configs/callbacks/train_logging.yaml +16 -0
- rf3/configs/dataloader/default.yaml +15 -0
- rf3/configs/datasets/base.yaml +31 -0
- rf3/configs/datasets/pdb_and_distillation.yaml +58 -0
- rf3/configs/datasets/pdb_only.yaml +17 -0
- rf3/configs/datasets/train/disorder_distillation.yaml +48 -0
- rf3/configs/datasets/train/domain_distillation.yaml +50 -0
- rf3/configs/datasets/train/monomer_distillation.yaml +49 -0
- rf3/configs/datasets/train/na_complex_distillation.yaml +50 -0
- rf3/configs/datasets/train/pdb/af3_weighted_sampling.yaml +8 -0
- rf3/configs/datasets/train/pdb/base.yaml +32 -0
- rf3/configs/datasets/train/pdb/plinder.yaml +54 -0
- rf3/configs/datasets/train/pdb/train_interface.yaml +51 -0
- rf3/configs/datasets/train/pdb/train_pn_unit.yaml +46 -0
- rf3/configs/datasets/train/rna_monomer_distillation.yaml +56 -0
- rf3/configs/datasets/val/af3_ab_set.yaml +11 -0
- rf3/configs/datasets/val/af3_validation.yaml +11 -0
- rf3/configs/datasets/val/base.yaml +32 -0
- rf3/configs/datasets/val/runs_and_poses.yaml +12 -0
- rf3/configs/debug/default.yaml +66 -0
- rf3/configs/debug/train_specific_examples.yaml +21 -0
- rf3/configs/experiment/pretrained/rf3.yaml +50 -0
- rf3/configs/experiment/pretrained/rf3_with_confidence.yaml +13 -0
- rf3/configs/experiment/quick-rf3-with-confidence.yaml +15 -0
- rf3/configs/experiment/quick-rf3.yaml +61 -0
- rf3/configs/hydra/default.yaml +18 -0
- rf3/configs/hydra/no_logging.yaml +7 -0
- rf3/configs/inference.yaml +7 -0
- rf3/configs/inference_engine/base.yaml +23 -0
- rf3/configs/inference_engine/rf3.yaml +33 -0
- rf3/configs/logger/csv.yaml +6 -0
- rf3/configs/logger/default.yaml +3 -0
- rf3/configs/logger/wandb.yaml +15 -0
- rf3/configs/model/components/ema.yaml +1 -0
- rf3/configs/model/components/rf3_net.yaml +177 -0
- rf3/configs/model/components/rf3_net_with_confidence_head.yaml +45 -0
- rf3/configs/model/optimizers/adam.yaml +5 -0
- rf3/configs/model/rf3.yaml +43 -0
- rf3/configs/model/rf3_with_confidence.yaml +7 -0
- rf3/configs/model/schedulers/af3.yaml +6 -0
- rf3/configs/paths/data/default.yaml +43 -0
- rf3/configs/paths/default.yaml +21 -0
- rf3/configs/train.yaml +42 -0
- rf3/configs/trainer/cpu.yaml +6 -0
- rf3/configs/trainer/ddp.yaml +5 -0
- rf3/configs/trainer/loss/losses/confidence_loss.yaml +29 -0
- rf3/configs/trainer/loss/losses/diffusion_loss.yaml +9 -0
- rf3/configs/trainer/loss/losses/distogram_loss.yaml +2 -0
- rf3/configs/trainer/loss/structure_prediction.yaml +4 -0
- rf3/configs/trainer/loss/structure_prediction_with_confidence.yaml +2 -0
- rf3/configs/trainer/metrics/structure_prediction.yaml +14 -0
- rf3/configs/trainer/rf3.yaml +20 -0
- rf3/configs/trainer/rf3_with_confidence.yaml +13 -0
- rf3/configs/validate.yaml +45 -0
- rfd3/cli.py +10 -4
- rfd3/configs/__init__.py +0 -0
- rfd3/configs/callbacks/design_callbacks.yaml +10 -0
- rfd3/configs/callbacks/metrics_logging.yaml +20 -0
- rfd3/configs/callbacks/train_logging.yaml +24 -0
- rfd3/configs/dataloader/default.yaml +15 -0
- rfd3/configs/dataloader/fast.yaml +11 -0
- rfd3/configs/datasets/conditions/dna_condition.yaml +3 -0
- rfd3/configs/datasets/conditions/island.yaml +28 -0
- rfd3/configs/datasets/conditions/ppi.yaml +2 -0
- rfd3/configs/datasets/conditions/sequence_design.yaml +17 -0
- rfd3/configs/datasets/conditions/tipatom.yaml +28 -0
- rfd3/configs/datasets/conditions/unconditional.yaml +21 -0
- rfd3/configs/datasets/design_base.yaml +97 -0
- rfd3/configs/datasets/train/pdb/af3_train_interface.yaml +46 -0
- rfd3/configs/datasets/train/pdb/af3_train_pn_unit.yaml +42 -0
- rfd3/configs/datasets/train/pdb/base.yaml +14 -0
- rfd3/configs/datasets/train/pdb/base_no_weights.yaml +19 -0
- rfd3/configs/datasets/train/pdb/base_transform_args.yaml +59 -0
- rfd3/configs/datasets/train/pdb/na_complex_distillation.yaml +20 -0
- rfd3/configs/datasets/train/pdb/pdb_base.yaml +11 -0
- rfd3/configs/datasets/train/pdb/rfd3_train_interface.yaml +22 -0
- rfd3/configs/datasets/train/pdb/rfd3_train_pn_unit.yaml +23 -0
- rfd3/configs/datasets/train/rfd3_monomer_distillation.yaml +38 -0
- rfd3/configs/datasets/val/bcov_ppi_easy_medium.yaml +9 -0
- rfd3/configs/datasets/val/design_validation_base.yaml +40 -0
- rfd3/configs/datasets/val/dna_binder_design5.yaml +9 -0
- rfd3/configs/datasets/val/dna_binder_long.yaml +13 -0
- rfd3/configs/datasets/val/dna_binder_short.yaml +13 -0
- rfd3/configs/datasets/val/indexed.yaml +9 -0
- rfd3/configs/datasets/val/mcsa_41.yaml +9 -0
- rfd3/configs/datasets/val/mcsa_41_short_rigid.yaml +10 -0
- rfd3/configs/datasets/val/ppi_inference.yaml +7 -0
- rfd3/configs/datasets/val/sm_binder_hbonds.yaml +13 -0
- rfd3/configs/datasets/val/sm_binder_hbonds_short.yaml +15 -0
- rfd3/configs/datasets/val/unconditional.yaml +9 -0
- rfd3/configs/datasets/val/unconditional_deep.yaml +9 -0
- rfd3/configs/datasets/val/unindexed.yaml +8 -0
- rfd3/configs/datasets/val/val_examples/bcov_ppi_easy_medium_with_ori.yaml +151 -0
- rfd3/configs/datasets/val/val_examples/bcov_ppi_easy_medium_with_ori_spoof_helical_bundle.yaml +7 -0
- rfd3/configs/datasets/val/val_examples/bcov_ppi_easy_medium_with_ori_varying_lengths.yaml +28 -0
- rfd3/configs/datasets/val/val_examples/bpem_ori_hb.yaml +212 -0
- rfd3/configs/debug/default.yaml +64 -0
- rfd3/configs/debug/train_specific_examples.yaml +21 -0
- rfd3/configs/dev.yaml +9 -0
- rfd3/configs/experiment/debug.yaml +14 -0
- rfd3/configs/experiment/pretrain.yaml +31 -0
- rfd3/configs/experiment/test-uncond.yaml +10 -0
- rfd3/configs/experiment/test-unindexed.yaml +21 -0
- rfd3/configs/hydra/default.yaml +18 -0
- rfd3/configs/hydra/no_logging.yaml +7 -0
- rfd3/configs/inference.yaml +9 -0
- rfd3/configs/inference_engine/base.yaml +15 -0
- rfd3/configs/inference_engine/dev.yaml +20 -0
- rfd3/configs/inference_engine/rfdiffusion3.yaml +65 -0
- rfd3/configs/logger/csv.yaml +6 -0
- rfd3/configs/logger/default.yaml +2 -0
- rfd3/configs/logger/wandb.yaml +15 -0
- rfd3/configs/model/components/ema.yaml +1 -0
- rfd3/configs/model/components/rfd3_net.yaml +131 -0
- rfd3/configs/model/optimizers/adam.yaml +5 -0
- rfd3/configs/model/rfd3_base.yaml +8 -0
- rfd3/configs/model/samplers/edm.yaml +21 -0
- rfd3/configs/model/samplers/symmetry.yaml +10 -0
- rfd3/configs/model/schedulers/af3.yaml +6 -0
- rfd3/configs/paths/data/default.yaml +18 -0
- rfd3/configs/paths/default.yaml +22 -0
- rfd3/configs/train.yaml +28 -0
- rfd3/configs/trainer/cpu.yaml +6 -0
- rfd3/configs/trainer/ddp.yaml +5 -0
- rfd3/configs/trainer/loss/losses/diffusion_loss.yaml +12 -0
- rfd3/configs/trainer/loss/losses/sequence_loss.yaml +3 -0
- rfd3/configs/trainer/metrics/design_metrics.yaml +22 -0
- rfd3/configs/trainer/rfd3_base.yaml +35 -0
- rfd3/configs/validate.yaml +34 -0
- rfd3/engine.py +19 -11
- rfd3/inference/input_parsing.py +1 -1
- rfd3/inference/legacy_input_parsing.py +17 -1
- rfd3/inference/parsing.py +1 -0
- rfd3/inference/symmetry/atom_array.py +1 -5
- rfd3/inference/symmetry/checks.py +53 -28
- rfd3/inference/symmetry/frames.py +8 -5
- rfd3/inference/symmetry/symmetry_utils.py +38 -60
- rfd3/run_inference.py +3 -1
- rfd3/utils/inference.py +23 -0
- rc_foundry-0.1.5.dist-info/RECORD +0 -180
- {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/WHEEL +0 -0
- {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/entry_points.txt +0 -0
- {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# TODO: Inherit from common config with default Transform pipeline
|
|
2
|
+
|
|
3
|
+
na_complex_distillation:
|
|
4
|
+
dataset:
|
|
5
|
+
_target_: atomworks.ml.datasets.StructuralDatasetWrapper
|
|
6
|
+
save_failed_examples_to_dir: null
|
|
7
|
+
|
|
8
|
+
# cif parser
|
|
9
|
+
cif_parser_args:
|
|
10
|
+
#assume_residues_all_resolved: true
|
|
11
|
+
cache_dir: null
|
|
12
|
+
load_from_cache: false
|
|
13
|
+
save_to_cache: false
|
|
14
|
+
|
|
15
|
+
# metadata parser
|
|
16
|
+
dataset_parser:
|
|
17
|
+
_target_: atomworks.ml.datasets.parsers.GenericDFParser
|
|
18
|
+
pn_unit_iid_colnames: null #[]
|
|
19
|
+
|
|
20
|
+
# metadata dataset
|
|
21
|
+
dataset:
|
|
22
|
+
_target_: atomworks.ml.datasets.PandasDataset
|
|
23
|
+
name: tf_distillation
|
|
24
|
+
id_column: example_id
|
|
25
|
+
data: ${paths.data.na_complex_distillation_parquet_dir}/transcriptionFactor_distillation_rf3.newDL.csv
|
|
26
|
+
columns_to_load:
|
|
27
|
+
- example_id
|
|
28
|
+
- path
|
|
29
|
+
transform:
|
|
30
|
+
_target_: ${datasets.pipeline_target}
|
|
31
|
+
is_inference: False
|
|
32
|
+
protein_msa_dirs: [{"dir": "${paths.data.na_complex_distillation_data_dir}/a3m/", "extension": ".a3m", "directory_depth": 1}]
|
|
33
|
+
rna_msa_dirs: []
|
|
34
|
+
n_recycles: ${datasets.n_recycles_train}
|
|
35
|
+
crop_size: ${datasets.crop_size}
|
|
36
|
+
n_msa: ${datasets.n_msa}
|
|
37
|
+
diffusion_batch_size: ${datasets.diffusion_batch_size_train}
|
|
38
|
+
max_atoms_in_crop: ${datasets.max_atoms_in_crop}
|
|
39
|
+
crop_contiguous_probability: 0.25
|
|
40
|
+
crop_spatial_probability: 0.75
|
|
41
|
+
pad_dna_p_skip: 0.0
|
|
42
|
+
run_confidence_head: ${datasets.run_confidence_head}
|
|
43
|
+
take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
|
|
44
|
+
use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
|
|
45
|
+
mirror_prob: 0.0
|
|
46
|
+
atomization_prob: ${datasets.atomization_prob}
|
|
47
|
+
ligand_dropout_prob: ${datasets.ligand_dropout_prob}
|
|
48
|
+
p_unconditional: ${datasets.p_unconditional}
|
|
49
|
+
p_dropout_atom_level_embeddings: ${datasets.p_dropout_atom_level_embeddings}
|
|
50
|
+
add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
weights:
|
|
2
|
+
_target_: atomworks.ml.samplers.calculate_weights_for_pdb_dataset_df
|
|
3
|
+
# We do not include beta here, since it is different for interfaces and chains
|
|
4
|
+
alphas:
|
|
5
|
+
a_prot: 3.0 # 3 for AF-3
|
|
6
|
+
a_nuc: 3.0 # 3 for AF-3
|
|
7
|
+
a_ligand: 1.0 # 1 for AF-3
|
|
8
|
+
a_loi: 5.0 # 5 for AF-3
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
dataset:
|
|
2
|
+
_target_: atomworks.ml.datasets.StructuralDatasetWrapper
|
|
3
|
+
save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
|
|
4
|
+
cif_parser_args:
|
|
5
|
+
cache_dir: null
|
|
6
|
+
load_from_cache: false
|
|
7
|
+
save_to_cache: false
|
|
8
|
+
dataset:
|
|
9
|
+
_target_: atomworks.ml.datasets.PandasDataset
|
|
10
|
+
# we will use the example_id as the unique column
|
|
11
|
+
id_column: example_id
|
|
12
|
+
transform:
|
|
13
|
+
# common Transform pipeline components for all PDB datasets
|
|
14
|
+
_target_: ${datasets.pipeline_target}
|
|
15
|
+
is_inference: False
|
|
16
|
+
protein_msa_dirs: ${paths.data.protein_msa_dirs}
|
|
17
|
+
rna_msa_dirs: ${paths.data.rna_msa_dirs}
|
|
18
|
+
n_recycles: ${datasets.n_recycles_train}
|
|
19
|
+
crop_size: ${datasets.crop_size}
|
|
20
|
+
n_msa: ${datasets.n_msa}
|
|
21
|
+
diffusion_batch_size: ${datasets.diffusion_batch_size_train}
|
|
22
|
+
max_atoms_in_crop: ${datasets.max_atoms_in_crop}
|
|
23
|
+
run_confidence_head: ${datasets.run_confidence_head}
|
|
24
|
+
p_unconditional: ${datasets.p_unconditional}
|
|
25
|
+
p_dropout_atom_level_embeddings: ${datasets.p_dropout_atom_level_embeddings}
|
|
26
|
+
take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
|
|
27
|
+
use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
|
|
28
|
+
mirror_prob: ${datasets.mirror_prob}
|
|
29
|
+
atomization_prob: ${datasets.atomization_prob}
|
|
30
|
+
ligand_dropout_prob: ${datasets.ligand_dropout_prob}
|
|
31
|
+
add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
|
|
32
|
+
add_cyclic_bonds: ${datasets.add_cyclic_bonds}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# PLINDER is a subset of the PDB, so we inherit from the base PDB config
|
|
2
|
+
|
|
3
|
+
defaults:
|
|
4
|
+
- base
|
|
5
|
+
|
|
6
|
+
dataset:
|
|
7
|
+
dataset_parser:
|
|
8
|
+
_target_: atomworks.ml.datasets.parsers.InterfacesDFParser
|
|
9
|
+
base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
|
|
10
|
+
dataset:
|
|
11
|
+
name: plinder
|
|
12
|
+
data: ${paths.data.pdb_data_dir}/interfaces_df_train_plinder.parquet
|
|
13
|
+
filters:
|
|
14
|
+
# filters common across all PDB datasets
|
|
15
|
+
- "deposition_date < '2024-01-01'"
|
|
16
|
+
- "resolution < 9.0"
|
|
17
|
+
- "num_polymer_pn_units <= 300"
|
|
18
|
+
# interface-specific filters
|
|
19
|
+
- "~(pn_unit_1_non_polymer_res_names.notnull() and pn_unit_1_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
|
|
20
|
+
- "~(pn_unit_2_non_polymer_res_names.notnull() and pn_unit_2_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
|
|
21
|
+
columns_to_load:
|
|
22
|
+
# columns common across all PDB datasets
|
|
23
|
+
- example_id
|
|
24
|
+
- pdb_id
|
|
25
|
+
- assembly_id
|
|
26
|
+
- deposition_date
|
|
27
|
+
- resolution
|
|
28
|
+
- num_polymer_pn_units
|
|
29
|
+
- method
|
|
30
|
+
- n_prot
|
|
31
|
+
- n_nuc
|
|
32
|
+
- n_ligand
|
|
33
|
+
- n_peptide
|
|
34
|
+
- total_num_atoms_in_unprocessed_assembly
|
|
35
|
+
# interface specific columns
|
|
36
|
+
- pn_unit_1_iid
|
|
37
|
+
- pn_unit_2_iid
|
|
38
|
+
- pn_unit_1_non_polymer_res_names
|
|
39
|
+
- pn_unit_2_non_polymer_res_names
|
|
40
|
+
- is_inter_molecule
|
|
41
|
+
- all_pn_unit_iids_after_processing
|
|
42
|
+
- involves_loi
|
|
43
|
+
- pli_qcov__50__strong__component
|
|
44
|
+
- pli_qcov__70__strong__component
|
|
45
|
+
- pli_qcov__50__weak__component
|
|
46
|
+
- pli_qcov__70__weak__component
|
|
47
|
+
transform:
|
|
48
|
+
# interface-specific Transform pipeline parameters
|
|
49
|
+
crop_contiguous_probability: 0.0
|
|
50
|
+
crop_spatial_probability: 1.0
|
|
51
|
+
|
|
52
|
+
weights:
|
|
53
|
+
_target_: atomworks.ml.samplers.calculate_weights_by_inverse_cluster_size
|
|
54
|
+
cluster_column: pli_qcov__50__weak__component # Need to ablate
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- base
|
|
3
|
+
- af3_weighted_sampling
|
|
4
|
+
|
|
5
|
+
dataset:
|
|
6
|
+
dataset_parser:
|
|
7
|
+
_target_: atomworks.ml.datasets.parsers.InterfacesDFParser
|
|
8
|
+
base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
|
|
9
|
+
dataset:
|
|
10
|
+
name: interface
|
|
11
|
+
data: ${paths.data.pdb_data_dir}/interfaces_df_train.parquet
|
|
12
|
+
filters:
|
|
13
|
+
# filters common across all PDB datasets
|
|
14
|
+
- "deposition_date < '2024-01-01'"
|
|
15
|
+
- "resolution < 9.0"
|
|
16
|
+
- "num_polymer_pn_units <= 300"
|
|
17
|
+
- "cluster.notnull()"
|
|
18
|
+
# interface specific filters
|
|
19
|
+
- "~(pn_unit_1_non_polymer_res_names.notnull() and pn_unit_1_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
|
|
20
|
+
- "~(pn_unit_2_non_polymer_res_names.notnull() and pn_unit_2_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
|
|
21
|
+
- "is_inter_molecule"
|
|
22
|
+
columns_to_load:
|
|
23
|
+
# columns common across all PDB datasets
|
|
24
|
+
- example_id
|
|
25
|
+
- pdb_id
|
|
26
|
+
- assembly_id
|
|
27
|
+
- deposition_date
|
|
28
|
+
- resolution
|
|
29
|
+
- num_polymer_pn_units
|
|
30
|
+
- method
|
|
31
|
+
- cluster
|
|
32
|
+
- n_prot
|
|
33
|
+
- n_nuc
|
|
34
|
+
- n_ligand
|
|
35
|
+
- n_peptide
|
|
36
|
+
- total_num_atoms_in_unprocessed_assembly
|
|
37
|
+
# interface specific columns
|
|
38
|
+
- pn_unit_1_iid
|
|
39
|
+
- pn_unit_2_iid
|
|
40
|
+
- pn_unit_1_non_polymer_res_names
|
|
41
|
+
- pn_unit_2_non_polymer_res_names
|
|
42
|
+
- is_inter_molecule
|
|
43
|
+
- all_pn_unit_iids_after_processing
|
|
44
|
+
- involves_loi
|
|
45
|
+
transform:
|
|
46
|
+
# interface-specific Transform pipeline parameters
|
|
47
|
+
crop_contiguous_probability: 0.0
|
|
48
|
+
crop_spatial_probability: 1.0
|
|
49
|
+
|
|
50
|
+
weights:
|
|
51
|
+
beta: 1.0
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- base
|
|
3
|
+
- af3_weighted_sampling
|
|
4
|
+
|
|
5
|
+
dataset:
|
|
6
|
+
dataset_parser:
|
|
7
|
+
_target_: atomworks.ml.datasets.parsers.PNUnitsDFParser
|
|
8
|
+
base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
|
|
9
|
+
dataset:
|
|
10
|
+
name: pn_unit
|
|
11
|
+
data: ${paths.data.pdb_data_dir}/pn_units_df_train.parquet
|
|
12
|
+
filters:
|
|
13
|
+
# filters common across all PDB datasets
|
|
14
|
+
- "deposition_date < '2024-01-01'"
|
|
15
|
+
- "resolution < 9.0"
|
|
16
|
+
- "num_polymer_pn_units <= 300"
|
|
17
|
+
- "cluster.notnull()"
|
|
18
|
+
# pn_unit specific filters
|
|
19
|
+
- "~(q_pn_unit_non_polymer_res_names.notnull() and q_pn_unit_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
|
|
20
|
+
columns_to_load:
|
|
21
|
+
# columns common across all PDB datasets
|
|
22
|
+
- example_id
|
|
23
|
+
- pdb_id
|
|
24
|
+
- assembly_id
|
|
25
|
+
- deposition_date
|
|
26
|
+
- resolution
|
|
27
|
+
- num_polymer_pn_units
|
|
28
|
+
- method
|
|
29
|
+
- cluster
|
|
30
|
+
- n_prot
|
|
31
|
+
- n_nuc
|
|
32
|
+
- n_ligand
|
|
33
|
+
- n_peptide
|
|
34
|
+
- total_num_atoms_in_unprocessed_assembly
|
|
35
|
+
# pn_unit specific columns
|
|
36
|
+
- q_pn_unit_iid
|
|
37
|
+
- q_pn_unit_non_polymer_res_names
|
|
38
|
+
- all_pn_unit_iids_after_processing
|
|
39
|
+
- q_pn_unit_is_loi
|
|
40
|
+
transform:
|
|
41
|
+
# pn_unit-specific Transform pipeline parameters
|
|
42
|
+
crop_contiguous_probability: 0.3333333333333333
|
|
43
|
+
crop_spatial_probability: 0.6666666666666667
|
|
44
|
+
|
|
45
|
+
weights:
|
|
46
|
+
beta: 0.5
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# TODO: Inherit from common config with default Transform pipeline
|
|
2
|
+
|
|
3
|
+
rna_monomer_distillation:
|
|
4
|
+
dataset:
|
|
5
|
+
_target_: atomworks.ml.datasets.StructuralDatasetWrapper
|
|
6
|
+
save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
|
|
7
|
+
|
|
8
|
+
# cif parser arguments
|
|
9
|
+
cif_parser_args:
|
|
10
|
+
cache_dir: null
|
|
11
|
+
load_from_cache: False
|
|
12
|
+
save_to_cache: False
|
|
13
|
+
|
|
14
|
+
# metadata parser
|
|
15
|
+
dataset_parser:
|
|
16
|
+
_target_: atomworks.ml.datasets.parsers.GenericDFParser
|
|
17
|
+
pn_unit_iid_colnames: null
|
|
18
|
+
|
|
19
|
+
# metadata dataset
|
|
20
|
+
dataset:
|
|
21
|
+
_target_: atomworks.ml.datasets.PandasDataset
|
|
22
|
+
name: rna_monomer_distillation
|
|
23
|
+
id_column: example_id
|
|
24
|
+
data: /projects/ml/afavor/rna_distillation/rna_distillation_filtered_df.parquet
|
|
25
|
+
columns_to_load:
|
|
26
|
+
- example_id
|
|
27
|
+
- path
|
|
28
|
+
- cluster_id
|
|
29
|
+
- seq_hash
|
|
30
|
+
- overall_plddt
|
|
31
|
+
- overall_pde
|
|
32
|
+
- overall_pae
|
|
33
|
+
|
|
34
|
+
transform:
|
|
35
|
+
_target_: ${datasets.pipeline_target}
|
|
36
|
+
is_inference: False
|
|
37
|
+
protein_msa_dirs: []
|
|
38
|
+
rna_msa_dirs: [{"dir": "/projects/ml/afavor/rna_distillation/all_MSAs_renamed", "extension": ".afa", "directory_depth": 2}]
|
|
39
|
+
n_recycles: ${datasets.n_recycles_train}
|
|
40
|
+
crop_size: ${datasets.crop_size}
|
|
41
|
+
n_msa: ${datasets.n_msa}
|
|
42
|
+
diffusion_batch_size: ${datasets.diffusion_batch_size_train}
|
|
43
|
+
max_atoms_in_crop: ${datasets.max_atoms_in_crop}
|
|
44
|
+
crop_contiguous_probability: 1.0
|
|
45
|
+
crop_spatial_probability: 0.0
|
|
46
|
+
pad_dna_p_skip: 0.0
|
|
47
|
+
b_factor_min: 0.6
|
|
48
|
+
run_confidence_head: ${datasets.run_confidence_head}
|
|
49
|
+
take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
|
|
50
|
+
use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
|
|
51
|
+
mirror_prob: 0.0
|
|
52
|
+
atomization_prob: ${datasets.atomization_prob}
|
|
53
|
+
ligand_dropout_prob: ${datasets.ligand_dropout_prob}
|
|
54
|
+
p_unconditional: ${datasets.p_unconditional}
|
|
55
|
+
p_dropout_atom_level_embeddings: ${datasets.p_dropout_atom_level_embeddings}
|
|
56
|
+
add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- base
|
|
3
|
+
|
|
4
|
+
dataset:
|
|
5
|
+
dataset_parser:
|
|
6
|
+
_target_: atomworks.ml.datasets.parsers.ValidationDFParserLikeAF3
|
|
7
|
+
base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
|
|
8
|
+
dataset:
|
|
9
|
+
_target_: atomworks.ml.datasets.PandasDataset
|
|
10
|
+
name: af3_validation
|
|
11
|
+
data: /net/scratch/rib7/rf3_ab_splits/entry_level_val_df.parquet
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- base
|
|
3
|
+
|
|
4
|
+
dataset:
|
|
5
|
+
dataset_parser:
|
|
6
|
+
_target_: atomworks.ml.datasets.parsers.ValidationDFParserLikeAF3
|
|
7
|
+
base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
|
|
8
|
+
dataset:
|
|
9
|
+
_target_: atomworks.ml.datasets.PandasDataset
|
|
10
|
+
name: af3_validation
|
|
11
|
+
data: ${paths.data.pdb_data_dir}/entry_level_val_df.parquet
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
dataset:
|
|
2
|
+
_target_: atomworks.ml.datasets.StructuralDatasetWrapper
|
|
3
|
+
save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
|
|
4
|
+
cif_parser_args:
|
|
5
|
+
cache_dir: null
|
|
6
|
+
load_from_cache: False
|
|
7
|
+
save_to_cache: False
|
|
8
|
+
dataset:
|
|
9
|
+
_target_: atomworks.ml.datasets.PandasDataset
|
|
10
|
+
# we will use the example_id as the unique column
|
|
11
|
+
id_column: example_id
|
|
12
|
+
# return all keys (do not subset)
|
|
13
|
+
transform:
|
|
14
|
+
# common Transform pipeline components for all PDB datasets
|
|
15
|
+
_target_: ${datasets.pipeline_target}
|
|
16
|
+
is_inference: True
|
|
17
|
+
protein_msa_dirs: ${paths.data.protein_msa_dirs}
|
|
18
|
+
rna_msa_dirs: ${paths.data.rna_msa_dirs}
|
|
19
|
+
n_recycles: ${datasets.n_recycles_validation}
|
|
20
|
+
crop_size: null # do not crop for inference
|
|
21
|
+
n_msa: ${datasets.n_msa}
|
|
22
|
+
diffusion_batch_size: ${datasets.diffusion_batch_size_inference}
|
|
23
|
+
max_atoms_in_crop: null # do not crop for inference
|
|
24
|
+
return_atom_array: True # return atom array for inference
|
|
25
|
+
run_confidence_head: ${datasets.run_confidence_head}
|
|
26
|
+
p_unconditional: 1.0 # unconditional for inference, unless explicitly overridden
|
|
27
|
+
p_dropout_atom_level_embeddings: 0.0 # always use embeddings in inference
|
|
28
|
+
take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
|
|
29
|
+
use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
|
|
30
|
+
add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
|
|
31
|
+
|
|
32
|
+
key_to_balance: ${datasets.key_to_balance}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- base
|
|
3
|
+
|
|
4
|
+
dataset:
|
|
5
|
+
dataset_parser:
|
|
6
|
+
_target_: atomworks.ml.datasets.parsers.ValidationDFParserLikeAF3
|
|
7
|
+
dataset:
|
|
8
|
+
_target_: atomworks.ml.datasets.PandasDataset
|
|
9
|
+
name: af3_validation
|
|
10
|
+
data: /projects/ml/datahub/dfs/af3_splits/2024_12_16/runs_n_poses_entry_level_df.parquet
|
|
11
|
+
filters:
|
|
12
|
+
- "n_tokens_total < 1000" # Subset to reasonably-sized examples for efficiency
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
|
|
3
|
+
defaults:
|
|
4
|
+
- override /logger: null
|
|
5
|
+
|
|
6
|
+
# default debugging setup, runs 1 full epoch
|
|
7
|
+
# other debugging configs can inherit from this one
|
|
8
|
+
|
|
9
|
+
# overwrite task name so debugging logs are stored in separate folder
|
|
10
|
+
task_name: "debug"
|
|
11
|
+
|
|
12
|
+
extras:
|
|
13
|
+
ignore_warnings: False
|
|
14
|
+
enforce_tags: False
|
|
15
|
+
|
|
16
|
+
# sets level of all command line loggers to 'DEBUG'
|
|
17
|
+
# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
|
|
18
|
+
hydra:
|
|
19
|
+
job_logging:
|
|
20
|
+
root:
|
|
21
|
+
level: DEBUG
|
|
22
|
+
# use the below to also set hydra loggers to 'DEBUG'
|
|
23
|
+
verbose: True
|
|
24
|
+
|
|
25
|
+
# Print example ID before forward pass
|
|
26
|
+
callbacks:
|
|
27
|
+
print_example_id_before_forward_pass:
|
|
28
|
+
_target_: foundry.callbacks.train_logging.PrintExampleIDBeforeForwardPassCallback
|
|
29
|
+
timing_logging:
|
|
30
|
+
_target_: foundry.callbacks.timing_logging.TimingCallback
|
|
31
|
+
log_every_n: 5
|
|
32
|
+
|
|
33
|
+
dataloader:
|
|
34
|
+
train:
|
|
35
|
+
dataloader_params:
|
|
36
|
+
batch_size: 1
|
|
37
|
+
num_workers: 0 # debuggers don't like multiprocessing -- work on main thread
|
|
38
|
+
pin_memory: False # disable GPU memory pinning
|
|
39
|
+
prefetch_factor: null # must be null for num_workers=0
|
|
40
|
+
n_fallback_retries: 0 # disable fallback retries for debugging
|
|
41
|
+
|
|
42
|
+
val:
|
|
43
|
+
dataloader_params:
|
|
44
|
+
batch_size: 1
|
|
45
|
+
num_workers: 0
|
|
46
|
+
pin_memory: False
|
|
47
|
+
prefetch_factor: null # must be null for num_workers=0
|
|
48
|
+
|
|
49
|
+
datasets:
|
|
50
|
+
crop_size: 100 # set small crop size for quick debugging
|
|
51
|
+
diffusion_batch_size_train: 1
|
|
52
|
+
diffusion_batch_size_inference: 2
|
|
53
|
+
n_recycles_train: 1
|
|
54
|
+
n_recycles_validation: 1
|
|
55
|
+
n_msa: 128
|
|
56
|
+
key_to_balance: null # otherwise big examples will be processed first
|
|
57
|
+
|
|
58
|
+
trainer:
|
|
59
|
+
devices_per_node: 1
|
|
60
|
+
limit_train_batches: 2
|
|
61
|
+
limit_val_batches: 1
|
|
62
|
+
validate_every_n_epochs: 1
|
|
63
|
+
|
|
64
|
+
# Set tags to help identify debugging runs
|
|
65
|
+
tags:
|
|
66
|
+
- debug
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
|
|
3
|
+
# See: https://hydra.cc/docs/patterns/configuring_experiments/
|
|
4
|
+
|
|
5
|
+
# to execute this experiment run:
|
|
6
|
+
# python train.py +debug=train_specific_examples [any other arguments]
|
|
7
|
+
|
|
8
|
+
defaults:
|
|
9
|
+
- default
|
|
10
|
+
- gpu
|
|
11
|
+
|
|
12
|
+
datasets:
|
|
13
|
+
# You can add specific example IDs here to load a subset of the dataset (only training is supported; PRs welcome to generalize to validation)
|
|
14
|
+
subset_to_example_ids:
|
|
15
|
+
- "{['pdb', 'pn_units']}{3px1}{1}{['A_3']}"
|
|
16
|
+
val: null
|
|
17
|
+
|
|
18
|
+
tags:
|
|
19
|
+
- debug
|
|
20
|
+
- train
|
|
21
|
+
- specific-examples
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
|
|
3
|
+
name: rf3
|
|
4
|
+
project: rf3
|
|
5
|
+
|
|
6
|
+
tags:
|
|
7
|
+
# list of tags to add to the run (and on wandb, to easily find & filter runs)
|
|
8
|
+
- full
|
|
9
|
+
|
|
10
|
+
defaults:
|
|
11
|
+
- override /datasets: pdb_and_distillation
|
|
12
|
+
- override /model: rf3
|
|
13
|
+
- override /trainer: rf3
|
|
14
|
+
|
|
15
|
+
ckpt_config:
|
|
16
|
+
_target_: foundry.utils.weights.CheckpointConfig
|
|
17
|
+
path: /net/software/containers/versions/modelhub_inference/ckpts/rf3-w-conf-run10-ep922-remapped.ckpt
|
|
18
|
+
reset_optimizer: true
|
|
19
|
+
|
|
20
|
+
model:
|
|
21
|
+
lr_scheduler:
|
|
22
|
+
base_lr: 0.9e-3 # 1/2 of original learning rate (1.8e-3)
|
|
23
|
+
net:
|
|
24
|
+
feature_initializer:
|
|
25
|
+
input_feature_embedder:
|
|
26
|
+
atom_attention_encoder:
|
|
27
|
+
c_atom_1d_features: 393 # 392 + 1 has_atom_level_embedding = 393
|
|
28
|
+
atom_1d_features:
|
|
29
|
+
- ref_pos
|
|
30
|
+
- ref_charge
|
|
31
|
+
- ref_mask
|
|
32
|
+
- ref_element
|
|
33
|
+
- ref_atom_name_chars
|
|
34
|
+
- ref_pos_ground_truth
|
|
35
|
+
- has_atom_level_embedding
|
|
36
|
+
use_atom_level_embedding: true
|
|
37
|
+
atom_level_embedding_dim: 384
|
|
38
|
+
diffusion_module:
|
|
39
|
+
atom_attention_encoder:
|
|
40
|
+
c_atom_1d_features: 393 # 392 + 1 has_atom_level_embedding = 393
|
|
41
|
+
atom_1d_features:
|
|
42
|
+
- ref_pos
|
|
43
|
+
- ref_charge
|
|
44
|
+
- ref_mask
|
|
45
|
+
- ref_element
|
|
46
|
+
- ref_atom_name_chars
|
|
47
|
+
- ref_pos_ground_truth
|
|
48
|
+
- has_atom_level_embedding
|
|
49
|
+
use_atom_level_embedding: true
|
|
50
|
+
atom_level_embedding_dim: 384
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
|
|
3
|
+
name: rf3-with-confidence
|
|
4
|
+
|
|
5
|
+
# For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/
|
|
6
|
+
defaults:
|
|
7
|
+
- pretrained/rf3
|
|
8
|
+
- override /model: rf3_with_confidence
|
|
9
|
+
- override /trainer: rf3_with_confidence
|
|
10
|
+
- _self_
|
|
11
|
+
|
|
12
|
+
datasets:
|
|
13
|
+
run_confidence_head: true
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
|
|
3
|
+
# Experiment that loads a small dataset for quick testing
|
|
4
|
+
|
|
5
|
+
name: quick-rf3-with-confidence
|
|
6
|
+
|
|
7
|
+
# For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/
|
|
8
|
+
defaults:
|
|
9
|
+
- quick-rf3
|
|
10
|
+
- override /model: rf3_with_confidence
|
|
11
|
+
- override /trainer: rf3_with_confidence
|
|
12
|
+
- _self_
|
|
13
|
+
|
|
14
|
+
datasets:
|
|
15
|
+
run_confidence_head: true
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
|
|
3
|
+
# Experiment that loads a small dataset for quick testing
|
|
4
|
+
|
|
5
|
+
name: quick-rf3
|
|
6
|
+
|
|
7
|
+
# For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/
|
|
8
|
+
defaults:
|
|
9
|
+
- pretrained/rf3
|
|
10
|
+
- override /datasets: pdb_only
|
|
11
|
+
|
|
12
|
+
tags:
|
|
13
|
+
# list of tags to add to the run (and on wandb, to easily find & filter runs)
|
|
14
|
+
- quick
|
|
15
|
+
|
|
16
|
+
project: test
|
|
17
|
+
|
|
18
|
+
paths:
|
|
19
|
+
data:
|
|
20
|
+
pdb_data_dir: /projects/ml/datahub/dfs/af3_splits/2024_12_16
|
|
21
|
+
|
|
22
|
+
trainer:
|
|
23
|
+
limit_train_batches: 4
|
|
24
|
+
limit_val_batches: 4
|
|
25
|
+
|
|
26
|
+
datasets:
|
|
27
|
+
train:
|
|
28
|
+
pdb:
|
|
29
|
+
# We must adjust the probability, since we set the monomer distillation dataset to null
|
|
30
|
+
probability: 1.0
|
|
31
|
+
sub_datasets:
|
|
32
|
+
interface:
|
|
33
|
+
dataset:
|
|
34
|
+
dataset:
|
|
35
|
+
# A small dataframe that loads quickly
|
|
36
|
+
data: /projects/ml/datahub/dfs/pdb/test_dfs/interfaces_df.parquet
|
|
37
|
+
filters:
|
|
38
|
+
- "total_num_atoms_in_unprocessed_assembly <= 3000"
|
|
39
|
+
- "cluster.notnull()"
|
|
40
|
+
pn_unit:
|
|
41
|
+
dataset:
|
|
42
|
+
dataset:
|
|
43
|
+
# A small dataframe that loads quickly
|
|
44
|
+
data: /projects/ml/datahub/dfs/pdb/test_dfs/pn_units_df.parquet
|
|
45
|
+
filters:
|
|
46
|
+
- "total_num_atoms_in_unprocessed_assembly <= 3000"
|
|
47
|
+
- "cluster.notnull()"
|
|
48
|
+
val:
|
|
49
|
+
af3_validation:
|
|
50
|
+
dataset:
|
|
51
|
+
dataset:
|
|
52
|
+
filters:
|
|
53
|
+
- "n_tokens_total < 500"
|
|
54
|
+
# (We often want to debug with a ligand)
|
|
55
|
+
- "interfaces_to_score.str.contains('protein-ligand')"
|
|
56
|
+
- example_id in ["{['validation']}{7psi}{1}{[]}", "{['validation']}{7lo1}{2}{[]}", "{['validation']}{6zg9}{1}{[]}", "{['validation']}{7vhy}{1}{[]}"]
|
|
57
|
+
|
|
58
|
+
model:
|
|
59
|
+
net:
|
|
60
|
+
inference_sampler:
|
|
61
|
+
num_timesteps: 50
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# https://hydra.cc/docs/configure_hydra/intro/
|
|
2
|
+
|
|
3
|
+
# enable color logging (requires `colorlog` to be installed)
|
|
4
|
+
# defaults:
|
|
5
|
+
# - override hydra_logging: colorlog
|
|
6
|
+
# - override job_logging: colorlog
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# output directory, generated dynamically on each run
|
|
10
|
+
run:
|
|
11
|
+
dir: ${paths.log_dir}/${task_name}/${name}/${now:%Y-%m-%d}_${now:%H-%M}_JOB_${oc.env:SLURM_JOB_ID,default}
|
|
12
|
+
|
|
13
|
+
# ... this is where the log file is written (i.e. the program's output)
|
|
14
|
+
job_logging:
|
|
15
|
+
handlers:
|
|
16
|
+
file:
|
|
17
|
+
# Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
|
|
18
|
+
filename: ${hydra.runtime.output_dir}/experiment.log
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
# ^ The "package" determines where the content of the config is placed in the output config
|
|
3
|
+
# For more information about overriding configs, see: https://hydra.cc/docs/advanced/overriding_packages/#overriding-packages-using-the-defaults-list
|
|
4
|
+
|
|
5
|
+
defaults:
|
|
6
|
+
- inference_engine: rf3
|
|
7
|
+
- _self_
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# @package _global_
|
|
2
|
+
|
|
3
|
+
defaults:
|
|
4
|
+
- /hydra: no_logging
|
|
5
|
+
|
|
6
|
+
# Parameters for RF3InferenceEngine.__init__()
|
|
7
|
+
ckpt_path: ???
|
|
8
|
+
num_nodes: 1
|
|
9
|
+
devices_per_node: 1
|
|
10
|
+
compress_outputs: false
|
|
11
|
+
|
|
12
|
+
# Parameters for RF3InferenceEngine.run()
|
|
13
|
+
inputs: ???
|
|
14
|
+
out_dir: ???
|
|
15
|
+
dump_predictions: true
|
|
16
|
+
dump_trajectories: false
|
|
17
|
+
one_model_per_file: false
|
|
18
|
+
annotate_b_factor_with_plddt: true
|
|
19
|
+
sharding_pattern: null
|
|
20
|
+
skip_existing: false
|
|
21
|
+
template_selection: null
|
|
22
|
+
ground_truth_conformer_selection: null
|
|
23
|
+
cyclic_chains: []
|