rc-foundry 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. foundry/inference_engines/checkpoint_registry.py +58 -11
  2. foundry/utils/alignment.py +10 -2
  3. foundry/version.py +2 -2
  4. foundry_cli/download_checkpoints.py +66 -66
  5. {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/METADATA +25 -20
  6. rc_foundry-0.1.7.dist-info/RECORD +311 -0
  7. rf3/configs/callbacks/default.yaml +5 -0
  8. rf3/configs/callbacks/dump_validation_structures.yaml +6 -0
  9. rf3/configs/callbacks/metrics_logging.yaml +10 -0
  10. rf3/configs/callbacks/train_logging.yaml +16 -0
  11. rf3/configs/dataloader/default.yaml +15 -0
  12. rf3/configs/datasets/base.yaml +31 -0
  13. rf3/configs/datasets/pdb_and_distillation.yaml +58 -0
  14. rf3/configs/datasets/pdb_only.yaml +17 -0
  15. rf3/configs/datasets/train/disorder_distillation.yaml +48 -0
  16. rf3/configs/datasets/train/domain_distillation.yaml +50 -0
  17. rf3/configs/datasets/train/monomer_distillation.yaml +49 -0
  18. rf3/configs/datasets/train/na_complex_distillation.yaml +50 -0
  19. rf3/configs/datasets/train/pdb/af3_weighted_sampling.yaml +8 -0
  20. rf3/configs/datasets/train/pdb/base.yaml +32 -0
  21. rf3/configs/datasets/train/pdb/plinder.yaml +54 -0
  22. rf3/configs/datasets/train/pdb/train_interface.yaml +51 -0
  23. rf3/configs/datasets/train/pdb/train_pn_unit.yaml +46 -0
  24. rf3/configs/datasets/train/rna_monomer_distillation.yaml +56 -0
  25. rf3/configs/datasets/val/af3_ab_set.yaml +11 -0
  26. rf3/configs/datasets/val/af3_validation.yaml +11 -0
  27. rf3/configs/datasets/val/base.yaml +32 -0
  28. rf3/configs/datasets/val/runs_and_poses.yaml +12 -0
  29. rf3/configs/debug/default.yaml +66 -0
  30. rf3/configs/debug/train_specific_examples.yaml +21 -0
  31. rf3/configs/experiment/pretrained/rf3.yaml +50 -0
  32. rf3/configs/experiment/pretrained/rf3_with_confidence.yaml +13 -0
  33. rf3/configs/experiment/quick-rf3-with-confidence.yaml +15 -0
  34. rf3/configs/experiment/quick-rf3.yaml +61 -0
  35. rf3/configs/hydra/default.yaml +18 -0
  36. rf3/configs/hydra/no_logging.yaml +7 -0
  37. rf3/configs/inference.yaml +7 -0
  38. rf3/configs/inference_engine/base.yaml +23 -0
  39. rf3/configs/inference_engine/rf3.yaml +33 -0
  40. rf3/configs/logger/csv.yaml +6 -0
  41. rf3/configs/logger/default.yaml +3 -0
  42. rf3/configs/logger/wandb.yaml +15 -0
  43. rf3/configs/model/components/ema.yaml +1 -0
  44. rf3/configs/model/components/rf3_net.yaml +177 -0
  45. rf3/configs/model/components/rf3_net_with_confidence_head.yaml +45 -0
  46. rf3/configs/model/optimizers/adam.yaml +5 -0
  47. rf3/configs/model/rf3.yaml +43 -0
  48. rf3/configs/model/rf3_with_confidence.yaml +7 -0
  49. rf3/configs/model/schedulers/af3.yaml +6 -0
  50. rf3/configs/paths/data/default.yaml +43 -0
  51. rf3/configs/paths/default.yaml +21 -0
  52. rf3/configs/train.yaml +42 -0
  53. rf3/configs/trainer/cpu.yaml +6 -0
  54. rf3/configs/trainer/ddp.yaml +5 -0
  55. rf3/configs/trainer/loss/losses/confidence_loss.yaml +29 -0
  56. rf3/configs/trainer/loss/losses/diffusion_loss.yaml +9 -0
  57. rf3/configs/trainer/loss/losses/distogram_loss.yaml +2 -0
  58. rf3/configs/trainer/loss/structure_prediction.yaml +4 -0
  59. rf3/configs/trainer/loss/structure_prediction_with_confidence.yaml +2 -0
  60. rf3/configs/trainer/metrics/structure_prediction.yaml +14 -0
  61. rf3/configs/trainer/rf3.yaml +20 -0
  62. rf3/configs/trainer/rf3_with_confidence.yaml +13 -0
  63. rf3/configs/validate.yaml +45 -0
  64. rfd3/cli.py +10 -4
  65. rfd3/configs/__init__.py +0 -0
  66. rfd3/configs/callbacks/design_callbacks.yaml +10 -0
  67. rfd3/configs/callbacks/metrics_logging.yaml +20 -0
  68. rfd3/configs/callbacks/train_logging.yaml +24 -0
  69. rfd3/configs/dataloader/default.yaml +15 -0
  70. rfd3/configs/dataloader/fast.yaml +11 -0
  71. rfd3/configs/datasets/conditions/dna_condition.yaml +3 -0
  72. rfd3/configs/datasets/conditions/island.yaml +28 -0
  73. rfd3/configs/datasets/conditions/ppi.yaml +2 -0
  74. rfd3/configs/datasets/conditions/sequence_design.yaml +17 -0
  75. rfd3/configs/datasets/conditions/tipatom.yaml +28 -0
  76. rfd3/configs/datasets/conditions/unconditional.yaml +21 -0
  77. rfd3/configs/datasets/design_base.yaml +97 -0
  78. rfd3/configs/datasets/train/pdb/af3_train_interface.yaml +46 -0
  79. rfd3/configs/datasets/train/pdb/af3_train_pn_unit.yaml +42 -0
  80. rfd3/configs/datasets/train/pdb/base.yaml +14 -0
  81. rfd3/configs/datasets/train/pdb/base_no_weights.yaml +19 -0
  82. rfd3/configs/datasets/train/pdb/base_transform_args.yaml +59 -0
  83. rfd3/configs/datasets/train/pdb/na_complex_distillation.yaml +20 -0
  84. rfd3/configs/datasets/train/pdb/pdb_base.yaml +11 -0
  85. rfd3/configs/datasets/train/pdb/rfd3_train_interface.yaml +22 -0
  86. rfd3/configs/datasets/train/pdb/rfd3_train_pn_unit.yaml +23 -0
  87. rfd3/configs/datasets/train/rfd3_monomer_distillation.yaml +38 -0
  88. rfd3/configs/datasets/val/bcov_ppi_easy_medium.yaml +9 -0
  89. rfd3/configs/datasets/val/design_validation_base.yaml +40 -0
  90. rfd3/configs/datasets/val/dna_binder_design5.yaml +9 -0
  91. rfd3/configs/datasets/val/dna_binder_long.yaml +13 -0
  92. rfd3/configs/datasets/val/dna_binder_short.yaml +13 -0
  93. rfd3/configs/datasets/val/indexed.yaml +9 -0
  94. rfd3/configs/datasets/val/mcsa_41.yaml +9 -0
  95. rfd3/configs/datasets/val/mcsa_41_short_rigid.yaml +10 -0
  96. rfd3/configs/datasets/val/ppi_inference.yaml +7 -0
  97. rfd3/configs/datasets/val/sm_binder_hbonds.yaml +13 -0
  98. rfd3/configs/datasets/val/sm_binder_hbonds_short.yaml +15 -0
  99. rfd3/configs/datasets/val/unconditional.yaml +9 -0
  100. rfd3/configs/datasets/val/unconditional_deep.yaml +9 -0
  101. rfd3/configs/datasets/val/unindexed.yaml +8 -0
  102. rfd3/configs/datasets/val/val_examples/bcov_ppi_easy_medium_with_ori.yaml +151 -0
  103. rfd3/configs/datasets/val/val_examples/bcov_ppi_easy_medium_with_ori_spoof_helical_bundle.yaml +7 -0
  104. rfd3/configs/datasets/val/val_examples/bcov_ppi_easy_medium_with_ori_varying_lengths.yaml +28 -0
  105. rfd3/configs/datasets/val/val_examples/bpem_ori_hb.yaml +212 -0
  106. rfd3/configs/debug/default.yaml +64 -0
  107. rfd3/configs/debug/train_specific_examples.yaml +21 -0
  108. rfd3/configs/dev.yaml +9 -0
  109. rfd3/configs/experiment/debug.yaml +14 -0
  110. rfd3/configs/experiment/pretrain.yaml +31 -0
  111. rfd3/configs/experiment/test-uncond.yaml +10 -0
  112. rfd3/configs/experiment/test-unindexed.yaml +21 -0
  113. rfd3/configs/hydra/default.yaml +18 -0
  114. rfd3/configs/hydra/no_logging.yaml +7 -0
  115. rfd3/configs/inference.yaml +9 -0
  116. rfd3/configs/inference_engine/base.yaml +15 -0
  117. rfd3/configs/inference_engine/dev.yaml +20 -0
  118. rfd3/configs/inference_engine/rfdiffusion3.yaml +65 -0
  119. rfd3/configs/logger/csv.yaml +6 -0
  120. rfd3/configs/logger/default.yaml +2 -0
  121. rfd3/configs/logger/wandb.yaml +15 -0
  122. rfd3/configs/model/components/ema.yaml +1 -0
  123. rfd3/configs/model/components/rfd3_net.yaml +131 -0
  124. rfd3/configs/model/optimizers/adam.yaml +5 -0
  125. rfd3/configs/model/rfd3_base.yaml +8 -0
  126. rfd3/configs/model/samplers/edm.yaml +21 -0
  127. rfd3/configs/model/samplers/symmetry.yaml +10 -0
  128. rfd3/configs/model/schedulers/af3.yaml +6 -0
  129. rfd3/configs/paths/data/default.yaml +18 -0
  130. rfd3/configs/paths/default.yaml +22 -0
  131. rfd3/configs/train.yaml +28 -0
  132. rfd3/configs/trainer/cpu.yaml +6 -0
  133. rfd3/configs/trainer/ddp.yaml +5 -0
  134. rfd3/configs/trainer/loss/losses/diffusion_loss.yaml +12 -0
  135. rfd3/configs/trainer/loss/losses/sequence_loss.yaml +3 -0
  136. rfd3/configs/trainer/metrics/design_metrics.yaml +22 -0
  137. rfd3/configs/trainer/rfd3_base.yaml +35 -0
  138. rfd3/configs/validate.yaml +34 -0
  139. rfd3/engine.py +19 -11
  140. rfd3/inference/input_parsing.py +1 -1
  141. rfd3/inference/legacy_input_parsing.py +17 -1
  142. rfd3/inference/parsing.py +1 -0
  143. rfd3/inference/symmetry/atom_array.py +1 -5
  144. rfd3/inference/symmetry/checks.py +53 -28
  145. rfd3/inference/symmetry/frames.py +8 -5
  146. rfd3/inference/symmetry/symmetry_utils.py +38 -60
  147. rfd3/run_inference.py +3 -1
  148. rfd3/utils/inference.py +23 -0
  149. rc_foundry-0.1.5.dist-info/RECORD +0 -180
  150. {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/WHEEL +0 -0
  151. {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/entry_points.txt +0 -0
  152. {rc_foundry-0.1.5.dist-info → rc_foundry-0.1.7.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,50 @@
1
+ # TODO: Inherit from common config with default Transform pipeline
2
+
3
+ na_complex_distillation:
4
+ dataset:
5
+ _target_: atomworks.ml.datasets.StructuralDatasetWrapper
6
+ save_failed_examples_to_dir: null
7
+
8
+ # cif parser
9
+ cif_parser_args:
10
+ #assume_residues_all_resolved: true
11
+ cache_dir: null
12
+ load_from_cache: false
13
+ save_to_cache: false
14
+
15
+ # metadata parser
16
+ dataset_parser:
17
+ _target_: atomworks.ml.datasets.parsers.GenericDFParser
18
+ pn_unit_iid_colnames: null #[]
19
+
20
+ # metadata dataset
21
+ dataset:
22
+ _target_: atomworks.ml.datasets.PandasDataset
23
+ name: tf_distillation
24
+ id_column: example_id
25
+ data: ${paths.data.na_complex_distillation_parquet_dir}/transcriptionFactor_distillation_rf3.newDL.csv
26
+ columns_to_load:
27
+ - example_id
28
+ - path
29
+ transform:
30
+ _target_: ${datasets.pipeline_target}
31
+ is_inference: False
32
+ protein_msa_dirs: [{"dir": "${paths.data.na_complex_distillation_data_dir}/a3m/", "extension": ".a3m", "directory_depth": 1}]
33
+ rna_msa_dirs: []
34
+ n_recycles: ${datasets.n_recycles_train}
35
+ crop_size: ${datasets.crop_size}
36
+ n_msa: ${datasets.n_msa}
37
+ diffusion_batch_size: ${datasets.diffusion_batch_size_train}
38
+ max_atoms_in_crop: ${datasets.max_atoms_in_crop}
39
+ crop_contiguous_probability: 0.25
40
+ crop_spatial_probability: 0.75
41
+ pad_dna_p_skip: 0.0
42
+ run_confidence_head: ${datasets.run_confidence_head}
43
+ take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
44
+ use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
45
+ mirror_prob: 0.0
46
+ atomization_prob: ${datasets.atomization_prob}
47
+ ligand_dropout_prob: ${datasets.ligand_dropout_prob}
48
+ p_unconditional: ${datasets.p_unconditional}
49
+ p_dropout_atom_level_embeddings: ${datasets.p_dropout_atom_level_embeddings}
50
+ add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
@@ -0,0 +1,8 @@
1
+ weights:
2
+ _target_: atomworks.ml.samplers.calculate_weights_for_pdb_dataset_df
3
+ # We do not include beta here, since it is different for interfaces and chains
4
+ alphas:
5
+ a_prot: 3.0 # 3 for AF-3
6
+ a_nuc: 3.0 # 3 for AF-3
7
+ a_ligand: 1.0 # 1 for AF-3
8
+ a_loi: 5.0 # 5 for AF-3
@@ -0,0 +1,32 @@
1
+ dataset:
2
+ _target_: atomworks.ml.datasets.StructuralDatasetWrapper
3
+ save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
4
+ cif_parser_args:
5
+ cache_dir: null
6
+ load_from_cache: false
7
+ save_to_cache: false
8
+ dataset:
9
+ _target_: atomworks.ml.datasets.PandasDataset
10
+ # we will use the example_id as the unique column
11
+ id_column: example_id
12
+ transform:
13
+ # common Transform pipeline components for all PDB datasets
14
+ _target_: ${datasets.pipeline_target}
15
+ is_inference: False
16
+ protein_msa_dirs: ${paths.data.protein_msa_dirs}
17
+ rna_msa_dirs: ${paths.data.rna_msa_dirs}
18
+ n_recycles: ${datasets.n_recycles_train}
19
+ crop_size: ${datasets.crop_size}
20
+ n_msa: ${datasets.n_msa}
21
+ diffusion_batch_size: ${datasets.diffusion_batch_size_train}
22
+ max_atoms_in_crop: ${datasets.max_atoms_in_crop}
23
+ run_confidence_head: ${datasets.run_confidence_head}
24
+ p_unconditional: ${datasets.p_unconditional}
25
+ p_dropout_atom_level_embeddings: ${datasets.p_dropout_atom_level_embeddings}
26
+ take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
27
+ use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
28
+ mirror_prob: ${datasets.mirror_prob}
29
+ atomization_prob: ${datasets.atomization_prob}
30
+ ligand_dropout_prob: ${datasets.ligand_dropout_prob}
31
+ add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
32
+ add_cyclic_bonds: ${datasets.add_cyclic_bonds}
@@ -0,0 +1,54 @@
1
+ # PLINDER is a subset of the PDB, so we inherit from the base PDB config
2
+
3
+ defaults:
4
+ - base
5
+
6
+ dataset:
7
+ dataset_parser:
8
+ _target_: atomworks.ml.datasets.parsers.InterfacesDFParser
9
+ base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
10
+ dataset:
11
+ name: plinder
12
+ data: ${paths.data.pdb_data_dir}/interfaces_df_train_plinder.parquet
13
+ filters:
14
+ # filters common across all PDB datasets
15
+ - "deposition_date < '2024-01-01'"
16
+ - "resolution < 9.0"
17
+ - "num_polymer_pn_units <= 300"
18
+ # interface-specific filters
19
+ - "~(pn_unit_1_non_polymer_res_names.notnull() and pn_unit_1_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
20
+ - "~(pn_unit_2_non_polymer_res_names.notnull() and pn_unit_2_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
21
+ columns_to_load:
22
+ # columns common across all PDB datasets
23
+ - example_id
24
+ - pdb_id
25
+ - assembly_id
26
+ - deposition_date
27
+ - resolution
28
+ - num_polymer_pn_units
29
+ - method
30
+ - n_prot
31
+ - n_nuc
32
+ - n_ligand
33
+ - n_peptide
34
+ - total_num_atoms_in_unprocessed_assembly
35
+ # interface specific columns
36
+ - pn_unit_1_iid
37
+ - pn_unit_2_iid
38
+ - pn_unit_1_non_polymer_res_names
39
+ - pn_unit_2_non_polymer_res_names
40
+ - is_inter_molecule
41
+ - all_pn_unit_iids_after_processing
42
+ - involves_loi
43
+ - pli_qcov__50__strong__component
44
+ - pli_qcov__70__strong__component
45
+ - pli_qcov__50__weak__component
46
+ - pli_qcov__70__weak__component
47
+ transform:
48
+ # interface-specific Transform pipeline parameters
49
+ crop_contiguous_probability: 0.0
50
+ crop_spatial_probability: 1.0
51
+
52
+ weights:
53
+ _target_: atomworks.ml.samplers.calculate_weights_by_inverse_cluster_size
54
+ cluster_column: pli_qcov__50__weak__component # Need to ablate
@@ -0,0 +1,51 @@
1
+ defaults:
2
+ - base
3
+ - af3_weighted_sampling
4
+
5
+ dataset:
6
+ dataset_parser:
7
+ _target_: atomworks.ml.datasets.parsers.InterfacesDFParser
8
+ base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
9
+ dataset:
10
+ name: interface
11
+ data: ${paths.data.pdb_data_dir}/interfaces_df_train.parquet
12
+ filters:
13
+ # filters common across all PDB datasets
14
+ - "deposition_date < '2024-01-01'"
15
+ - "resolution < 9.0"
16
+ - "num_polymer_pn_units <= 300"
17
+ - "cluster.notnull()"
18
+ # interface specific filters
19
+ - "~(pn_unit_1_non_polymer_res_names.notnull() and pn_unit_1_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
20
+ - "~(pn_unit_2_non_polymer_res_names.notnull() and pn_unit_2_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
21
+ - "is_inter_molecule"
22
+ columns_to_load:
23
+ # columns common across all PDB datasets
24
+ - example_id
25
+ - pdb_id
26
+ - assembly_id
27
+ - deposition_date
28
+ - resolution
29
+ - num_polymer_pn_units
30
+ - method
31
+ - cluster
32
+ - n_prot
33
+ - n_nuc
34
+ - n_ligand
35
+ - n_peptide
36
+ - total_num_atoms_in_unprocessed_assembly
37
+ # interface specific columns
38
+ - pn_unit_1_iid
39
+ - pn_unit_2_iid
40
+ - pn_unit_1_non_polymer_res_names
41
+ - pn_unit_2_non_polymer_res_names
42
+ - is_inter_molecule
43
+ - all_pn_unit_iids_after_processing
44
+ - involves_loi
45
+ transform:
46
+ # interface-specific Transform pipeline parameters
47
+ crop_contiguous_probability: 0.0
48
+ crop_spatial_probability: 1.0
49
+
50
+ weights:
51
+ beta: 1.0
@@ -0,0 +1,46 @@
1
+ defaults:
2
+ - base
3
+ - af3_weighted_sampling
4
+
5
+ dataset:
6
+ dataset_parser:
7
+ _target_: atomworks.ml.datasets.parsers.PNUnitsDFParser
8
+ base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
9
+ dataset:
10
+ name: pn_unit
11
+ data: ${paths.data.pdb_data_dir}/pn_units_df_train.parquet
12
+ filters:
13
+ # filters common across all PDB datasets
14
+ - "deposition_date < '2024-01-01'"
15
+ - "resolution < 9.0"
16
+ - "num_polymer_pn_units <= 300"
17
+ - "cluster.notnull()"
18
+ # pn_unit specific filters
19
+ - "~(q_pn_unit_non_polymer_res_names.notnull() and q_pn_unit_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
20
+ columns_to_load:
21
+ # columns common across all PDB datasets
22
+ - example_id
23
+ - pdb_id
24
+ - assembly_id
25
+ - deposition_date
26
+ - resolution
27
+ - num_polymer_pn_units
28
+ - method
29
+ - cluster
30
+ - n_prot
31
+ - n_nuc
32
+ - n_ligand
33
+ - n_peptide
34
+ - total_num_atoms_in_unprocessed_assembly
35
+ # pn_unit specific columns
36
+ - q_pn_unit_iid
37
+ - q_pn_unit_non_polymer_res_names
38
+ - all_pn_unit_iids_after_processing
39
+ - q_pn_unit_is_loi
40
+ transform:
41
+ # pn_unit-specific Transform pipeline parameters
42
+ crop_contiguous_probability: 0.3333333333333333
43
+ crop_spatial_probability: 0.6666666666666667
44
+
45
+ weights:
46
+ beta: 0.5
@@ -0,0 +1,56 @@
1
+ # TODO: Inherit from common config with default Transform pipeline
2
+
3
+ rna_monomer_distillation:
4
+ dataset:
5
+ _target_: atomworks.ml.datasets.StructuralDatasetWrapper
6
+ save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
7
+
8
+ # cif parser arguments
9
+ cif_parser_args:
10
+ cache_dir: null
11
+ load_from_cache: False
12
+ save_to_cache: False
13
+
14
+ # metadata parser
15
+ dataset_parser:
16
+ _target_: atomworks.ml.datasets.parsers.GenericDFParser
17
+ pn_unit_iid_colnames: null
18
+
19
+ # metadata dataset
20
+ dataset:
21
+ _target_: atomworks.ml.datasets.PandasDataset
22
+ name: rna_monomer_distillation
23
+ id_column: example_id
24
+ data: /projects/ml/afavor/rna_distillation/rna_distillation_filtered_df.parquet
25
+ columns_to_load:
26
+ - example_id
27
+ - path
28
+ - cluster_id
29
+ - seq_hash
30
+ - overall_plddt
31
+ - overall_pde
32
+ - overall_pae
33
+
34
+ transform:
35
+ _target_: ${datasets.pipeline_target}
36
+ is_inference: False
37
+ protein_msa_dirs: []
38
+ rna_msa_dirs: [{"dir": "/projects/ml/afavor/rna_distillation/all_MSAs_renamed", "extension": ".afa", "directory_depth": 2}]
39
+ n_recycles: ${datasets.n_recycles_train}
40
+ crop_size: ${datasets.crop_size}
41
+ n_msa: ${datasets.n_msa}
42
+ diffusion_batch_size: ${datasets.diffusion_batch_size_train}
43
+ max_atoms_in_crop: ${datasets.max_atoms_in_crop}
44
+ crop_contiguous_probability: 1.0
45
+ crop_spatial_probability: 0.0
46
+ pad_dna_p_skip: 0.0
47
+ b_factor_min: 0.6
48
+ run_confidence_head: ${datasets.run_confidence_head}
49
+ take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
50
+ use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
51
+ mirror_prob: 0.0
52
+ atomization_prob: ${datasets.atomization_prob}
53
+ ligand_dropout_prob: ${datasets.ligand_dropout_prob}
54
+ p_unconditional: ${datasets.p_unconditional}
55
+ p_dropout_atom_level_embeddings: ${datasets.p_dropout_atom_level_embeddings}
56
+ add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
@@ -0,0 +1,11 @@
1
+ defaults:
2
+ - base
3
+
4
+ dataset:
5
+ dataset_parser:
6
+ _target_: atomworks.ml.datasets.parsers.ValidationDFParserLikeAF3
7
+ base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
8
+ dataset:
9
+ _target_: atomworks.ml.datasets.PandasDataset
10
+ name: af3_validation
11
+ data: /net/scratch/rib7/rf3_ab_splits/entry_level_val_df.parquet
@@ -0,0 +1,11 @@
1
+ defaults:
2
+ - base
3
+
4
+ dataset:
5
+ dataset_parser:
6
+ _target_: atomworks.ml.datasets.parsers.ValidationDFParserLikeAF3
7
+ base_dir: /projects/ml/frozen_pdb_copies/2025_07_13_pdb
8
+ dataset:
9
+ _target_: atomworks.ml.datasets.PandasDataset
10
+ name: af3_validation
11
+ data: ${paths.data.pdb_data_dir}/entry_level_val_df.parquet
@@ -0,0 +1,32 @@
1
+ dataset:
2
+ _target_: atomworks.ml.datasets.StructuralDatasetWrapper
3
+ save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
4
+ cif_parser_args:
5
+ cache_dir: null
6
+ load_from_cache: False
7
+ save_to_cache: False
8
+ dataset:
9
+ _target_: atomworks.ml.datasets.PandasDataset
10
+ # we will use the example_id as the unique column
11
+ id_column: example_id
12
+ # return all keys (do not subset)
13
+ transform:
14
+ # common Transform pipeline components for all PDB datasets
15
+ _target_: ${datasets.pipeline_target}
16
+ is_inference: True
17
+ protein_msa_dirs: ${paths.data.protein_msa_dirs}
18
+ rna_msa_dirs: ${paths.data.rna_msa_dirs}
19
+ n_recycles: ${datasets.n_recycles_validation}
20
+ crop_size: null # do not crop for inference
21
+ n_msa: ${datasets.n_msa}
22
+ diffusion_batch_size: ${datasets.diffusion_batch_size_inference}
23
+ max_atoms_in_crop: null # do not crop for inference
24
+ return_atom_array: True # return atom array for inference
25
+ run_confidence_head: ${datasets.run_confidence_head}
26
+ p_unconditional: 1.0 # unconditional for inference, unless explicitly overridden
27
+ p_dropout_atom_level_embeddings: 0.0 # always use embeddings in inference
28
+ take_first_chiral_subordering: ${datasets.take_first_chiral_subordering}
29
+ use_element_for_atom_names_of_atomized_tokens: ${datasets.use_element_for_atom_names_of_atomized_tokens}
30
+ add_residue_is_paired_feature: ${datasets.add_residue_is_paired_feature}
31
+
32
+ key_to_balance: ${datasets.key_to_balance}
@@ -0,0 +1,12 @@
1
+ defaults:
2
+ - base
3
+
4
+ dataset:
5
+ dataset_parser:
6
+ _target_: atomworks.ml.datasets.parsers.ValidationDFParserLikeAF3
7
+ dataset:
8
+ _target_: atomworks.ml.datasets.PandasDataset
9
+ name: af3_validation
10
+ data: /projects/ml/datahub/dfs/af3_splits/2024_12_16/runs_n_poses_entry_level_df.parquet
11
+ filters:
12
+ - "n_tokens_total < 1000" # Subset to reasonably-sized examples for efficiency
@@ -0,0 +1,66 @@
1
+ # @package _global_
2
+
3
+ defaults:
4
+ - override /logger: null
5
+
6
+ # default debugging setup, runs 1 full epoch
7
+ # other debugging configs can inherit from this one
8
+
9
+ # overwrite task name so debugging logs are stored in separate folder
10
+ task_name: "debug"
11
+
12
+ extras:
13
+ ignore_warnings: False
14
+ enforce_tags: False
15
+
16
+ # sets level of all command line loggers to 'DEBUG'
17
+ # https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
18
+ hydra:
19
+ job_logging:
20
+ root:
21
+ level: DEBUG
22
+ # use the below to also set hydra loggers to 'DEBUG'
23
+ verbose: True
24
+
25
+ # Print example ID before forward pass
26
+ callbacks:
27
+ print_example_id_before_forward_pass:
28
+ _target_: foundry.callbacks.train_logging.PrintExampleIDBeforeForwardPassCallback
29
+ timing_logging:
30
+ _target_: foundry.callbacks.timing_logging.TimingCallback
31
+ log_every_n: 5
32
+
33
+ dataloader:
34
+ train:
35
+ dataloader_params:
36
+ batch_size: 1
37
+ num_workers: 0 # debuggers don't like multiprocessing -- work on main thread
38
+ pin_memory: False # disable gpu memory pin
39
+ prefetch_factor: null # must be null for num_workers=0
40
+ n_fallback_retries: 0 # disable fallback retries for debugging
41
+
42
+ val:
43
+ dataloader_params:
44
+ batch_size: 1
45
+ num_workers: 0
46
+ pin_memory: False
47
+ prefetch_factor: null # must be null for num_workers=0
48
+
49
+ datasets:
50
+ crop_size: 100 # set small crop size for quick debugging
51
+ diffusion_batch_size_train: 1
52
+ diffusion_batch_size_inference: 2
53
+ n_recycles_train: 1
54
+ n_recycles_validation: 1
55
+ n_msa: 128
56
+ key_to_balance: null # otherwise big examples will be processed first
57
+
58
+ trainer:
59
+ devices_per_node: 1
60
+ limit_train_batches: 2
61
+ limit_val_batches: 1
62
+ validate_every_n_epochs: 1
63
+
64
+ # Set tags to help identify debugging runs
65
+ tags:
66
+ - debug
@@ -0,0 +1,21 @@
1
+ # @package _global_
2
+
3
+ # See: https://hydra.cc/docs/patterns/configuring_experiments/
4
+
5
+ # to execute this experiment run:
6
+ # python train.py +debug=train_specific_examples [any other arguments]
7
+
8
+ defaults:
9
+ - default
10
+ - gpu
11
+
12
+ datasets:
13
+ # you can add specific example IDs here to load a subset of the dataset (only training is supported; PRs welcome to generalize to validation)
14
+ subset_to_example_ids:
15
+ - "{['pdb', 'pn_units']}{3px1}{1}{['A_3']}"
16
+ val: null
17
+
18
+ tags:
19
+ - debug
20
+ - train
21
+ - specific-examples
@@ -0,0 +1,50 @@
1
+ # @package _global_
2
+
3
+ name: rf3
4
+ project: rf3
5
+
6
+ tags:
7
+ # list of tags to add to the run ( & on wandb to easily find & filter runs)
8
+ - full
9
+
10
+ defaults:
11
+ - override /datasets: pdb_and_distillation
12
+ - override /model: rf3
13
+ - override /trainer: rf3
14
+
15
+ ckpt_config:
16
+ _target_: foundry.utils.weights.CheckpointConfig
17
+ path: /net/software/containers/versions/modelhub_inference/ckpts/rf3-w-conf-run10-ep922-remapped.ckpt
18
+ reset_optimizer: true
19
+
20
+ model:
21
+ lr_scheduler:
22
+ base_lr: 0.9e-3 # 1/2 of original learning rate (1.8e-3)
23
+ net:
24
+ feature_initializer:
25
+ input_feature_embedder:
26
+ atom_attention_encoder:
27
+ c_atom_1d_features: 393 # 392 + 1 has_atom_level_embedding = 393
28
+ atom_1d_features:
29
+ - ref_pos
30
+ - ref_charge
31
+ - ref_mask
32
+ - ref_element
33
+ - ref_atom_name_chars
34
+ - ref_pos_ground_truth
35
+ - has_atom_level_embedding
36
+ use_atom_level_embedding: true
37
+ atom_level_embedding_dim: 384
38
+ diffusion_module:
39
+ atom_attention_encoder:
40
+ c_atom_1d_features: 393 # 392 + 1 has_atom_level_embedding = 393
41
+ atom_1d_features:
42
+ - ref_pos
43
+ - ref_charge
44
+ - ref_mask
45
+ - ref_element
46
+ - ref_atom_name_chars
47
+ - ref_pos_ground_truth
48
+ - has_atom_level_embedding
49
+ use_atom_level_embedding: true
50
+ atom_level_embedding_dim: 384
@@ -0,0 +1,13 @@
1
+ # @package _global_
2
+
3
+ name: rf3-with-confidence
4
+
5
+ # For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/
6
+ defaults:
7
+ - pretrained/rf3
8
+ - override /model: rf3_with_confidence
9
+ - override /trainer: rf3_with_confidence
10
+ - _self_
11
+
12
+ datasets:
13
+ run_confidence_head: true
@@ -0,0 +1,15 @@
1
+ # @package _global_
2
+
3
+ # Experiment that loads a small dataset for quick testing
4
+
5
+ name: quick-rf3-with-confidence
6
+
7
+ # For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/
8
+ defaults:
9
+ - quick-rf3
10
+ - override /model: rf3_with_confidence
11
+ - override /trainer: rf3_with_confidence
12
+ - _self_
13
+
14
+ datasets:
15
+ run_confidence_head: true
@@ -0,0 +1,61 @@
1
+ # @package _global_
2
+
3
+ # Experiment that loads a small dataset for quick testing
4
+
5
+ name: quick-rf3
6
+
7
+ # For explanation of the "override" syntax, see: https://hydra.cc/docs/upgrades/1.0_to_1.1/defaults_list_override/
8
+ defaults:
9
+ - pretrained/rf3
10
+ - override /datasets: pdb_only
11
+
12
+ tags:
13
+ # list of tags to add to the run ( & on wandb to easily find & filter runs)
14
+ - quick
15
+
16
+ project: test
17
+
18
+ paths:
19
+ data:
20
+ pdb_data_dir: /projects/ml/datahub/dfs/af3_splits/2024_12_16
21
+
22
+ trainer:
23
+ limit_train_batches: 4
24
+ limit_val_batches: 4
25
+
26
+ datasets:
27
+ train:
28
+ pdb:
29
+ # We must adjust the probability, since we set the monomer distillation dataset to null
30
+ probability: 1.0
31
+ sub_datasets:
32
+ interface:
33
+ dataset:
34
+ dataset:
35
+ # A small dataframe that loads quickly
36
+ data: /projects/ml/datahub/dfs/pdb/test_dfs/interfaces_df.parquet
37
+ filters:
38
+ - "total_num_atoms_in_unprocessed_assembly <= 3000"
39
+ - "cluster.notnull()"
40
+ pn_unit:
41
+ dataset:
42
+ dataset:
43
+ # A small dataframe that loads quickly
44
+ data: /projects/ml/datahub/dfs/pdb/test_dfs/pn_units_df.parquet
45
+ filters:
46
+ - "total_num_atoms_in_unprocessed_assembly <= 3000"
47
+ - "cluster.notnull()"
48
+ val:
49
+ af3_validation:
50
+ dataset:
51
+ dataset:
52
+ filters:
53
+ - "n_tokens_total < 500"
54
+ # (We often want to debug with a ligand)
55
+ - "interfaces_to_score.str.contains('protein-ligand')"
56
+ - example_id in ["{['validation']}{7psi}{1}{[]}", "{['validation']}{7lo1}{2}{[]}", "{['validation']}{6zg9}{1}{[]}", "{['validation']}{7vhy}{1}{[]}"]
57
+
58
+ model:
59
+ net:
60
+ inference_sampler:
61
+ num_timesteps: 50
@@ -0,0 +1,18 @@
1
+ # https://hydra.cc/docs/configure_hydra/intro/
2
+
3
+ # enable color logging (requires `colorlog` to be installed)
4
+ # defaults:
5
+ # - override hydra_logging: colorlog
6
+ # - override job_logging: colorlog
7
+
8
+
9
+ # output directory, generated dynamically on each run
10
+ run:
11
+ dir: ${paths.log_dir}/${task_name}/${name}/${now:%Y-%m-%d}_${now:%H-%M}_JOB_${oc.env:SLURM_JOB_ID,default}
12
+
13
+ # ... this is where the log file is written (i.e. the program's output)
14
+ job_logging:
15
+ handlers:
16
+ file:
17
+ # Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
18
+ filename: ${hydra.runtime.output_dir}/experiment.log
@@ -0,0 +1,7 @@
1
+ defaults:
2
+ - override job_logging: disabled
3
+ - override hydra_logging: disabled
4
+
5
+ output_subdir: null
6
+ run:
7
+ dir: .
@@ -0,0 +1,7 @@
1
+ # @package _global_
2
+ # ^ The "package" determines where the content of the config is placed in the output config
3
+ # For more information about overriding configs, see: https://hydra.cc/docs/advanced/overriding_packages/#overriding-packages-using-the-defaults-list
4
+
5
+ defaults:
6
+ - inference_engine: rf3
7
+ - _self_
@@ -0,0 +1,23 @@
1
+ # @package _global_
2
+
3
+ defaults:
4
+ - /hydra: no_logging
5
+
6
+ # Parameters for RF3InferenceEngine.__init__()
7
+ ckpt_path: ???
8
+ num_nodes: 1
9
+ devices_per_node: 1
10
+ compress_outputs: false
11
+
12
+ # Parameters for RF3InferenceEngine.run()
13
+ inputs: ???
14
+ out_dir: ???
15
+ dump_predictions: true
16
+ dump_trajectories: false
17
+ one_model_per_file: false
18
+ annotate_b_factor_with_plddt: true
19
+ sharding_pattern: null
20
+ skip_existing: false
21
+ template_selection: null
22
+ ground_truth_conformer_selection: null
23
+ cyclic_chains: []