EntDetect 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- EntDetect/Jwalk/GridTools.py +567 -0
- EntDetect/Jwalk/PDBTools.py +532 -0
- EntDetect/Jwalk/SASDTools.py +543 -0
- EntDetect/Jwalk/SurfaceTools.py +150 -0
- EntDetect/Jwalk/__init__.py +19 -0
- EntDetect/Jwalk/naccess.config.txt +255 -0
- EntDetect/__init__.py +10 -0
- EntDetect/_logging.py +71 -0
- EntDetect/change_resolution.py +2361 -0
- EntDetect/clustering.py +2626 -0
- EntDetect/compare_sim2exp.py +1927 -0
- EntDetect/entanglement_features.py +478 -0
- EntDetect/gaussian_entanglement.py +2067 -0
- EntDetect/order_params.py +1048 -0
- EntDetect/resources/__init__.py +11 -0
- EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
- EntDetect/resources/calc_K.pl +712 -0
- EntDetect/resources/calc_Q.pl +962 -0
- EntDetect/resources/pulchra +0 -0
- EntDetect/resources/shared_files/__init__.py +2 -0
- EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
- EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
- EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
- EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
- EntDetect/resources/stride +0 -0
- EntDetect/statistics.py +1344 -0
- EntDetect/utilities.py +201 -0
- entdetect-1.2.0.dist-info/METADATA +26 -0
- entdetect-1.2.0.dist-info/RECORD +45 -0
- entdetect-1.2.0.dist-info/WHEEL +5 -0
- entdetect-1.2.0.dist-info/entry_points.txt +11 -0
- entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
- entdetect-1.2.0.dist-info/top_level.txt +2 -0
- scripts/__init__.py +5 -0
- scripts/convert_cor_psf_to_pdb.py +103 -0
- scripts/run_Foldingpathway.py +162 -0
- scripts/run_MSM.py +152 -0
- scripts/run_OP_on_simulation_traj.py +194 -0
- scripts/run_change_resolution.py +63 -0
- scripts/run_compare_sim2exp.py +215 -0
- scripts/run_montecarlo.py +158 -0
- scripts/run_nativeNCLE.py +179 -0
- scripts/run_nonnative_entanglement_clustering.py +110 -0
- scripts/run_population_modeling.py +117 -0
- scripts/run_workflow4_nativeNCLE_batch.py +412 -0
scripts/__init__.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Convert CHARMM COR and PSF files to PDB format.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python scripts/convert_cor_psf_to_pdb.py --cor structure.cor --psf structure.psf --output structure.pdb
|
|
7
|
+
|
|
8
|
+
This script uses MDAnalysis to read CHARMM coordinate (.cor/.crd) and topology (.psf) files
|
|
9
|
+
and writes them as a single PDB file that can be used with EntDetect analysis tools.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
def convert_cor_psf_to_pdb(cor_file, psf_file, output_pdb):
    """
    Convert CHARMM COR and PSF files to PDB format.

    Parameters
    ----------
    cor_file : str
        Path to CHARMM coordinate file (.cor or .crd)
    psf_file : str
        Path to CHARMM PSF topology file (.psf)
    output_pdb : str
        Path to output PDB file

    Returns
    -------
    str
        ``output_pdb``, returned unchanged after the PDB has been written.

    Raises
    ------
    SystemExit
        With exit code 1 when MDAnalysis cannot be imported or when either
        input path is not an existing regular file.
    """
    # MDAnalysis is imported lazily so the module itself stays importable
    # (e.g. for --help) on systems where it is not installed.
    try:
        import MDAnalysis as mda
    except ImportError:
        # Diagnostics go to stderr so they are not mixed into piped stdout.
        print("Error: MDAnalysis is required for this conversion.", file=sys.stderr)
        print("Install with: pip install MDAnalysis", file=sys.stderr)
        sys.exit(1)

    # Validate input files exist; isfile() also rejects directories, which
    # exists() would have let through to the reader.
    for label, path in (("COR", cor_file), ("PSF", psf_file)):
        if not os.path.isfile(path):
            print(f"Error: {label} file not found: {path}", file=sys.stderr)
            sys.exit(1)

    print(f"Reading topology from: {psf_file}")
    print(f"Reading coordinates from: {cor_file}")

    # Load the structure with PSF topology and COR coordinates
    u = mda.Universe(psf_file, cor_file)

    print(f"Loaded structure with {len(u.atoms)} atoms")

    # Set chainID to 'A' for all atoms if not already set
    # (an absent attribute, or one holding only '' / 'X', counts as unset).
    if not hasattr(u.atoms, 'chainIDs') or all(c == '' or c == 'X' for c in u.atoms.chainIDs):
        print("Setting chainID to 'A' for all atoms")
        u.add_TopologyAttr('chainIDs', ['A'] * len(u.atoms))

    # Write to PDB format
    print(f"Writing PDB to: {output_pdb}")
    u.atoms.write(output_pdb)

    print("Conversion complete!")
    return output_pdb
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def main(argv=None):
    """
    Command-line entry point for the COR/PSF to PDB conversion.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` makes argparse read ``sys.argv[1:]``.

    Returns
    -------
    int
        0 once ``convert_cor_psf_to_pdb`` has returned.
    """
    cli = argparse.ArgumentParser(
        description="Convert CHARMM COR/PSF files to PDB format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  convert_cor_psf_to_pdb --cor model.cor --psf model.psf --output model.pdb
  convert_cor_psf_to_pdb --cor 1zmr_ca.crd --psf 1zmr_ca.psf --output 1zmr_ca.pdb
        """,
    )

    # --cor and --crd are aliases; argparse stores the value under "cor".
    cli.add_argument("--cor", "--crd", type=str, required=True,
                     help="Input CHARMM coordinate file (.cor or .crd)")
    cli.add_argument("--psf", type=str, required=True,
                     help="Input CHARMM PSF topology file (.psf)")
    cli.add_argument("--output", "-o", type=str, required=True,
                     help="Output PDB file")

    opts = cli.parse_args(argv)
    convert_cor_psf_to_pdb(opts.cor, opts.psf, opts.output)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from EntDetect.statistics import FoldingPathwayStats
|
|
2
|
+
from EntDetect._logging import setup_logger
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Compute folding pathway statistics and Jensen-Shannon divergence from an MSM
|
|
7
|
+
trajectory-type-annotated mapping CSV produced by run_MSM.py.
|
|
8
|
+
|
|
9
|
+
The input --msm_data_file must be a CSV with the columns produced by run_MSM.py
|
|
10
|
+
(traj, frame, microstate, metastablestate, Q, G, StateSample) plus a user-added
|
|
11
|
+
trajectory-type column (--traj_type_col) that labels each trajectory as belonging
|
|
12
|
+
to one of the types in --traj_type_list (e.g. 'A' for folded, 'B' for unfolded).
|
|
13
|
+
|
|
14
|
+
This classification is typically added by the user based on a Q threshold, e.g.:
|
|
15
|
+
df['traj_type_A80pctNative'] = df.groupby('traj')['Q'].transform('max').ge(0.80).map({True:'A', False:'B'})
|
|
16
|
+
|
|
17
|
+
Examples
|
|
18
|
+
--------
|
|
19
|
+
Basic run — two trajectory types, no exclusions:
|
|
20
|
+
python scripts/run_Foldingpathway.py \\
|
|
21
|
+
--msm_data_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_MSMmapping_A80pctNative.csv \\
|
|
22
|
+
--meta_set_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_meta_set.csv \\
|
|
23
|
+
--traj_type_col traj_type_A80pctNative \\
|
|
24
|
+
--traj_type_list A B \\
|
|
25
|
+
--outdir $DATASTORE/outputs/workflow2/FoldingPathway_A80pctNative
|
|
26
|
+
|
|
27
|
+
Excluding mirror-image trajectories identified in Step 4:
|
|
28
|
+
python scripts/run_Foldingpathway.py \\
|
|
29
|
+
--msm_data_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_MSMmapping_A80pctNative.csv \\
|
|
30
|
+
--meta_set_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_meta_set.csv \\
|
|
31
|
+
--traj_type_col traj_type_A80pctNative \\
|
|
32
|
+
--traj_type_list A B \\
|
|
33
|
+
--outdir $DATASTORE/outputs/workflow2/FoldingPathway_A80pctNative \\
|
|
34
|
+
--rm_traj_list 65 75 155 162
|
|
35
|
+
|
|
36
|
+
Flags
|
|
37
|
+
-----
|
|
38
|
+
--msm_data_file CSV produced by run_MSM.py, annotated with a trajectory-type column
|
|
39
|
+
--meta_set_file meta_set CSV produced by run_MSM.py (microstates per metastable state)
|
|
40
|
+
--traj_type_col Column name in msm_data_file that contains trajectory-type labels
|
|
41
|
+
--traj_type_list Space-separated list of trajectory-type labels to compare (default: A B)
|
|
42
|
+
--outdir Output directory for folding pathway and JS-divergence results
|
|
43
|
+
--rm_traj_list Trajectory numbers to exclude (e.g. confirmed mirror conformations)
|
|
44
|
+
--n_window Rolling window size for state probability smoothing (default: 200)
|
|
45
|
+
--n_traj Total number of trajectories in the ensemble (default: 1000)
|
|
46
|
+
--state_type State level to analyse: metastablestate or microstate (default: metastablestate)
|
|
47
|
+
--log_level Logging verbosity: DEBUG, INFO, WARNING, ERROR (default: INFO)
|
|
48
|
+
--logdir Directory for log file (default: same as --outdir)
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def main(argv=None):
    """
    Command-line entry point: compute folding pathway statistics and the
    Jensen-Shannon divergence from a trajectory-type-annotated MSM mapping CSV.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` makes argparse read ``sys.argv[1:]``.

    Returns
    -------
    int
        0 on normal termination. Invalid arguments terminate through
        ``parser.error`` (SystemExit with code 2).
    """

    ###---------------------------------------------------------------------------------------------------------
    import os
    import argparse
    import time
    import logging
    start_time = time.time()
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        description="Compute folding pathway statistics and Jensen-Shannon divergence from MSM output.")

    # --- IO ---
    parser.add_argument("--msm_data_file", type=str, required=True, help="CSV produced by run_MSM.py, annotated with a trajectory-type column")
    parser.add_argument("--meta_set_file", type=str, required=True, help="meta_set CSV produced by run_MSM.py")
    parser.add_argument("--outdir", type=str, required=True, help="Output directory for folding pathway and JS-divergence results")

    # --- trajectory classification ---
    parser.add_argument("--traj_type_col", type=str, required=True, help="Column name in msm_data_file containing trajectory-type labels")
    parser.add_argument("--traj_type_list", type=str, nargs='+', default=['A', 'B'], help="Trajectory-type labels to compare (default: A B)")

    # --- trajectory filtering ---
    parser.add_argument("--rm_traj_list", type=int, nargs='+', default=[], help="Trajectory numbers to exclude (e.g. confirmed mirror conformations)")

    # --- analysis settings ---
    parser.add_argument("--n_window", type=int, default=200, help="Rolling window size for state probability smoothing (default: 200)")
    parser.add_argument("--n_traj", type=int, default=1000, help="Total number of trajectories in the ensemble (default: 1000)")
    parser.add_argument("--state_type", type=str, default='metastablestate',
                        choices=['metastablestate', 'microstate'], help="State level to analyse (default: metastablestate)")

    # --- logging ---
    parser.add_argument("--log_level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging verbosity (default: INFO)")
    parser.add_argument("--logdir", type=str, default=None, help="Directory for log file (default: same as --outdir)")

    args = parser.parse_args(argv)

    outdir = args.outdir
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logdir = args.logdir if args.logdir is not None else outdir

    os.makedirs(outdir, exist_ok=True)
    # --logdir may point somewhere other than outdir; make sure it exists
    # before any logger is configured to write there.
    os.makedirs(logdir, exist_ok=True)

    logger = setup_logger('run_Foldingpathway', outdir=logdir, ID='FoldingPathwayStats', log_level=log_level)
    # Pre-configure the logger name used for FoldingPathwayStats with the same
    # outdir/ID so both write to the same destination.
    setup_logger('FoldingPathwayStats', outdir=logdir, ID='FoldingPathwayStats', log_level=log_level)
    logger.info(f'args: {args}')
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    # --- input validation ---
    if not os.path.isfile(args.msm_data_file):
        parser.error(f"--msm_data_file does not exist: {args.msm_data_file}")

    if not os.path.isfile(args.meta_set_file):
        parser.error(f"--meta_set_file does not exist: {args.meta_set_file}")
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    # Load MSM data and validate the trajectory-type column
    logger.info(f'Loading MSM data from {args.msm_data_file}')
    msm_data = pd.read_csv(args.msm_data_file)
    logger.info(f'msm_data shape: {msm_data.shape}, columns: {msm_data.columns.tolist()}')

    if args.traj_type_col not in msm_data.columns:
        parser.error(
            f"--traj_type_col '{args.traj_type_col}' not found in {args.msm_data_file}. "
            f"Available columns: {msm_data.columns.tolist()}"
        )

    present_types = set(msm_data[args.traj_type_col].unique())
    missing_types = [t for t in args.traj_type_list if t not in present_types]
    if missing_types:
        try:
            shown_types = sorted(present_types)
        except TypeError:
            # Mixed label types (e.g. NaN from blank cells next to strings)
            # cannot be ordered directly; order by their text form instead so
            # the user still gets the intended error message.
            shown_types = sorted(present_types, key=str)
        parser.error(
            f"--traj_type_list values {missing_types} not found in column '{args.traj_type_col}'. "
            f"Values present: {shown_types}"
        )
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    FP = FoldingPathwayStats(
        msm_data=msm_data,
        meta_set_file=args.meta_set_file,
        # NOTE(review): keyword is spelled 'tarj_type_col' in this call; kept
        # as-is -- confirm it matches the FoldingPathwayStats signature.
        tarj_type_col=args.traj_type_col,
        traj_type_list=args.traj_type_list,
        outdir=outdir,
        n_window=args.n_window,
        n_traj=args.n_traj,
        state_type=args.state_type,
        rm_traj_list=args.rm_traj_list,
        log_level=log_level,
        logdir=logdir,
    )
    logger.info(f'FoldingPathwayStats: {FP}')

    folding_pathways = FP.post_trans()
    logger.info(f'folding_pathways:\n{folding_pathways}')

    JS_divergence = FP.JS_divergence()
    logger.info(f'JS_divergence:\n{JS_divergence}')
    ###---------------------------------------------------------------------------------------------------------

    logger.info(f'NORMAL TERMINATION - {time.time() - start_time:.1f} seconds')
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
scripts/run_MSM.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
from EntDetect.clustering import MSMNonNativeEntanglementClustering
|
|
2
|
+
from EntDetect._logging import setup_logger
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Build a Markov state model (MSM) from pre-computed order-parameter data across
|
|
6
|
+
an ensemble of simulation trajectories.
|
|
7
|
+
|
|
8
|
+
Reads Q and G order-parameter files from --OPpath (which must contain Q/ and G/
|
|
9
|
+
subdirectories produced by run_OP_on_simulation_traj.py), groups frames into
|
|
10
|
+
microstates via k-means clustering, and then coarse-grains microstates into
|
|
11
|
+
metastable macro-states using PCCA+.
|
|
12
|
+
|
|
13
|
+
Examples
|
|
14
|
+
--------
|
|
15
|
+
Basic run — 10 metastable states, lag time 20:
|
|
16
|
+
python scripts/run_MSM.py \\
|
|
17
|
+
--outdir $DATASTORE/outputs/workflow2/MSM \\
|
|
18
|
+
--ID 1ZMR_prod \\
|
|
19
|
+
--OPpath $DATASTORE/outputs/workflow2/OP_demo/ \\
|
|
20
|
+
--start 0 \\
|
|
21
|
+
--n_large_states 10 \\
|
|
22
|
+
--lagtime 20
|
|
23
|
+
|
|
24
|
+
Excluding mirror-image trajectories (identified in Step 10):
|
|
25
|
+
python scripts/run_MSM.py \\
|
|
26
|
+
--outdir $DATASTORE/outputs/workflow2/MSM \\
|
|
27
|
+
--ID 1ZMR_prod \\
|
|
28
|
+
--OPpath $DATASTORE/outputs/workflow2/OP_demo/ \\
|
|
29
|
+
--start 0 \\
|
|
30
|
+
--n_large_states 10 \\
|
|
31
|
+
--lagtime 20 \\
|
|
32
|
+
--rm_traj_list 65 75 155 162
|
|
33
|
+
|
|
34
|
+
Flags
|
|
35
|
+
-----
|
|
36
|
+
--outdir Output directory for MSM results
|
|
37
|
+
--OPpath Directory containing Q/ and G/ subdirectories of per-trajectory OP files
|
|
38
|
+
--ID Base name for output files
|
|
39
|
+
--start First frame index to include, 0-based (default: 0)
|
|
40
|
+
--end Last frame index to include, 0-based (default: all frames)
|
|
41
|
+
--stride Frame stride for loading OP data (default: 1)
|
|
42
|
+
--n_large_states Number of metastable macro-states requested from PCCA+ (default: 10)
|
|
43
|
+
--n_small_states Number of inactive micro-state clusters (default: 1)
|
|
44
|
+
--n_cluster Number of k-means microstates (default: 400)
|
|
45
|
+
--kmean_stride Frame stride used during k-means clustering (default: 2)
|
|
46
|
+
--lagtime MSM lag time in frames (default: 20)
|
|
47
|
+
--dt MD timestep in ns, used for time-axis labelling (default: 1.5e-5)
|
|
48
|
+
--ITS Run implied timescale analysis to validate lag time: True/False (default: False)
|
|
49
|
+
--rm_traj_list Trajectory numbers to exclude (e.g. confirmed mirror conformations)
|
|
50
|
+
--log_level Logging verbosity: DEBUG, INFO, WARNING, ERROR (default: INFO)
|
|
51
|
+
--logdir Directory for log file (default: same as --outdir)
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main(argv=None):
    """
    Command-line entry point: build a Markov state model from pre-computed
    order-parameter trajectories.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` makes argparse read ``sys.argv[1:]``.

    Returns
    -------
    int
        0 on normal termination. Invalid arguments terminate through
        ``parser.error`` (SystemExit with code 2).
    """

    ###---------------------------------------------------------------------------------------------------------
    import os
    import argparse
    import time
    import logging
    start_time = time.time()
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        description="Build a Markov state model from pre-computed order-parameter trajectories.")

    # --- identity / IO ---
    parser.add_argument("--outdir", type=str, required=True, help="Output directory for MSM results")
    parser.add_argument("--OPpath", type=str, required=True, help="Directory containing Q/ and G/ subdirectories of per-trajectory OP files")
    parser.add_argument("--ID", type=str, required=True, help="Base name for output files")

    # --- frame selection ---
    parser.add_argument("--start", type=int, default=0, help="First frame index to include, 0-based (default: 0)")
    # The very large default acts as an "all frames" sentinel.
    parser.add_argument("--end", type=int, default=99999999999, help="Last frame index to include, 0-based (default: all frames)")
    parser.add_argument("--stride", type=int, default=1, help="Frame stride for loading OP data (default: 1)")

    # --- MSM settings ---
    parser.add_argument("--n_large_states", type=int, default=10, help="Number of metastable macro-states requested from PCCA+ (default: 10)")
    parser.add_argument("--n_small_states", type=int, default=1, help="Number of inactive micro-state clusters (default: 1)")
    parser.add_argument("--n_cluster", type=int, default=400, help="Number of k-means microstates (default: 400)")
    parser.add_argument("--kmean_stride", type=int, default=2, help="Frame stride used during k-means clustering (default: 2)")
    parser.add_argument("--lagtime", type=int, default=20, help="MSM lag time in frames (default: 20)")
    parser.add_argument("--dt", type=float, default=0.015/1000, help="MD timestep in ns (default: 1.5e-5)")
    # NOTE(review): --ITS is forwarded as the raw string ('True'/'False'),
    # not a bool -- confirm that is what MSMNonNativeEntanglementClustering expects.
    parser.add_argument("--ITS", type=str, default='False', help="Run implied timescale analysis: True/False (default: False)")

    # --- trajectory filtering ---
    parser.add_argument("--rm_traj_list", type=int, nargs='+', default=[], help="Trajectory numbers to exclude (e.g. confirmed mirror conformations)")

    # --- logging ---
    parser.add_argument("--log_level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging verbosity (default: INFO)")
    parser.add_argument("--logdir", type=str, default=None, help="Directory for log file (default: same as --outdir)")

    args = parser.parse_args(argv)

    outdir = args.outdir
    OPpath = args.OPpath
    ID = args.ID
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    # --- input validation ---
    # Done before anything is created on disk so a bad invocation leaves no
    # output directory or log file behind.
    if not os.path.isdir(OPpath):
        parser.error(f"--OPpath does not exist or is not a directory: {OPpath}")

    for subdir in ('Q', 'G'):
        expected = os.path.join(OPpath, subdir)
        if not os.path.isdir(expected):
            parser.error(f"Expected subdirectory not found in --OPpath: {expected}")
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logdir = args.logdir if args.logdir is not None else outdir

    # Create the output and log directories before the loggers are configured,
    # so a log destination is never a missing directory.
    os.makedirs(outdir, exist_ok=True)
    os.makedirs(logdir, exist_ok=True)

    logger = setup_logger('run_MSM', outdir=logdir, ID=ID, log_level=log_level)
    # Pre-configure the logger name used for the clustering class with the
    # same outdir/ID so both write to the same destination.
    setup_logger('MSMNonNativeEntanglementClustering', outdir=logdir, ID=ID, log_level=log_level)
    logger.info(f'args: {args}')
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    MSM = MSMNonNativeEntanglementClustering(
        outdir=outdir,
        ID=ID,
        OPpath=OPpath,
        start=args.start,
        end=args.end,
        stride=args.stride,
        n_large_states=args.n_large_states,
        n_small_states=args.n_small_states,
        n_cluster=args.n_cluster,
        kmean_stride=args.kmean_stride,
        lagtime=args.lagtime,
        dt=args.dt,
        ITS=args.ITS,
        rm_traj_list=args.rm_traj_list,
        log_level=log_level,
        logdir=logdir,
    )
    logger.info(f'MSMNonNativeEntanglementClustering: {MSM}')
    MSM.run()
    ###---------------------------------------------------------------------------------------------------------

    logger.info(f'NORMAL TERMINATION - {time.time() - start_time:.1f} seconds')
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from EntDetect.order_params import CalculateOP
|
|
2
|
+
from EntDetect._logging import setup_logger
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Calculate any combination of order parameters on CG and/or all-atom trajectories.
|
|
6
|
+
|
|
7
|
+
Available OPs: Q G K SASA XP
|
|
8
|
+
Q — fraction of native contacts
|
|
9
|
+
G — fraction of native contacts with a change of entanglement (+ entanglement features)
|
|
10
|
+
K — mirror symmetry order parameter
|
|
11
|
+
SASA — solvent accessible surface area (requires all-atom trajectory)
|
|
12
|
+
XP — Jwalk cross-link probability (requires all-atom trajectory + --xp_pdb)
|
|
13
|
+
|
|
14
|
+
For SASA/XP the all-atom trajectory is used; set --resolution aa and supply the
|
|
15
|
+
AA topology and DCD as --PSF and --DCD.
|
|
16
|
+
|
|
17
|
+
Examples
|
|
18
|
+
--------
|
|
19
|
+
CG — Q, G, K only:
|
|
20
|
+
python scripts/run_OP_on_simulation_traj.py \\
|
|
21
|
+
--Traj 420 --ID 1ZMR \\
|
|
22
|
+
--PSF $REFSTRUCT/1zmr_model_clean_ca.psf \\
|
|
23
|
+
--COR $REFSTRUCT/1zmr_model_clean_ca.cor \\
|
|
24
|
+
--DCD $DATASTORE/cg_trajectories/420_prod.dcd \\
|
|
25
|
+
--sec_elements $REFSTRUCT/secondary_struc_defs.txt \\
|
|
26
|
+
--domain $REFSTRUCT/domain_def.dat \\
|
|
27
|
+
--outdir $DATASTORE/outputs/OP_demo \\
|
|
28
|
+
--ops Q G K
|
|
29
|
+
|
|
30
|
+
AA trajectory — SASA and XP only:
|
|
31
|
+
python scripts/run_OP_on_simulation_traj.py \\
|
|
32
|
+
--Traj 420 --ID 1ZMR \\
|
|
33
|
+
--PSF $REFSTRUCT/1zmr_model_clean.pdb \\
|
|
34
|
+
--DCD $DATASTORE/aa_trajectories/420_prod_aa.dcd \\
|
|
35
|
+
--resolution aa \\
|
|
36
|
+
--outdir $DATASTORE/outputs/OP_demo_AA \\
|
|
37
|
+
--ops SASA XP \\
|
|
38
|
+
--xp_pdb $REFSTRUCT/1zmr_model_clean.pdb
|
|
39
|
+
|
|
40
|
+
Flags
|
|
41
|
+
-----
|
|
42
|
+
--Traj Trajectory number (used in output filenames)
|
|
43
|
+
--ID Base name for output files
|
|
44
|
+
--PSF Topology file (CG PSF or AA PDB)
|
|
45
|
+
--DCD DCD trajectory
|
|
46
|
+
--outdir Output directory (default: ./)
|
|
47
|
+
--start First frame index, 0-based (default: 0)
|
|
48
|
+
--ops OPs to compute: Q G K SASA XP (default: Q G K)
|
|
49
|
+
--resolution Trajectory resolution: cg (default) or aa
|
|
50
|
+
--contacts Contact type: calpha or heavy (default: calpha for cg, heavy for aa)
|
|
51
|
+
--ent_detection_method ENT detection: 1=GLN (default), 2=TLN, 3=GLN+TLN same termini
|
|
52
|
+
--no_topoly Disable topoly; use GLN-only workflow
|
|
53
|
+
--nproc CPU cores for G calculation (default: 10)
|
|
54
|
+
--COR CG COR reference coordinates (required for Q, G, K)
|
|
55
|
+
--sec_elements STRIDE secondary structure definitions (required for Q, G, K)
|
|
56
|
+
--domain Domain boundary definitions (required for Q, G, K)
|
|
57
|
+
--xp_pdb All-atom PDB for XP cross-link probability (required for XP)
|
|
58
|
+
--chunk_frames Frames per chunk for Combined_GE (default: None = single file)
|
|
59
|
+
--chunk_suffix Naming suffix for chunk files (default: _chunk)
|
|
60
|
+
--log_level Logging verbosity: DEBUG, INFO, WARNING, ERROR (default: INFO)
|
|
61
|
+
--logdir Directory for log file (default: same as --outdir)
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
# Order parameters that need the CG reference inputs (--COR, --sec_elements, --domain).
_CG_OPS = {"Q", "G", "K"}
# Order parameters that need an all-atom trajectory (--resolution aa).
_AA_OPS = {"SASA", "XP"}
# Every order parameter this script knows about.
_ALL_OPS = _CG_OPS.union(_AA_OPS)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def main(argv=None):
    """
    Command-line entry point: calculate the requested order parameters
    (any of Q, G, K, SASA, XP) on a simulation trajectory.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` makes argparse read ``sys.argv[1:]``.

    Returns
    -------
    int
        0 on normal termination. Invalid arguments terminate through
        ``parser.error`` (SystemExit with code 2).
    """

    ###---------------------------------------------------------------------------------------------------------
    import os
    import argparse
    import time
    import logging
    start_time = time.time()
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        description="Calculate order parameters on CG and/or all-atom trajectories.")
    # --- identity / IO ---
    parser.add_argument("--Traj", type=str, required=True, help="Trajectory number (used in output filenames)")
    parser.add_argument("--ID", type=str, required=True, help="Base name for output files")
    parser.add_argument("--PSF", type=str, required=True, help="Topology file (CG PSF or AA PDB)")
    parser.add_argument("--DCD", type=str, required=True, help="DCD trajectory")
    parser.add_argument("--outdir", type=str, default='./', help="Output directory (default: ./)")
    parser.add_argument("--start", type=int, default=0, help="First frame index, 0-based (default: 0)")

    # --- which OPs ---
    parser.add_argument("--ops", nargs='+', default=['Q', 'G', 'K'], choices=['Q', 'G', 'K', 'SASA', 'XP'], help="Order parameters to compute (default: Q G K)")

    # --- trajectory settings ---
    parser.add_argument("--resolution", choices=["cg", "aa"], default="cg", help="Trajectory resolution: cg (default) or aa")
    parser.add_argument("--contacts", choices=["calpha", "heavy"], default=None, help="Contact type: calpha or heavy (default: calpha for cg, heavy for aa)")
    # The help text names 1 as the default because that is the value argparse
    # actually applies; the numeric default itself is left unchanged.
    parser.add_argument("--ent_detection_method", type=int, default=1, help="ENT detection: 1=GLN (default), 2=TLN, 3=GLN+TLN same termini")
    parser.add_argument("--no_topoly", action="store_true", help="Disable topoly crossing detection (uses GLN-only workflow)")
    parser.add_argument("--nproc", type=int, default=10, help="CPU cores for G (default: 10)")

    # --- CG-specific inputs (required for Q/G/K) ---
    parser.add_argument("--COR", type=str, default=None, help="CG COR reference coordinates")
    parser.add_argument("--sec_elements", type=str, default=None, help="STRIDE secondary structure definitions file")
    parser.add_argument("--domain", type=str, default=None, help="Domain boundary definitions file")

    parser.add_argument("--xp_pdb", type=str, default=None, help="All-atom PDB for XP (required for XP)")

    # --- G chunking (for large trajectories) ---
    parser.add_argument("--chunk_frames", type=int, default=None, help="Frames per chunk for Combined_GE output (default: None = single file)")
    parser.add_argument("--chunk_suffix", type=str, default="_chunk", help="Naming suffix for chunked files (default: _chunk)")

    # --- logging ---
    parser.add_argument("--log_level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Logging verbosity (default: INFO)")
    parser.add_argument("--logdir", type=str, default=None, help="Directory for log file (default: same as --outdir)")
    args = parser.parse_args(argv)

    ops = set(args.ops)
    traj = args.Traj
    ID = args.ID
    outdir = args.outdir
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    # --- resolve derived settings ---
    contacts = args.contacts if args.contacts is not None else ("calpha" if args.resolution == "cg" else "heavy")
    Calpha = contacts == "calpha"
    CG = args.resolution == "cg"
    topoly = not args.no_topoly
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    # --- input validation ---
    # Done before anything is created on disk so a bad invocation leaves no
    # output directory or log file behind.
    if ops & _CG_OPS and not all([args.COR, args.sec_elements, args.domain]):
        parser.error("--COR, --sec_elements, and --domain are required when computing Q, G, or K.")

    if ops & _AA_OPS:
        if CG:
            parser.error("SASA and XP require an all-atom trajectory: set --resolution aa.")
        if 'XP' in ops and args.xp_pdb is None:
            parser.error("--xp_pdb is required when XP is in --ops.")
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    log_id = f"{ID}_Traj{traj}"
    logdir = args.logdir if args.logdir is not None else outdir

    # Make sure the output and log directories exist before loggers or the
    # calculator are pointed at them.
    os.makedirs(outdir, exist_ok=True)
    os.makedirs(logdir, exist_ok=True)

    # Pre-configure all EntDetect loggers so they share one log file
    logger = setup_logger('run_OP', outdir=logdir, ID=log_id, log_level=log_level)
    for _cls in ['CalculateOP', 'GaussianEntanglement']:
        setup_logger(_cls, outdir=logdir, ID=log_id, log_level=log_level)
    logger.info(f'args: {args}')
    ###---------------------------------------------------------------------------------------------------------

    ###---------------------------------------------------------------------------------------------------------
    # --- instantiate CalculateOP for primary (CG or AA) trajectory ---
    CalcOP = CalculateOP(outdir=outdir,
                         Traj=traj,
                         ID=ID,
                         psf=args.PSF,
                         cor=args.COR,
                         sec_elements=args.sec_elements,
                         dcd=args.DCD,
                         domain=args.domain,
                         start=args.start,
                         ent_detection_method=args.ent_detection_method,
                         log_level=log_level,
                         logdir=logdir)
    logger.info(f'CalculateOP (primary): {CalcOP}')

    # Each requested OP is computed independently; only the keys of the
    # returned mapping are logged.
    if 'Q' in ops:
        Qdata_dict = CalcOP.Q()
        logger.info(f'Q keys: {list(Qdata_dict.keys())}')

    if 'G' in ops:
        Gdata_dict = CalcOP.G(topoly=topoly, Calpha=Calpha, CG=CG, nproc=args.nproc, chunk_frames=args.chunk_frames, chunk_suffix=args.chunk_suffix)
        logger.info(f'G keys: {list(Gdata_dict.keys())}')

    if 'K' in ops:
        Kdata_dict = CalcOP.K()
        logger.info(f'K keys: {list(Kdata_dict.keys())}')

    if 'SASA' in ops:
        SASAdata_dict = CalcOP.SASA()
        logger.info(f'SASA keys: {list(SASAdata_dict.keys())}')

    if 'XP' in ops:
        XPdata_dict = CalcOP.XP(pdb=args.xp_pdb, use_traj=True, nproc=args.nproc)
        logger.info(f'XP keys: {list(XPdata_dict.keys())}')
    ###---------------------------------------------------------------------------------------------------------

    logger.info(f'NORMAL TERMINATION - {time.time() - start_time:.1f} seconds')
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|