EntDetect 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. EntDetect/Jwalk/GridTools.py +567 -0
  2. EntDetect/Jwalk/PDBTools.py +532 -0
  3. EntDetect/Jwalk/SASDTools.py +543 -0
  4. EntDetect/Jwalk/SurfaceTools.py +150 -0
  5. EntDetect/Jwalk/__init__.py +19 -0
  6. EntDetect/Jwalk/naccess.config.txt +255 -0
  7. EntDetect/__init__.py +10 -0
  8. EntDetect/_logging.py +71 -0
  9. EntDetect/change_resolution.py +2361 -0
  10. EntDetect/clustering.py +2626 -0
  11. EntDetect/compare_sim2exp.py +1927 -0
  12. EntDetect/entanglement_features.py +478 -0
  13. EntDetect/gaussian_entanglement.py +2067 -0
  14. EntDetect/order_params.py +1048 -0
  15. EntDetect/resources/__init__.py +11 -0
  16. EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
  17. EntDetect/resources/calc_K.pl +712 -0
  18. EntDetect/resources/calc_Q.pl +962 -0
  19. EntDetect/resources/pulchra +0 -0
  20. EntDetect/resources/shared_files/__init__.py +2 -0
  21. EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
  22. EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
  23. EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
  24. EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
  25. EntDetect/resources/stride +0 -0
  26. EntDetect/statistics.py +1344 -0
  27. EntDetect/utilities.py +201 -0
  28. entdetect-1.2.0.dist-info/METADATA +26 -0
  29. entdetect-1.2.0.dist-info/RECORD +45 -0
  30. entdetect-1.2.0.dist-info/WHEEL +5 -0
  31. entdetect-1.2.0.dist-info/entry_points.txt +11 -0
  32. entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
  33. entdetect-1.2.0.dist-info/top_level.txt +2 -0
  34. scripts/__init__.py +5 -0
  35. scripts/convert_cor_psf_to_pdb.py +103 -0
  36. scripts/run_Foldingpathway.py +162 -0
  37. scripts/run_MSM.py +152 -0
  38. scripts/run_OP_on_simulation_traj.py +194 -0
  39. scripts/run_change_resolution.py +63 -0
  40. scripts/run_compare_sim2exp.py +215 -0
  41. scripts/run_montecarlo.py +158 -0
  42. scripts/run_nativeNCLE.py +179 -0
  43. scripts/run_nonnative_entanglement_clustering.py +110 -0
  44. scripts/run_population_modeling.py +117 -0
  45. scripts/run_workflow4_nativeNCLE_batch.py +412 -0
scripts/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Command-line entry points for EntDetect.
2
+
3
+ These modules are primarily thin wrappers around the core library in
4
+ `EntDetect/`.
5
+ """
scripts/convert_cor_psf_to_pdb.py ADDED
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Convert CHARMM COR and PSF files to PDB format.
4
+
5
+ Usage:
6
+ python scripts/convert_cor_psf_to_pdb.py --cor structure.cor --psf structure.psf --output structure.pdb
7
+
8
+ This script uses MDAnalysis to read CHARMM coordinate (.cor/.crd) and topology (.psf) files
9
+ and writes them as a single PDB file that can be used with EntDetect analysis tools.
10
+ """
11
+
12
+ import argparse
13
+ import os
14
+ import sys
15
+
16
def convert_cor_psf_to_pdb(cor_file, psf_file, output_pdb):
    """
    Build a PDB file from a CHARMM coordinate file and its PSF topology.

    Parameters
    ----------
    cor_file : str
        Path to CHARMM coordinate file (.cor or .crd)
    psf_file : str
        Path to CHARMM PSF topology file (.psf)
    output_pdb : str
        Path to output PDB file

    Returns
    -------
    str
        The ``output_pdb`` path that was passed in.

    Notes
    -----
    Calls ``sys.exit(1)`` after printing a message when MDAnalysis cannot be
    imported or when either input path does not exist.
    """
    # Imported here so that a missing installation produces a readable hint
    # instead of an import-time traceback.
    try:
        import MDAnalysis as mda
    except ImportError:
        print("Error: MDAnalysis is required for this conversion.")
        print("Install with: pip install MDAnalysis")
        sys.exit(1)

    # Coordinate file is checked before the topology; stop at the first miss.
    for label, path in (("COR", cor_file), ("PSF", psf_file)):
        if not os.path.exists(path):
            print(f"Error: {label} file not found: {path}")
            sys.exit(1)

    print(f"Reading topology from: {psf_file}")
    print(f"Reading coordinates from: {cor_file}")

    # Topology first, coordinates second.
    universe = mda.Universe(psf_file, cor_file)

    print(f"Loaded structure with {len(universe.atoms)} atoms")

    # A chain ID counts as "unset" when the attribute is absent or when every
    # atom carries either an empty string or the placeholder 'X'.
    chain_unset = (
        not hasattr(universe.atoms, 'chainIDs')
        or all(cid in ('', 'X') for cid in universe.atoms.chainIDs)
    )
    if chain_unset:
        print("Setting chainID to 'A' for all atoms")
        universe.add_TopologyAttr('chainIDs', ['A'] * len(universe.atoms))

    print(f"Writing PDB to: {output_pdb}")
    universe.atoms.write(output_pdb)

    print("Conversion complete!")
    return output_pdb
63
+
64
+
65
def main(argv=None):
    """
    Parse the command line and run the COR/PSF to PDB conversion.

    Parameters
    ----------
    argv : list of str, optional
        Arguments handed to ``ArgumentParser.parse_args``; ``None`` lets
        argparse read them from the process command line.

    Returns
    -------
    int
        0 once ``convert_cor_psf_to_pdb`` has returned.
    """
    usage_examples = """
Examples:
convert_cor_psf_to_pdb --cor model.cor --psf model.psf --output model.pdb
convert_cor_psf_to_pdb --cor 1zmr_ca.crd --psf 1zmr_ca.psf --output 1zmr_ca.pdb
"""
    cli = argparse.ArgumentParser(
        description="Convert CHARMM COR/PSF files to PDB format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # All three options are mandatory string paths.
    mandatory_path = {"type": str, "required": True}
    cli.add_argument(
        "--cor", "--crd",
        help="Input CHARMM coordinate file (.cor or .crd)",
        **mandatory_path,
    )
    cli.add_argument(
        "--psf",
        help="Input CHARMM PSF topology file (.psf)",
        **mandatory_path,
    )
    cli.add_argument(
        "--output", "-o",
        help="Output PDB file",
        **mandatory_path,
    )

    opts = cli.parse_args(argv)
    convert_cor_psf_to_pdb(opts.cor, opts.psf, opts.output)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
scripts/run_Foldingpathway.py ADDED
@@ -0,0 +1,162 @@
1
+ from EntDetect.statistics import FoldingPathwayStats
2
+ from EntDetect._logging import setup_logger
3
+ import pandas as pd
4
+
5
+ """
6
+ Compute folding pathway statistics and Jensen-Shannon divergence from an MSM
7
+ trajectory-type-annotated mapping CSV produced by run_MSM.py.
8
+
9
+ The input --msm_data_file must be a CSV with the columns produced by run_MSM.py
10
+ (traj, frame, microstate, metastablestate, Q, G, StateSample) plus a user-added
11
+ trajectory-type column (--traj_type_col) that labels each trajectory as belonging
12
+ to one of the types in --traj_type_list (e.g. 'A' for folded, 'B' for unfolded).
13
+
14
+ This classification is typically added by the user based on a Q threshold, e.g.:
15
+ df['traj_type_A80pctNative'] = df.groupby('traj')['Q'].transform('max').ge(0.80).map({True:'A', False:'B'})
16
+
17
+ Examples
18
+ --------
19
+ Basic run — two trajectory types, no exclusions:
20
+ python scripts/run_Foldingpathway.py \\
21
+ --msm_data_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_MSMmapping_A80pctNative.csv \\
22
+ --meta_set_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_meta_set.csv \\
23
+ --traj_type_col traj_type_A80pctNative \\
24
+ --traj_type_list A B \\
25
+ --outdir $DATASTORE/outputs/workflow2/FoldingPathway_A80pctNative
26
+
27
+ Excluding mirror-image trajectories identified in Step 4:
28
+ python scripts/run_Foldingpathway.py \\
29
+ --msm_data_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_MSMmapping_A80pctNative.csv \\
30
+ --meta_set_file $DATASTORE/outputs/workflow2/MSM/1ZMR_prod_meta_set.csv \\
31
+ --traj_type_col traj_type_A80pctNative \\
32
+ --traj_type_list A B \\
33
+ --outdir $DATASTORE/outputs/workflow2/FoldingPathway_A80pctNative \\
34
+ --rm_traj_list 65 75 155 162
35
+
36
+ Flags
37
+ -----
38
+ --msm_data_file CSV produced by run_MSM.py, annotated with a trajectory-type column
39
+ --meta_set_file meta_set CSV produced by run_MSM.py (microstates per metastable state)
40
+ --traj_type_col Column name in msm_data_file that contains trajectory-type labels
41
+ --traj_type_list Space-separated list of trajectory-type labels to compare (default: A B)
42
+ --outdir Output directory for folding pathway and JS-divergence results
43
+ --rm_traj_list Trajectory numbers to exclude (e.g. confirmed mirror conformations)
44
+ --n_window Rolling window size for state probability smoothing (default: 200)
45
+ --n_traj Total number of trajectories in the ensemble (default: 1000)
46
+ --state_type State level to analyse: metastablestate or microstate (default: metastablestate)
47
+ --log_level Logging verbosity: DEBUG, INFO, WARNING, ERROR (default: INFO)
48
+ --logdir Directory for log file (default: same as --outdir)
49
+ """
50
+
51
+
52
def main(argv=None):
    """
    Command-line driver for FoldingPathwayStats.

    Parses the options, creates the output directory, configures logging,
    validates the two input files and the trajectory-type column, then runs
    ``post_trans()`` and ``JS_divergence()`` on a FoldingPathwayStats object
    and logs both results.

    Parameters
    ----------
    argv : list of str, optional
        Arguments handed to ``ArgumentParser.parse_args``; ``None`` lets
        argparse read them from the process command line.

    Returns
    -------
    int
        0 after both analyses have been logged. Validation failures go
        through ``parser.error`` and therefore exit instead of returning.
    """
    import argparse
    import logging
    import os
    import sys
    import time

    t0 = time.time()

    cli = argparse.ArgumentParser(
        description="Compute folding pathway statistics and Jensen-Shannon divergence from MSM output.")

    # Options are registered in table order, which is also the --help order.
    option_table = [
        # --- IO ---
        ("--msm_data_file", dict(type=str, required=True,
                                 help="CSV produced by run_MSM.py, annotated with a trajectory-type column")),
        ("--meta_set_file", dict(type=str, required=True,
                                 help="meta_set CSV produced by run_MSM.py")),
        ("--outdir", dict(type=str, required=True,
                          help="Output directory for folding pathway and JS-divergence results")),
        # --- trajectory classification ---
        ("--traj_type_col", dict(type=str, required=True,
                                 help="Column name in msm_data_file containing trajectory-type labels")),
        ("--traj_type_list", dict(type=str, nargs='+', default=['A', 'B'],
                                  help="Trajectory-type labels to compare (default: A B)")),
        # --- trajectory filtering ---
        ("--rm_traj_list", dict(type=int, nargs='+', default=[],
                                help="Trajectory numbers to exclude (e.g. confirmed mirror conformations)")),
        # --- analysis settings ---
        ("--n_window", dict(type=int, default=200,
                            help="Rolling window size for state probability smoothing (default: 200)")),
        ("--n_traj", dict(type=int, default=1000,
                          help="Total number of trajectories in the ensemble (default: 1000)")),
        ("--state_type", dict(type=str, default='metastablestate',
                              choices=['metastablestate', 'microstate'],
                              help="State level to analyse (default: metastablestate)")),
        # --- logging ---
        ("--log_level", dict(default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                             help="Logging verbosity (default: INFO)")),
        ("--logdir", dict(type=str, default=None,
                          help="Directory for log file (default: same as --outdir)")),
    ]
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    opts = cli.parse_args(argv)
    out_dir = opts.outdir

    # --- logging setup ---
    level = getattr(logging, opts.log_level.upper(), logging.INFO)
    log_dir = out_dir if opts.logdir is None else opts.logdir

    os.makedirs(out_dir, exist_ok=True)

    # The script logger and the FoldingPathwayStats logger get the same ID
    # and directory.
    logger = setup_logger('run_Foldingpathway', outdir=log_dir, ID='FoldingPathwayStats', log_level=level)
    setup_logger('FoldingPathwayStats', outdir=log_dir, ID='FoldingPathwayStats', log_level=level)
    logger.info(f'args: {opts}')

    # --- input validation ---
    for flag, path in (("--msm_data_file", opts.msm_data_file),
                       ("--meta_set_file", opts.meta_set_file)):
        if not os.path.isfile(path):
            cli.error(f"{flag} does not exist: {path}")

    # --- load MSM data and check the trajectory-type column ---
    logger.info(f'Loading MSM data from {opts.msm_data_file}')
    msm_data = pd.read_csv(opts.msm_data_file)
    logger.info(f'msm_data shape: {msm_data.shape}, columns: {msm_data.columns.tolist()}')

    if opts.traj_type_col not in msm_data.columns:
        cli.error(
            f"--traj_type_col '{opts.traj_type_col}' not found in {opts.msm_data_file}. "
            f"Available columns: {msm_data.columns.tolist()}"
        )

    # Every requested label must occur at least once in the column.
    labels_in_file = set(msm_data[opts.traj_type_col].unique())
    absent = [label for label in opts.traj_type_list if label not in labels_in_file]
    if absent:
        cli.error(
            f"--traj_type_list values {absent} not found in column '{opts.traj_type_col}'. "
            f"Values present: {sorted(labels_in_file)}"
        )

    # --- analysis ---
    # NOTE(review): the keyword is spelled 'tarj_type_col'; presumably this
    # matches the FoldingPathwayStats signature -- confirm before renaming.
    stats = FoldingPathwayStats(
        msm_data=msm_data,
        meta_set_file=opts.meta_set_file,
        tarj_type_col=opts.traj_type_col,
        traj_type_list=opts.traj_type_list,
        outdir=out_dir,
        n_window=opts.n_window,
        n_traj=opts.n_traj,
        state_type=opts.state_type,
        rm_traj_list=opts.rm_traj_list,
        log_level=level,
        logdir=log_dir,
    )
    logger.info(f'FoldingPathwayStats: {stats}')

    pathways = stats.post_trans()
    logger.info(f'folding_pathways:\n{pathways}')

    divergence = stats.JS_divergence()
    logger.info(f'JS_divergence:\n{divergence}')

    elapsed = time.time() - t0
    logger.info(f'NORMAL TERMINATION - {elapsed:.1f} seconds')
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
scripts/run_MSM.py ADDED
@@ -0,0 +1,152 @@
1
+ from EntDetect.clustering import MSMNonNativeEntanglementClustering
2
+ from EntDetect._logging import setup_logger
3
+
4
+ """
5
+ Build a Markov state model (MSM) from pre-computed order-parameter data across
6
+ an ensemble of simulation trajectories.
7
+
8
+ Reads Q and G order-parameter files from --OPpath (which must contain Q/ and G/
9
+ subdirectories produced by run_OP_on_simulation_traj.py), groups frames into
10
+ microstates via k-means clustering, and then coarse-grains microstates into
11
+ metastable macro-states using PCCA+.
12
+
13
+ Examples
14
+ --------
15
+ Basic run — 10 metastable states, lag time 20:
16
+ python scripts/run_MSM.py \\
17
+ --outdir $DATASTORE/outputs/workflow2/MSM \\
18
+ --ID 1ZMR_prod \\
19
+ --OPpath $DATASTORE/outputs/workflow2/OP_demo/ \\
20
+ --start 0 \\
21
+ --n_large_states 10 \\
22
+ --lagtime 20
23
+
24
+ Excluding mirror-image trajectories (identified in Step 10):
25
+ python scripts/run_MSM.py \\
26
+ --outdir $DATASTORE/outputs/workflow2/MSM \\
27
+ --ID 1ZMR_prod \\
28
+ --OPpath $DATASTORE/outputs/workflow2/OP_demo/ \\
29
+ --start 0 \\
30
+ --n_large_states 10 \\
31
+ --lagtime 20 \\
32
+ --rm_traj_list 65 75 155 162
33
+
34
+ Flags
35
+ -----
36
+ --outdir Output directory for MSM results
37
+ --OPpath Directory containing Q/ and G/ subdirectories of per-trajectory OP files
38
+ --ID Base name for output files
39
+ --start First frame index to include, 0-based (default: 0)
40
+ --end Last frame index to include, 0-based (default: all frames)
41
+ --stride Frame stride for loading OP data (default: 1)
42
+ --n_large_states Number of metastable macro-states requested from PCCA+ (default: 10)
43
+ --n_small_states Number of inactive micro-state clusters (default: 1)
44
+ --n_cluster Number of k-means microstates (default: 400)
45
+ --kmean_stride Frame stride used during k-means clustering (default: 2)
46
+ --lagtime MSM lag time in frames (default: 20)
47
+ --dt MD timestep in ns, used for time-axis labelling (default: 1.5e-5)
48
+ --ITS Run implied timescale analysis to validate lag time: True/False (default: False)
49
+ --rm_traj_list Trajectory numbers to exclude (e.g. confirmed mirror conformations)
50
+ --log_level Logging verbosity: DEBUG, INFO, WARNING, ERROR (default: INFO)
51
+ --logdir Directory for log file (default: same as --outdir)
52
+ """
53
+
54
+
55
def main(argv=None):
    """
    Command-line driver for MSMNonNativeEntanglementClustering.

    Parses the options, configures logging, checks that ``--OPpath`` is a
    directory containing ``Q`` and ``G`` subdirectories, creates the output
    directory, then builds the clustering object and calls its ``run()``.

    Parameters
    ----------
    argv : list of str, optional
        Arguments handed to ``ArgumentParser.parse_args``; ``None`` lets
        argparse read them from the process command line.

    Returns
    -------
    int
        0 after ``run()`` returns. Validation failures go through
        ``parser.error`` and therefore exit instead of returning.
    """
    import argparse
    import logging
    import os
    import sys
    import time

    t0 = time.time()

    cli = argparse.ArgumentParser(
        description="Build a Markov state model from pre-computed order-parameter trajectories.")

    # Options are registered in table order, which is also the --help order.
    option_table = [
        # --- identity / IO ---
        ("--outdir", dict(type=str, required=True, help="Output directory for MSM results")),
        ("--OPpath", dict(type=str, required=True,
                          help="Directory containing Q/ and G/ subdirectories of per-trajectory OP files")),
        ("--ID", dict(type=str, required=True, help="Base name for output files")),
        # --- frame selection ---
        ("--start", dict(type=int, default=0,
                         help="First frame index to include, 0-based (default: 0)")),
        ("--end", dict(type=int, default=99999999999,
                       help="Last frame index to include, 0-based (default: all frames)")),
        ("--stride", dict(type=int, default=1,
                          help="Frame stride for loading OP data (default: 1)")),
        # --- MSM settings ---
        ("--n_large_states", dict(type=int, default=10,
                                  help="Number of metastable macro-states requested from PCCA+ (default: 10)")),
        ("--n_small_states", dict(type=int, default=1,
                                  help="Number of inactive micro-state clusters (default: 1)")),
        ("--n_cluster", dict(type=int, default=400,
                             help="Number of k-means microstates (default: 400)")),
        ("--kmean_stride", dict(type=int, default=2,
                                help="Frame stride used during k-means clustering (default: 2)")),
        ("--lagtime", dict(type=int, default=20,
                           help="MSM lag time in frames (default: 20)")),
        ("--dt", dict(type=float, default=0.015/1000,
                      help="MD timestep in ns (default: 1.5e-5)")),
        # --ITS is forwarded as the raw string ('True'/'False'), not a bool.
        ("--ITS", dict(type=str, default='False',
                       help="Run implied timescale analysis: True/False (default: False)")),
        # --- trajectory filtering ---
        ("--rm_traj_list", dict(type=int, nargs='+', default=[],
                                help="Trajectory numbers to exclude (e.g. confirmed mirror conformations)")),
        # --- logging ---
        ("--log_level", dict(default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                             help="Logging verbosity (default: INFO)")),
        ("--logdir", dict(type=str, default=None,
                          help="Directory for log file (default: same as --outdir)")),
    ]
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    opts = cli.parse_args(argv)
    out_dir, op_dir, run_id = opts.outdir, opts.OPpath, opts.ID

    # --- logging setup ---
    level = getattr(logging, opts.log_level.upper(), logging.INFO)
    log_dir = out_dir if opts.logdir is None else opts.logdir

    logger = setup_logger('run_MSM', outdir=log_dir, ID=run_id, log_level=level)
    setup_logger('MSMNonNativeEntanglementClustering', outdir=log_dir, ID=run_id, log_level=level)
    logger.info(f'args: {opts}')

    # --- input validation ---
    if not os.path.isdir(op_dir):
        cli.error(f"--OPpath does not exist or is not a directory: {op_dir}")

    for name in ('Q', 'G'):
        required_dir = os.path.join(op_dir, name)
        if not os.path.isdir(required_dir):
            cli.error(f"Expected subdirectory not found in --OPpath: {required_dir}")

    # The output directory is created only after the inputs have been checked.
    os.makedirs(out_dir, exist_ok=True)

    # --- build and run the model ---
    model = MSMNonNativeEntanglementClustering(
        outdir=out_dir,
        ID=run_id,
        OPpath=op_dir,
        start=opts.start,
        end=opts.end,
        stride=opts.stride,
        n_large_states=opts.n_large_states,
        n_small_states=opts.n_small_states,
        n_cluster=opts.n_cluster,
        kmean_stride=opts.kmean_stride,
        lagtime=opts.lagtime,
        dt=opts.dt,
        ITS=opts.ITS,
        rm_traj_list=opts.rm_traj_list,
        log_level=level,
        logdir=log_dir,
    )
    logger.info(f'MSMNonNativeEntanglementClustering: {model}')
    model.run()

    elapsed = time.time() - t0
    logger.info(f'NORMAL TERMINATION - {elapsed:.1f} seconds')
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
scripts/run_OP_on_simulation_traj.py ADDED
@@ -0,0 +1,194 @@
1
+ from EntDetect.order_params import CalculateOP
2
+ from EntDetect._logging import setup_logger
3
+
4
+ """
5
+ Calculate any combination of order parameters on CG and/or all-atom trajectories.
6
+
7
+ Available OPs: Q G K SASA XP
8
+ Q — fraction of native contacts
9
+ G — fraction of native contacts with a change of entanglement (+ entanglement features)
10
+ K — mirror symmetry order parameter
11
+ SASA — solvent accessible surface area (requires all-atom trajectory)
12
+ XP — Jwalk cross-link probability (requires all-atom trajectory + --xp_pdb)
13
+
14
+ For SASA/XP the all-atom trajectory is used; set --resolution aa and supply the
15
+ AA topology and DCD as --PSF and --DCD.
16
+
17
+ Examples
18
+ --------
19
+ CG — Q, G, K only:
20
+ python scripts/run_OP_on_simulation_traj.py \\
21
+ --Traj 420 --ID 1ZMR \\
22
+ --PSF $REFSTRUCT/1zmr_model_clean_ca.psf \\
23
+ --COR $REFSTRUCT/1zmr_model_clean_ca.cor \\
24
+ --DCD $DATASTORE/cg_trajectories/420_prod.dcd \\
25
+ --sec_elements $REFSTRUCT/secondary_struc_defs.txt \\
26
+ --domain $REFSTRUCT/domain_def.dat \\
27
+ --outdir $DATASTORE/outputs/OP_demo \\
28
+ --ops Q G K
29
+
30
+ AA trajectory — SASA and XP only:
31
+ python scripts/run_OP_on_simulation_traj.py \\
32
+ --Traj 420 --ID 1ZMR \\
33
+ --PSF $REFSTRUCT/1zmr_model_clean.pdb \\
34
+ --DCD $DATASTORE/aa_trajectories/420_prod_aa.dcd \\
35
+ --resolution aa \\
36
+ --outdir $DATASTORE/outputs/OP_demo_AA \\
37
+ --ops SASA XP \\
38
+ --xp_pdb $REFSTRUCT/1zmr_model_clean.pdb
39
+
40
+ Flags
41
+ -----
42
+ --Traj Trajectory number (used in output filenames)
43
+ --ID Base name for output files
44
+ --PSF Topology file (CG PSF or AA PDB)
45
+ --DCD DCD trajectory
46
+ --outdir Output directory (default: ./)
47
+ --start First frame index, 0-based (default: 0)
48
+ --ops OPs to compute: Q G K SASA XP (default: Q G K)
49
+ --resolution Trajectory resolution: cg (default) or aa
50
+ --contacts Contact type: calpha or heavy (default: calpha for cg, heavy for aa)
51
+ --ent_detection_method ENT detection: 1=GLN (default), 2=TLN, 3=GLN+TLN same termini
52
+ --no_topoly Disable topoly; use GLN-only workflow
53
+ --nproc CPU cores for G calculation (default: 10)
54
+ --COR CG COR reference coordinates (required for Q, G, K)
55
+ --sec_elements STRIDE secondary structure definitions (required for Q, G, K)
56
+ --domain Domain boundary definitions (required for Q, G, K)
57
+ --xp_pdb All-atom PDB for XP cross-link probability (required for XP)
58
+ --chunk_frames Frames per chunk for Combined_GE (default: None = single file)
59
+ --chunk_suffix Naming suffix for chunk files (default: _chunk)
60
+ --log_level Logging verbosity: DEBUG, INFO, WARNING, ERROR (default: INFO)
61
+ --logdir Directory for log file (default: same as --outdir)
62
+ """
63
+
64
# Order parameters that need the CG reference inputs
# (--COR, --sec_elements, --domain).
_CG_OPS = {'Q', 'G', 'K'}
# Order parameters that are only accepted with --resolution aa.
_AA_OPS = {'SASA', 'XP'}
# Every order parameter name known to this script.
_ALL_OPS = _CG_OPS.union(_AA_OPS)
67
+
68
+
69
def main(argv=None):
    """
    Command-line driver that computes the requested order parameters.

    Parses the options, configures logging, validates that the inputs needed
    by the selected order parameters were supplied, builds one CalculateOP
    object and calls its ``Q``, ``G``, ``K``, ``SASA`` and ``XP`` methods for
    whichever names appear in ``--ops``, logging the keys of each result.

    Parameters
    ----------
    argv : list of str, optional
        Arguments handed to ``ArgumentParser.parse_args``; ``None`` lets
        argparse read them from the process command line.

    Returns
    -------
    int
        0 after all requested order parameters have been computed.
        Validation failures go through ``parser.error`` and therefore exit
        instead of returning.
    """
    import argparse
    import logging
    import time

    start_time = time.time()

    parser = argparse.ArgumentParser(
        description="Calculate order parameters on CG and/or all-atom trajectories.")

    # --- identity / IO ---
    parser.add_argument("--Traj", type=str, required=True, help="Trajectory number (used in output filenames)")
    parser.add_argument("--ID", type=str, required=True, help="Base name for output files")
    parser.add_argument("--PSF", type=str, required=True, help="Topology file (CG PSF or AA PDB)")
    parser.add_argument("--DCD", type=str, required=True, help="DCD trajectory")
    parser.add_argument("--outdir", type=str, default='./', help="Output directory (default: ./)")
    parser.add_argument("--start", type=int, default=0, help="First frame index, 0-based (default: 0)")

    # --- which OPs ---
    parser.add_argument("--ops", nargs='+', default=['Q', 'G', 'K'],
                        choices=['Q', 'G', 'K', 'SASA', 'XP'],
                        help="Order parameters to compute (default: Q G K)")

    # --- trajectory settings ---
    parser.add_argument("--resolution", choices=["cg", "aa"], default="cg",
                        help="Trajectory resolution: cg (default) or aa")
    parser.add_argument("--contacts", choices=["calpha", "heavy"], default=None,
                        help="Contact type: calpha or heavy (default: calpha for cg, heavy for aa)")
    # The help text previously labelled 2=TLN as the default although the
    # argument default is 1; the text now describes the actual behaviour.
    parser.add_argument("--ent_detection_method", type=int, default=1,
                        help="ENT detection: 1=GLN (default), 2=TLN, 3=GLN+TLN same termini")
    parser.add_argument("--no_topoly", action="store_true",
                        help="Disable topoly crossing detection (uses GLN-only workflow)")
    parser.add_argument("--nproc", type=int, default=10, help="CPU cores for G (default: 10)")

    # --- CG-specific inputs (required for Q/G/K) ---
    parser.add_argument("--COR", type=str, default=None, help="CG COR reference coordinates")
    parser.add_argument("--sec_elements", type=str, default=None,
                        help="STRIDE secondary structure definitions file")
    parser.add_argument("--domain", type=str, default=None, help="Domain boundary definitions file")

    parser.add_argument("--xp_pdb", type=str, default=None, help="All-atom PDB for XP (required for XP)")

    # --- G chunking (for large trajectories) ---
    parser.add_argument("--chunk_frames", type=int, default=None,
                        help="Frames per chunk for Combined_GE output (default: None = single file)")
    parser.add_argument("--chunk_suffix", type=str, default="_chunk",
                        help="Naming suffix for chunked files (default: _chunk)")

    # --- logging ---
    parser.add_argument("--log_level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        help="Logging verbosity (default: INFO)")
    parser.add_argument("--logdir", type=str, default=None,
                        help="Directory for log file (default: same as --outdir)")
    args = parser.parse_args(argv)

    ops = set(args.ops)
    traj = args.Traj
    ID = args.ID
    outdir = args.outdir

    # --- resolve derived settings ---
    # An unset --contacts follows the resolution: calpha for cg, heavy for aa.
    if args.contacts is not None:
        contacts = args.contacts
    else:
        contacts = "calpha" if args.resolution == "cg" else "heavy"
    Calpha = contacts == "calpha"
    CG = args.resolution == "cg"
    topoly = not args.no_topoly

    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    log_id = f"{ID}_Traj{traj}"
    logdir = args.logdir if args.logdir is not None else outdir

    # Pre-configure all EntDetect loggers so they share one log file
    logger = setup_logger('run_OP', outdir=logdir, ID=log_id, log_level=log_level)
    for _cls in ['CalculateOP', 'GaussianEntanglement']:
        setup_logger(_cls, outdir=logdir, ID=log_id, log_level=log_level)
    logger.info(f'args: {args}')

    # --- input validation ---
    if ops & _CG_OPS and not all([args.COR, args.sec_elements, args.domain]):
        parser.error("--COR, --sec_elements, and --domain are required when computing Q, G, or K.")

    if ops & _AA_OPS:
        if CG:
            parser.error("SASA and XP require an all-atom trajectory: set --resolution aa.")
        if 'XP' in ops and args.xp_pdb is None:
            parser.error("--xp_pdb is required when XP is in --ops.")

    # --- instantiate CalculateOP for primary (CG or AA) trajectory ---
    CalcOP = CalculateOP(outdir=outdir,
                         Traj=traj,
                         ID=ID,
                         psf=args.PSF,
                         cor=args.COR,
                         sec_elements=args.sec_elements,
                         dcd=args.DCD,
                         domain=args.domain,
                         start=args.start,
                         ent_detection_method=args.ent_detection_method,
                         log_level=log_level,
                         logdir=logdir)
    logger.info(f'CalculateOP (primary): {CalcOP}')

    # Each requested OP runs in this fixed order, independent of the order
    # given on the command line.
    if 'Q' in ops:
        Qdata_dict = CalcOP.Q()
        logger.info(f'Q keys: {list(Qdata_dict.keys())}')

    if 'G' in ops:
        Gdata_dict = CalcOP.G(topoly=topoly, Calpha=Calpha, CG=CG, nproc=args.nproc,
                              chunk_frames=args.chunk_frames, chunk_suffix=args.chunk_suffix)
        logger.info(f'G keys: {list(Gdata_dict.keys())}')

    if 'K' in ops:
        Kdata_dict = CalcOP.K()
        logger.info(f'K keys: {list(Kdata_dict.keys())}')

    if 'SASA' in ops:
        SASAdata_dict = CalcOP.SASA()
        logger.info(f'SASA keys: {list(SASAdata_dict.keys())}')

    if 'XP' in ops:
        XPdata_dict = CalcOP.XP(pdb=args.xp_pdb, use_traj=True, nproc=args.nproc)
        logger.info(f'XP keys: {list(XPdata_dict.keys())}')

    logger.info(f'NORMAL TERMINATION - {time.time() - start_time:.1f} seconds')
    return 0


if __name__ == "__main__":
    raise SystemExit(main())