cooper-beta 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ """
2
+ cooper_beta
3
+
4
+ A small toolkit/pipeline to detect beta-barrel-like protein chains from PDB/mmCIF structures.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from importlib.metadata import PackageNotFoundError, version
9
+
10
# Resolve the package version from the installed distribution metadata; fall
# back to a placeholder string when that metadata cannot be found.
try:
    __version__ = version("cooper-beta")
except PackageNotFoundError:  # pragma: no cover - editable tree before metadata exists
    __version__ = "0.0.0"

# Public names exported by ``from cooper_beta import *``. Apart from
# ``__version__`` (defined above), each entry is bound by the submodule imports
# that follow this list.
__all__ = [
    "AppConfig",
    "AnalysisReport",
    "Config",
    "ConfigValidationError",
    "ChainNotFoundError",
    "CooperBetaError",
    "DetectionResult",
    "DsspError",
    "DsspNotFoundError",
    "InputValidationError",
    "LayerDiagnostic",
    "PipelineRunResult",
    "PreparedChainPayload",
    "ProteinLoader",
    "PCAAligner",
    "ProteinSlicer",
    "ResidueRecord",
    "StructureParseError",
    "BarrelAnalyzer",
    "build_config",
    "detect",
    "find_dssp_binary",
    "require_dssp_binary",
    "main",
    "__version__",
]
42
+
43
+ from .alignment import PCAAligner
44
+ from .analyzer import BarrelAnalyzer
45
+ from .config import AppConfig, Config, build_config
46
+ from .exceptions import (
47
+ ChainNotFoundError,
48
+ ConfigValidationError,
49
+ CooperBetaError,
50
+ DsspError,
51
+ DsspNotFoundError,
52
+ InputValidationError,
53
+ StructureParseError,
54
+ )
55
+ from .loader import ProteinLoader
56
+ from .models import (
57
+ AnalysisReport,
58
+ DetectionResult,
59
+ LayerDiagnostic,
60
+ PipelineRunResult,
61
+ PreparedChainPayload,
62
+ ResidueRecord,
63
+ )
64
+ from .pipeline import detect, main
65
+ from .runtime import find_dssp_binary, require_dssp_binary
66
+ from .slicer import ProteinSlicer
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from .cli import main
4
+
5
# Invoke the CLI entry point imported from ``.cli`` only when this module is
# executed as the main program, not when it is merely imported.
if __name__ == "__main__":  # pragma: no cover
    main()
@@ -0,0 +1,75 @@
1
+ import numpy as np
2
+
3
+
4
class PCAAligner:
    """
    Use PCA to align the principal axis of a point cloud to the Z axis.

    After ``fit`` the instance holds:

    * ``center`` - centroid of the fitted points, shape ``(3,)``.
    * ``rotation_matrix`` - ``(3, 3)`` matrix whose columns are the covariance
      eigenvectors, ordered by ascending eigenvalue.
    * ``eigenvalues`` - the matching eigenvalues, in ascending order.

    All three attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.center = None
        self.rotation_matrix = None
        self.eigenvalues = None

    def fit(self, points):
        """
        Compute the principal axes of the point cloud.

        For best results, pass only C-alpha coordinates from beta-sheet residues.

        Args:
            points (np.ndarray): Coordinate array with shape ``(N, 3)``, ``N >= 3``.

        Raises:
            ValueError: If fewer than three points are given, or if ``points``
                is not a two-dimensional ``(N, 3)`` array.
        """
        coords = np.asarray(points, dtype=float)
        if coords.ndim == 0 or coords.shape[0] < 3:
            raise ValueError("At least three points are required for PCA alignment.")
        # Reject 1-D or non-3-column input up front: otherwise the failure
        # surfaces later as an obscure linear-algebra error, or (for two
        # columns) silently produces a 2x2 basis.
        if coords.ndim != 2 or coords.shape[1] != 3:
            raise ValueError(
                f"Expected coordinates with shape (N, 3), got {coords.shape}."
            )

        # 1. Compute the centroid and center the coordinates.
        self.center = np.mean(coords, axis=0)
        centered_coords = coords - self.center

        # 2. Build the covariance matrix.
        # rowvar=False means columns are variables (x, y, z) and rows are samples.
        cov_matrix = np.cov(centered_coords, rowvar=False)

        # 3. Eigen decomposition.
        # eigh is appropriate for symmetric matrices such as covariance matrices.
        # Eigenvalues come back in ascending order (min -> max) and each column
        # of eig_vecs is the corresponding eigenvector.
        # NOTE(review): eigh does not fix eigenvector signs, so this basis may
        # be left-handed (a reflection rather than a pure rotation) - confirm
        # that downstream code does not depend on handedness.
        eig_vals, eig_vecs = np.linalg.eigh(cov_matrix)

        self.eigenvalues = eig_vals
        self.rotation_matrix = eig_vecs

    def transform(self, points):
        """
        Project the point cloud into the PCA coordinate system.

        After transformation, the Z axis corresponds to the dominant principal
        axis of the original point cloud.

        Args:
            points (np.ndarray): Coordinates whose last dimension has length 3,
                e.g. shape ``(N, 3)`` or a single point of shape ``(3,)``.

        Returns:
            np.ndarray: The centered coordinates expressed in the eigenvector
            basis, with the same shape as ``points``.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
            ValueError: If the last dimension of ``points`` is not 3.
        """
        if self.rotation_matrix is None or self.center is None:
            raise RuntimeError("Aligner is not fitted. Call fit() first.")

        coords = np.asarray(points, dtype=float)
        # Guard against shapes such as (N, 1) that would otherwise broadcast
        # silently against the centroid and yield meaningless coordinates.
        if coords.ndim == 0 or coords.shape[-1] != 3:
            raise ValueError(
                f"Expected coordinates with a last dimension of 3, got {coords.shape}."
            )

        # 1. Translate to the centered coordinate system.
        centered = coords - self.center

        # 2. Rotate by projecting onto the eigenvector basis.
        # Column 0 corresponds to the smallest eigenvalue direction (new X),
        # column 1 to the middle eigenvalue direction (new Y), and column 2 to
        # the largest eigenvalue direction (new Z, the principal axis).
        return np.dot(centered, self.rotation_matrix)

    def fit_transform(self, points):
        """Convenience wrapper that runs ``fit`` followed by ``transform``."""
        self.fit(points)
        return self.transform(points)
@@ -0,0 +1,254 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ from scipy.spatial import cKDTree
5
+
6
+ from .config import AngleOrderRuleConfig
7
+ from .constants import (
8
+ EPSILON,
9
+ FULL_ROTATION_DEG,
10
+ MIN_ANGULAR_GAP_POINTS,
11
+ MIN_NEAREST_NEIGHBOR_POINTS,
12
+ MIN_SEQUENCE_ANGLE_ORDER_POINTS,
13
+ RAD_TO_DEG,
14
+ ROBUST_SIGMA_SCALE,
15
+ THREE_SIGMA_MULTIPLIER,
16
+ TOLERANCE,
17
+ )
18
+
19
+
20
def robust_center(points_xy: np.ndarray) -> tuple[float, float]:
    """Compute the arithmetic-mean center (x, y) of one slice cross section."""
    column_means = np.asarray(points_xy, dtype=float).mean(axis=0)
    return float(column_means[0]), float(column_means[1])
25
+
26
+
27
def radial_inlier_subset(
    points_xy: np.ndarray,
    *,
    minimum_points: int,
) -> tuple[np.ndarray, np.ndarray, float, float] | None:
    """
    Drop radial outliers and recenter on the points that remain.

    A provisional center (from ``robust_center``) is used only to measure each
    point's radius. Points whose radius deviates from the median radius by more
    than ``THREE_SIGMA_MULTIPLIER`` robust sigmas are discarded, and the center
    is then recomputed from the survivors.

    Returns:
        ``(kept_points, keep_mask, center_x, center_y)``, or ``None`` when the
        input is not two-dimensional or fewer than ``minimum_points`` points
        are available before or after filtering.
    """
    cloud = np.asarray(points_xy, dtype=float)
    if not (cloud.ndim == 2 and cloud.shape[0] >= minimum_points):
        return None

    # Radius of every point measured from the provisional, unfiltered center.
    seed_x, seed_y = robust_center(cloud)
    offset_x = cloud[:, 0] - seed_x
    offset_y = cloud[:, 1] - seed_y
    radial_distance = np.sqrt(offset_x * offset_x + offset_y * offset_y)

    typical_radius = float(np.median(radial_distance))
    radius_mad = float(np.median(np.abs(radial_distance - typical_radius)))
    radius_spread = float(ROBUST_SIGMA_SCALE * radius_mad)

    if radius_spread > TOLERANCE:
        cutoff = THREE_SIGMA_MULTIPLIER * radius_spread
        keep_mask = np.abs(radial_distance - typical_radius) <= cutoff
        kept_points = cloud[keep_mask]
    else:
        # No measurable spread in the radii: keep every point.
        keep_mask = np.ones(cloud.shape[0], dtype=bool)
        kept_points = cloud

    if kept_points.shape[0] < minimum_points:
        return None

    final_x, final_y = robust_center(kept_points)
    return kept_points, keep_mask, final_x, final_y
64
+
65
+
66
def collapse_points_by_strand(points: np.ndarray) -> np.ndarray:
    """
    Merge all intersections that belong to the same strand into a single row.

    Column 3 is read as the strand identifier. For every strand that appears
    more than once in the slice, the output row carries the mean of columns 0-1,
    the median of column 2, and the strand identifier. Rows are ordered by
    ascending strand identifier. Comparing order at the strand level rather than
    the raw segment level keeps ``seq_order`` closer to the scientific notion of
    strand order around the barrel.

    Input that is not two-dimensional, has fewer than four columns, or already
    has one row per strand is returned unchanged (as a float array).
    """
    pts = np.asarray(points, dtype=float)
    if pts.ndim != 2 or pts.shape[1] < 4:
        return pts

    ids = pts[:, 3].astype(int)
    distinct_ids = np.unique(ids)
    if distinct_ids.size == pts.shape[0]:
        return pts

    rows = []
    for strand_id in distinct_ids:
        members = pts[ids == strand_id]
        mean_xy = np.mean(members[:, :2], axis=0)
        rows.append(
            (
                mean_xy[0],
                mean_xy[1],
                float(np.median(members[:, 2])),
                float(strand_id),
            )
        )
    return np.array(rows, dtype=float)
90
+
91
+
92
def nearest_neighbor_spacing_stats(
    points_xy: np.ndarray,
) -> tuple[float, float, float, float] | None:
    """
    Summarize how evenly the points of one slice are spaced.

    Returns:
        ``(median_distance, robust_sigma, robust_cv, inlier_fraction)`` computed
        from each point's nearest-neighbor distance, or ``None`` when the input
        is not two-dimensional or has fewer than
        ``MIN_NEAREST_NEIGHBOR_POINTS`` rows. When the robust sigma is below
        ``EPSILON`` the result is ``(median_distance, 0.0, 0.0, 1.0)``.
    """
    cloud = np.asarray(points_xy, dtype=float)
    if cloud.ndim != 2 or cloud.shape[0] < MIN_NEAREST_NEIGHBOR_POINTS:
        return None

    # Query two neighbors per point: the first hit is the point itself, so the
    # second column holds the distance to its nearest other point.
    neighbor_distances, _ = cKDTree(cloud).query(cloud, k=2)
    spacing = neighbor_distances[:, 1]

    typical_spacing = float(np.median(spacing))
    spacing_mad = float(np.median(np.abs(spacing - typical_spacing)))
    spacing_sigma = float(ROBUST_SIGMA_SCALE * spacing_mad)

    spacing_cv = (
        float("inf")
        if typical_spacing <= TOLERANCE
        else float(spacing_sigma / typical_spacing)
    )

    if spacing_sigma < EPSILON:
        return typical_spacing, 0.0, 0.0, 1.0

    within_band = np.abs(spacing - typical_spacing) <= (
        THREE_SIGMA_MULTIPLIER * spacing_sigma
    )
    inlier_fraction = float(np.mean(within_band)) if len(within_band) else 0.0
    return typical_spacing, spacing_sigma, spacing_cv, inlier_fraction
121
+
122
+
123
def angular_gap_stats(
    points_xy: np.ndarray,
) -> tuple[float, float, int, float, float] | None:
    """
    Measure the largest empty angular sector around the slice center.

    Returns:
        ``(max_gap_degrees, coverage_degrees, used_count, center_x, center_y)``
        where coverage is ``FULL_ROTATION_DEG`` minus the largest gap, or
        ``None`` when ``radial_inlier_subset`` rejects the slice.
    """
    subset = radial_inlier_subset(points_xy, minimum_points=MIN_ANGULAR_GAP_POINTS)
    if subset is None:
        return None
    kept_points, _, center_x, center_y = subset

    offset_x = kept_points[:, 0] - center_x
    offset_y = kept_points[:, 1] - center_y
    sorted_angles = np.sort(np.arctan2(offset_y, offset_x))

    # Gaps between consecutive angles, plus the gap that closes the circle
    # from the last angle back around to the first.
    closing_gap = (sorted_angles[0] + (2.0 * np.pi)) - sorted_angles[-1]
    gaps = np.concatenate([np.diff(sorted_angles), [closing_gap]])

    largest_gap_degrees = float(np.max(gaps)) * RAD_TO_DEG
    covered_degrees = FULL_ROTATION_DEG - largest_gap_degrees
    used_count = int(kept_points.shape[0])

    return largest_gap_degrees, covered_degrees, used_count, center_x, center_y
151
+
152
+
153
def best_circular_affine_fit_cost(angle_pos_by_seq: np.ndarray) -> float:
    """
    Score how far an angular ordering is from a pure shift or reversal.

    Every circular shift of the identity ordering, in both directions, is
    tried as a prediction. The cost of one prediction is the mean circular
    distance between observed and predicted positions, divided by
    ``max(1, count / 2)``. The lowest cost, clamped to ``[0, 1]``, is returned;
    inputs with at most one element cost ``0.0``.
    """
    observed = np.asarray(angle_pos_by_seq, dtype=int)
    n = int(observed.size)
    if n <= 1:
        return 0.0

    scale = max(1.0, n / 2.0)
    ramp = np.arange(n, dtype=int)

    def mismatch(expected: np.ndarray) -> float:
        # Circular distance: going around the ring either way, take the shorter.
        gap = np.abs(observed - expected)
        return float(np.mean(np.minimum(gap, n - gap))) / scale

    lowest = min(
        mismatch((oriented + shift) % n)
        for oriented in ((sign * ramp) % n for sign in (1, -1))
        for shift in range(n)
    )
    return float(min(1.0, max(0.0, lowest)))
175
+
176
+
177
def sequence_angle_order_stats(
    points: np.ndarray,
    order_config: AngleOrderRuleConfig,
) -> dict[str, float | int] | None:
    """
    Compare the sequence order of one slice's points with their angular order.

    Points are first collapsed per strand, then filtered for radial outliers.
    Columns 0-1 are used as the in-plane coordinates and column 2 as the
    sequence position.

    Returns:
        A dictionary of order-consistency metrics, or ``None`` when the slice
        has the wrong shape or too few points before or after filtering.
    """
    strand_points = collapse_points_by_strand(np.asarray(points, dtype=float))
    if strand_points.ndim == 2 and strand_points.shape[1] >= 4:
        strand_points = strand_points[:, :3]

    has_usable_shape = (
        strand_points.ndim == 2
        and strand_points.shape[0] >= MIN_SEQUENCE_ANGLE_ORDER_POINTS
        and strand_points.shape[1] >= 3
    )
    if not has_usable_shape:
        return None

    subset = radial_inlier_subset(
        strand_points[:, :2],
        minimum_points=MIN_SEQUENCE_ANGLE_ORDER_POINTS,
    )
    if subset is None:
        return None
    kept_xy, keep_mask, center_x, center_y = subset

    kept_sequence_position = strand_points[keep_mask, 2]
    used = int(kept_xy.shape[0])

    # Stable sorts so that ties keep their input order.
    by_sequence = np.argsort(kept_sequence_position, kind="mergesort")
    offset_x = kept_xy[:, 0] - center_x
    offset_y = kept_xy[:, 1] - center_y
    bearing = (np.arctan2(offset_y, offset_x) * RAD_TO_DEG) % FULL_ROTATION_DEG
    by_angle = np.argsort(bearing, kind="mergesort")

    # Invert the angular permutation: angular_rank[i] is point i's rank by angle.
    angular_rank = np.empty(used, dtype=int)
    angular_rank[by_angle] = np.arange(used, dtype=int)
    rank_by_sequence = angular_rank[by_sequence]

    # Circular rank jump between each pair of sequence-consecutive points.
    raw_jump = np.abs(np.diff(rank_by_sequence))
    ring_jump = np.minimum(raw_jump, used - raw_jump).astype(float)

    if ring_jump.size:
        local_fraction = float(
            np.mean(ring_jump <= float(order_config.local_step_max))
        )
        mean_step = float(np.mean(ring_jump))
        max_step = float(np.max(ring_jump))
    else:
        local_fraction = 1.0
        mean_step = 0.0
        max_step = 0.0

    circular_cost = float(best_circular_affine_fit_cost(rank_by_sequence))

    # Euclidean distance between sequence-consecutive points in the slice plane.
    hops = np.diff(kept_xy[by_sequence], axis=0)
    hop_length = np.sqrt(np.sum(hops * hops, axis=1))
    if hop_length.size:
        typical_hop = float(np.median(hop_length))
        hop_mad = float(np.median(np.abs(hop_length - typical_hop)))
        hop_sigma = float(ROBUST_SIGMA_SCALE * hop_mad)
        if typical_hop > TOLERANCE:
            hop_cv = float(hop_sigma / typical_hop)
        else:
            hop_cv = float("inf")
    else:
        typical_hop = 0.0
        hop_cv = 0.0

    return {
        "order_used_n": int(used),
        "order_local_frac": float(local_fraction),
        "order_mean_step": float(mean_step),
        "order_max_step": float(max_step),
        "order_mean_circ_dist_norm": float(circular_cost),
        "seq_neighbor_dist_median": float(typical_hop),
        "seq_neighbor_dist_robust_cv": float(hop_cv),
    }