risk-network 0.0.14b2__py3-none-any.whl → 0.0.14b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/_neighborhoods/_domains.py +77 -26
- risk/_neighborhoods/_neighborhoods.py +45 -23
- risk/_network/_graph/_summary.py +18 -20
- {risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/METADATA +1 -1
- {risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/RECORD +9 -9
- {risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/WHEEL +0 -0
- {risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.14b2.dist-info → risk_network-0.0.14b3.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/_neighborhoods/_domains.py
CHANGED
@@ -54,37 +54,48 @@ def define_domains(
|
|
54
54
|
Raises:
|
55
55
|
ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
|
56
56
|
"""
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
# Validate args first; let user mistakes raise immediately
|
58
|
+
clustering_off = _validate_clustering_args(
|
59
|
+
linkage_criterion, linkage_method, linkage_metric, linkage_threshold
|
60
|
+
)
|
60
61
|
|
62
|
+
# If clustering is turned off, assign unique domains and skip
|
63
|
+
if clustering_off:
|
64
|
+
n_rows = len(top_annotation)
|
65
|
+
logger.warning("Clustering is turned off. Skipping clustering.")
|
66
|
+
top_annotation["domain"] = range(1, n_rows + 1)
|
67
|
+
else:
|
61
68
|
# Transpose the matrix to cluster annotations
|
62
69
|
m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
|
63
70
|
# Safeguard the matrix by replacing NaN, Inf, and -Inf values
|
64
71
|
m = _safeguard_matrix(m)
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
72
|
+
try:
|
73
|
+
# Optimize silhouette score across different linkage methods and distance metrics
|
74
|
+
(
|
75
|
+
best_linkage,
|
76
|
+
best_metric,
|
77
|
+
best_threshold,
|
78
|
+
) = _optimize_silhouette_across_linkage_and_metrics(
|
79
|
+
m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
|
80
|
+
)
|
81
|
+
# Perform hierarchical clustering
|
82
|
+
Z = linkage(m, method=best_linkage, metric=best_metric)
|
83
|
+
logger.warning(
|
84
|
+
f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
|
85
|
+
)
|
86
|
+
# Calculate the optimal threshold for clustering
|
87
|
+
max_d_optimal = np.max(Z[:, 2]) * best_threshold
|
88
|
+
# Assign domains to the annotation matrix
|
89
|
+
domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
|
90
|
+
top_annotation["domain"] = 0
|
91
|
+
top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
|
92
|
+
except (LinAlgError, ValueError):
|
93
|
+
# Numerical errors or degenerate input are handled gracefully (not user error)
|
94
|
+
n_rows = len(top_annotation)
|
95
|
+
logger.error(
|
96
|
+
"Clustering failed due to numerical or data degeneracy. Assigning unique domains."
|
97
|
+
)
|
98
|
+
top_annotation["domain"] = range(1, n_rows + 1)
|
88
99
|
|
89
100
|
# Create DataFrames to store domain information
|
90
101
|
node_to_significance = pd.DataFrame(
|
@@ -184,6 +195,46 @@ def trim_domains(
|
|
184
195
|
return valid_domains, valid_trimmed_domains_matrix
|
185
196
|
|
186
197
|
|
198
|
+
def _validate_clustering_args(
|
199
|
+
linkage_criterion: str,
|
200
|
+
linkage_method: str,
|
201
|
+
linkage_metric: str,
|
202
|
+
linkage_threshold: Union[float, str],
|
203
|
+
) -> bool:
|
204
|
+
"""
|
205
|
+
Validate user-provided clustering arguments.
|
206
|
+
|
207
|
+
Returns:
|
208
|
+
bool: True if clustering is turned off (criterion == 'off'); False otherwise.
|
209
|
+
|
210
|
+
Raises:
|
211
|
+
ValueError: If any argument is invalid (user error).
|
212
|
+
"""
|
213
|
+
# Allow opting out of clustering without raising
|
214
|
+
if linkage_criterion == "off":
|
215
|
+
return True
|
216
|
+
# Validate linkage method (allow "auto")
|
217
|
+
if linkage_method != "auto" and linkage_method not in LINKAGE_METHODS:
|
218
|
+
raise ValueError(
|
219
|
+
f"Invalid linkage_method '{linkage_method}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METHODS)}"
|
220
|
+
)
|
221
|
+
# Validate linkage metric (allow "auto")
|
222
|
+
if linkage_metric != "auto" and linkage_metric not in LINKAGE_METRICS:
|
223
|
+
raise ValueError(
|
224
|
+
f"Invalid linkage_metric '{linkage_metric}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METRICS)}"
|
225
|
+
)
|
226
|
+
# Validate linkage threshold (allow "auto"; otherwise must be float in (0, 1])
|
227
|
+
if linkage_threshold != "auto":
|
228
|
+
try:
|
229
|
+
lt = float(linkage_threshold)
|
230
|
+
except (TypeError, ValueError):
|
231
|
+
raise ValueError("linkage_threshold must be 'auto' or a float in the interval (0, 1].")
|
232
|
+
if not (0.0 < lt <= 1.0):
|
233
|
+
raise ValueError(f"linkage_threshold must be within (0, 1]. Received: {lt}")
|
234
|
+
|
235
|
+
return False
|
236
|
+
|
237
|
+
|
187
238
|
def _safeguard_matrix(matrix: np.ndarray) -> np.ndarray:
|
188
239
|
"""
|
189
240
|
Safeguard the matrix by replacing NaN, Inf, and -Inf values.
|
@@ -394,34 +394,33 @@ def _prune_neighbors(
|
|
394
394
|
# Identify indices with non-zero rows in the binary significance matrix
|
395
395
|
non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
|
396
396
|
median_distances = []
|
397
|
+
distance_lookup = {}
|
397
398
|
for node in non_zero_indices:
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
399
|
+
dist = _median_distance_to_significant_neighbors(
|
400
|
+
node, network, significant_binary_significance_matrix
|
401
|
+
)
|
402
|
+
if dist is not None:
|
403
|
+
median_distances.append(dist)
|
404
|
+
distance_lookup[node] = dist
|
405
|
+
|
406
|
+
if not median_distances:
|
407
|
+
logger.warning("No significant neighbors found for pruning.")
|
408
|
+
significant_significance_matrix = np.where(
|
409
|
+
significant_binary_significance_matrix == 1, significance_matrix, 0
|
410
|
+
)
|
411
|
+
return (
|
412
|
+
significance_matrix,
|
413
|
+
significant_binary_significance_matrix,
|
414
|
+
significant_significance_matrix,
|
415
|
+
)
|
408
416
|
|
409
417
|
# Calculate the distance threshold value based on rank
|
410
418
|
distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
|
411
419
|
# Prune nodes that are outliers based on the distance threshold
|
412
|
-
for
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
if significant_binary_significance_matrix[n].sum() != 0
|
417
|
-
]
|
418
|
-
if neighbors:
|
419
|
-
median_distance = np.median(
|
420
|
-
[_get_euclidean_distance(row_index, n, network) for n in neighbors]
|
421
|
-
)
|
422
|
-
if median_distance >= distance_threshold_value:
|
423
|
-
significance_matrix[row_index] = 0
|
424
|
-
significant_binary_significance_matrix[row_index] = 0
|
420
|
+
for node, dist in distance_lookup.items():
|
421
|
+
if dist >= distance_threshold_value:
|
422
|
+
significance_matrix[node] = 0
|
423
|
+
significant_binary_significance_matrix[node] = 0
|
425
424
|
|
426
425
|
# Create a matrix where non-significant entries are set to zero
|
427
426
|
significant_significance_matrix = np.where(
|
@@ -435,6 +434,29 @@ def _prune_neighbors(
|
|
435
434
|
)
|
436
435
|
|
437
436
|
|
437
|
+
def _median_distance_to_significant_neighbors(
    node: Any, network: nx.Graph, significance_mask: np.ndarray
) -> Union[float, None]:
    """
    Calculate the median distance from a node to its significant neighbors.

    A neighbor counts as significant when its row in the significance mask has a non-zero sum.

    Args:
        node (Any): The node for which the median distance is being calculated.
        network (nx.Graph): The network graph containing the nodes.
        significance_mask (np.ndarray): Binary matrix indicating significant nodes.

    Returns:
        Union[float, None]: The median distance to significant neighbors, or None if no significant neighbors exist.
    """
    # Keep only neighbors whose mask row is non-zero
    neighbors = [n for n in network.neighbors(node) if significance_mask[n].sum() != 0]
    if not neighbors:
        # No significant neighbor: signal "no distance" with None rather than a number
        return None

    # Calculate distances to significant neighbors
    distances = [_get_euclidean_distance(node, n, network) for n in neighbors]

    return np.median(distances)
|
458
|
+
|
459
|
+
|
438
460
|
def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
|
439
461
|
"""
|
440
462
|
Calculate the Euclidean distance between two nodes in the network.
|
risk/_network/_graph/_summary.py
CHANGED
@@ -84,7 +84,7 @@ class Summary:
|
|
84
84
|
|
85
85
|
Returns:
|
86
86
|
pd.DataFrame: Processed DataFrame containing significance scores, p-values, q-values,
|
87
|
-
and annotation
|
87
|
+
and matched annotation members information.
|
88
88
|
"""
|
89
89
|
log_header("Loading analysis summary")
|
90
90
|
# Calculate significance and depletion q-values from p-value matrices in annotation
|
@@ -109,9 +109,9 @@ class Summary:
|
|
109
109
|
# Add minimum p-values and q-values to DataFrame
|
110
110
|
results[
|
111
111
|
[
|
112
|
-
"Enrichment P-
|
112
|
+
"Enrichment P-value",
|
113
113
|
"Enrichment Q-value",
|
114
|
-
"Depletion P-
|
114
|
+
"Depletion P-value",
|
115
115
|
"Depletion Q-value",
|
116
116
|
]
|
117
117
|
] = results.apply(
|
@@ -126,13 +126,13 @@ class Summary:
|
|
126
126
|
axis=1,
|
127
127
|
result_type="expand",
|
128
128
|
)
|
129
|
-
# Add annotation members and their counts
|
130
|
-
results["
|
129
|
+
# Add matched annotation members and their counts
|
130
|
+
results["Matched Members"] = results["Annotation"].apply(
|
131
131
|
lambda desc: self._get_annotation_members(desc)
|
132
132
|
)
|
133
|
-
results["
|
134
|
-
"
|
135
|
-
|
133
|
+
results["Matched Count"] = results["Matched Members"].apply(
|
134
|
+
lambda x: len(x.split(";")) if x else 0
|
135
|
+
)
|
136
136
|
|
137
137
|
# Reorder columns and drop rows with NaN values
|
138
138
|
results = (
|
@@ -140,12 +140,12 @@ class Summary:
|
|
140
140
|
[
|
141
141
|
"Domain ID",
|
142
142
|
"Annotation",
|
143
|
-
"
|
144
|
-
"
|
143
|
+
"Matched Members",
|
144
|
+
"Matched Count",
|
145
145
|
"Summed Significance Score",
|
146
|
-
"Enrichment P-
|
146
|
+
"Enrichment P-value",
|
147
147
|
"Enrichment Q-value",
|
148
|
-
"Depletion P-
|
148
|
+
"Depletion P-value",
|
149
149
|
"Depletion Q-value",
|
150
150
|
]
|
151
151
|
]
|
@@ -159,20 +159,18 @@ class Summary:
|
|
159
159
|
results = pd.merge(ordered_annotation, results, on="Annotation", how="left").fillna(
|
160
160
|
{
|
161
161
|
"Domain ID": -1,
|
162
|
-
"
|
163
|
-
"
|
162
|
+
"Matched Members": "",
|
163
|
+
"Matched Count": 0,
|
164
164
|
"Summed Significance Score": 0.0,
|
165
|
-
"Enrichment P-
|
165
|
+
"Enrichment P-value": 1.0,
|
166
166
|
"Enrichment Q-value": 1.0,
|
167
|
-
"Depletion P-
|
167
|
+
"Depletion P-value": 1.0,
|
168
168
|
"Depletion Q-value": 1.0,
|
169
169
|
}
|
170
170
|
)
|
171
|
-
# Convert "Domain ID" and "
|
171
|
+
# Convert "Domain ID" and "Matched Count" to integers
|
172
172
|
results["Domain ID"] = results["Domain ID"].astype(int)
|
173
|
-
results["
|
174
|
-
"Annotation Members in Network Count"
|
175
|
-
].astype(int)
|
173
|
+
results["Matched Count"] = results["Matched Count"].astype(int)
|
176
174
|
|
177
175
|
return results
|
178
176
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
1
|
+
risk/__init__.py,sha256=14fTdsWCVA1DS1M7axwUvQzyssu4dRwwhdLdnN-5h1M,143
|
2
2
|
risk/_risk.py,sha256=VULCdM41BlWKM1ou4Qc579ffZ9dMZkfhAwKYgbaEeKM,1054
|
3
3
|
risk/_annotation/__init__.py,sha256=zr7w1DHkmvrkKFGKdPhrcvZHV-xsfd5TZOaWtFiP4Dc,164
|
4
4
|
risk/_annotation/_annotation.py,sha256=03vcnkdi4HGH5UUyokUyOdyyjXOLoKSmLFuK7VAl41c,15174
|
@@ -10,8 +10,8 @@ risk/_log/_parameters.py,sha256=8FkeeBtULDFVw3UijLArK-G3OIjy6YXyRXmPPckK7fU,5893
|
|
10
10
|
risk/_neighborhoods/__init__.py,sha256=eKwjpEUKSUmAirRZ_qPTVF7MLkvhCn_fulPVq158wM8,185
|
11
11
|
risk/_neighborhoods/_api.py,sha256=s1f4d_nEPWc66KDmOUUpRNXzp6dfoevw45ewOg9eMNo,23298
|
12
12
|
risk/_neighborhoods/_community.py,sha256=Tr-EHO91EWbMmNr_z21UCngiqWOlWIqcjwBig_VXI8c,17850
|
13
|
-
risk/_neighborhoods/_domains.py,sha256=
|
14
|
-
risk/_neighborhoods/_neighborhoods.py,sha256=
|
13
|
+
risk/_neighborhoods/_domains.py,sha256=Q3MUWW9KjuERpxs4H1dNFhalDjdatMkWSnB12BerUDU,16580
|
14
|
+
risk/_neighborhoods/_neighborhoods.py,sha256=9hpQCYG0d9fZLYj-fVACgLJBtw3dW8C-0YbE2OWuX-M,21436
|
15
15
|
risk/_neighborhoods/_stats/__init__.py,sha256=nL83A3unzpCTzRDPanCiqU1RsKPJJNDe46S9igoe3pg,264
|
16
16
|
risk/_neighborhoods/_stats/_tests.py,sha256=-ioHdyrsgW63YnypKFpanatauuKrF3LT7aMZ3b6otrU,12091
|
17
17
|
risk/_neighborhoods/_stats/_permutation/__init__.py,sha256=nfTaW29CK8OZCdFnpMVlHnFaqr1E4AZp6mvhlUazHXM,140
|
@@ -23,7 +23,7 @@ risk/_network/_graph/__init__.py,sha256=SFgxgxUiZK4vvw6bdQ04DSMXEr8xjMaQV-Wne6wA
|
|
23
23
|
risk/_network/_graph/_api.py,sha256=sp3_mLJDP_xQexYBjyM17iyzLb2oGmiC050kcw-jVho,8474
|
24
24
|
risk/_network/_graph/_graph.py,sha256=x2EWT_ZVwxh7m9a01yG4WMdmAxBxiaxX3CvkqP9QAXE,12486
|
25
25
|
risk/_network/_graph/_stats.py,sha256=6mxZkuL6LJlwKDsBbP22DAVkNUEhq-JZwYMKhFKD08k,7359
|
26
|
-
risk/_network/_graph/_summary.py,sha256=
|
26
|
+
risk/_network/_graph/_summary.py,sha256=I8FhMdpawGbvCJHPpsyvbtM7Qa0xXzwKvjnX9N8HSm8,10141
|
27
27
|
risk/_network/_plotter/__init__.py,sha256=qFRtQKSBGIqmUGwmA7VPL7hTHBb9yvRIt0nLISXnwkY,84
|
28
28
|
risk/_network/_plotter/_api.py,sha256=OaV1CCRGsz98wEEzyEhaq2CqEuZh6t2qS7g_rY6HJJs,1727
|
29
29
|
risk/_network/_plotter/_canvas.py,sha256=H7rPz4Gv7ED3bDHMif4cf2usdU4ifmxzXeug5A_no68,13599
|
@@ -34,8 +34,8 @@ risk/_network/_plotter/_plotter.py,sha256=F2hw-spUdsXjvuG36o0YFR3Pnd-CZOHYUq4vW0
|
|
34
34
|
risk/_network/_plotter/_utils/__init__.py,sha256=JXgjKiBWvXx0X2IeFnrOh5YZQGQoELbhJZ0Zh2mFEOo,211
|
35
35
|
risk/_network/_plotter/_utils/_colors.py,sha256=JCliSvz8_-TsjilaRHSEsqdXFBUYlzhXKOSRGdCm9Kw,19177
|
36
36
|
risk/_network/_plotter/_utils/_layout.py,sha256=GyGLc2U1WWUVL1Te9uPi_CLqlW_E4TImXRAL5TeA5D8,3633
|
37
|
-
risk_network-0.0.
|
38
|
-
risk_network-0.0.
|
39
|
-
risk_network-0.0.
|
40
|
-
risk_network-0.0.
|
41
|
-
risk_network-0.0.
|
37
|
+
risk_network-0.0.14b3.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
38
|
+
risk_network-0.0.14b3.dist-info/METADATA,sha256=SG8HbB0TBqNd_zgtKV1Ri23RoBIRy_poTAfeN9ZaSBA,6853
|
39
|
+
risk_network-0.0.14b3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
40
|
+
risk_network-0.0.14b3.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
41
|
+
risk_network-0.0.14b3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|