risk-network: 0.0.8b26.tar.gz → 0.0.9b1.tar.gz
This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/PKG-INFO +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/__init__.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/annotations.py +39 -38
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/log/__init__.py +1 -1
- risk_network-0.0.8b26/risk/log/config.py → risk_network-0.0.9b1/risk/log/console.py +2 -2
- risk_network-0.0.9b1/risk/log/enrichment.py +18 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/log/params.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/domains.py +15 -15
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/neighborhoods.py +101 -89
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/graph.py +25 -25
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/canvas.py +3 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/contour.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/labels.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/network.py +28 -28
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/utils/color.py +27 -27
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/risk.py +20 -18
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/stats.py +13 -13
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/PKG-INFO +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/SOURCES.txt +2 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/LICENSE +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/MANIFEST.in +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/README.md +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/pyproject.toml +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/io.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/constants.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/community.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/geometry.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/io.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/plotter.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/utils/layout.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/hypergeom.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/permutation.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/test_functions.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/poisson.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/setup.cfg +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/setup.py +0 -0
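Notable in the file list: `risk/log/config.py` is renamed to `risk/log/console.py`, and a new module `risk/log/enrichment.py` is added. The hunks below (reconstructed from the diff viewer, which truncated the text of many removed lines; removals with lost text appear as bare `-` markers or cut-off fragments) cover `risk/annotations/annotations.py`, the new `risk/log/enrichment.py`, and `risk/neighborhoods/domains.py`. Because `enrichment.py` imports `from .console import logger, log_header`, the package `__init__.py` plausibly re-exports from the renamed module; a hypothetical sketch (the diff records only a one-line change to the real `risk/log/__init__.py`, not its contents):

```python
# risk/log/__init__.py -- hypothetical sketch, not the file's actual text.
# After the config.py -> console.py rename, re-exporting from the new module
# keeps existing imports such as `from risk.log import logger` (seen in the
# domains.py hunk below) working unchanged.
from .console import logger, log_header
```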
`risk/annotations/annotations.py` (+39 −38):

```diff
@@ -83,69 +83,69 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-
-
-
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.

     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-
-
-
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.

     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Sum the columns of the significant
-
-    # Create DataFrame to store annotations, their neighborhood
-
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "
-            "
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-
+    annotations_significance_matrix["significant_annotations"] = False
     # Apply size constraints to identify potential significant annotations
-
+    annotations_significance_matrix.loc[
         (
-
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-
-
-
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
         "size_connected_components"
     ].astype(object)
-
+    annotations_significance_matrix["num_large_connected_components"] = 0

-    for attribute in
-
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify
-
-            compress(list(network),
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-
-        # Analyze connected components within the
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(
+            nx.connected_components(significant_network), key=len, reverse=True
         )
        size_connected_components = np.array([len(c) for c in connected_components])

```
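To make the renamed signature concrete, here is a hypothetical call with synthetic inputs (the toy graph, the values, and the lowered `min_cluster_size` are illustrative only; the import path points at the module shown in this diff):

```python
import networkx as nx
import numpy as np

from risk.annotations.annotations import define_top_annotations

# Synthetic example: 4 nodes annotated with 2 terms. Rows are nodes,
# columns are annotation terms, matching the shapes the signature expects.
network = nx.path_graph(4)
labels = ["term_A", "term_B"]
significance = np.array([[0.9, 0.0], [0.8, 0.0], [0.0, 0.7], [0.0, 0.0]])
binary = (significance > 0).astype(int)
sums = binary.sum(axis=0).tolist()  # per-term neighborhood significance sums

top = define_top_annotations(
    network=network,
    ordered_annotation_labels=labels,
    neighborhood_significance_sums=sums,
    significant_significance_matrix=significance,
    significant_binary_significance_matrix=binary,
    min_cluster_size=1,  # lowered from the default of 5 so the toy terms survive
)
print(top[["full_terms", "significant_annotations"]])
```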
`risk/annotations/annotations.py` (continued):

```diff
@@ -159,23 +159,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)

         # Assign the number of connected components
-
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-
-
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )

-    return
+    return annotations_significance_matrix


 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
```
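One detail in this hunk deserves a note: `size_connected_components` is pre-cast to `object` dtype and written with `.at` because a whole Python list is being stored in a single cell; plain `.loc` assignment of a list tries to align it element-wise and fails. A standalone illustration of the pattern:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"id": [0, 1]})
df["size_connected_components"] = None
# Cast to object so one cell can hold an arbitrary Python list.
df["size_connected_components"] = df["size_connected_components"].astype(object)

sizes = np.array([5, 3, 1])
# .at writes the list into a single cell; .loc with a list value would
# attempt element-wise alignment and raise a length-mismatch ValueError.
df.at[0, "size_connected_components"] = sizes.tolist()
print(df.loc[0, "size_connected_components"])  # [5, 3, 1]
```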
`risk/annotations/annotations.py` (continued):

```diff
@@ -184,16 +185,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)

     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
-        scores_column (pd.Series): A pandas Series containing
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.

     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
     # Handle case where all scores are the same
     if scores_column.max() == scores_column.min():
         normalized_scores = pd.Series([1] * len(scores_column))
     else:
-        # Normalize the
+        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
             scores_column.max() - scores_column.min()
         )
```
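The `scores_column.max() == scores_column.min()` guard matters because min-max scaling degenerates to division by zero when all scores are equal. The same logic as a self-contained function:

```python
import pandas as pd

def normalize(scores: pd.Series) -> pd.Series:
    # Guard: identical scores would make min-max scaling divide by zero,
    # so weight every term equally instead (mirroring the hunk above).
    if scores.max() == scores.min():
        return pd.Series([1] * len(scores))
    return (scores - scores.min()) / (scores.max() - scores.min())

print(normalize(pd.Series([2.0, 2.0, 2.0])).tolist())  # [1, 1, 1]
print(normalize(pd.Series([1.0, 2.0, 3.0])).tolist())  # [0.0, 0.5, 1.0]
```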
`risk/log/enrichment.py` (new file, +18):

```diff
@@ -0,0 +1,18 @@
+"""
+risk/log/enrichment
+~~~~~~~~~~~~~~~~~~~
+"""
+
+import csv
+import json
+import warnings
+from datetime import datetime
+from functools import wraps
+from typing import Any, Dict
+
+import numpy as np
+
+from .console import logger, log_header
+
+# Suppress all warnings - this is to resolve warnings from multiprocessing
+warnings.filterwarnings("ignore")
```
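A caveat about the module's last line: `warnings.filterwarnings("ignore")` executed at import time silences warnings for the whole process, not just this module. A small demonstration; the scoped context-manager form shown second is a general Python idiom, not something this package uses:

```python
import warnings

warnings.filterwarnings("ignore")  # as in risk/log/enrichment.py
warnings.warn("multiprocessing noise")  # silently dropped, process-wide

# Scoped alternative: the stricter filter applies only inside the block,
# and the previous filters are restored on exit.
with warnings.catch_warnings():
    warnings.simplefilter("error")
    try:
        warnings.warn("visible again inside the block")
    except UserWarning as exc:
        print(f"caught: {exc}")
```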
`risk/neighborhoods/domains.py` (+15 −15):

```diff
@@ -20,17 +20,17 @@ from risk.log import logger

 def define_domains(
     top_annotations: pd.DataFrame,
-
+    significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
 ) -> pd.DataFrame:
-    """Define domains and assign nodes to these domains based on their
+    """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.

     Args:
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
-
+        significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups.
         linkage_method (str): The linkage method for clustering.
         linkage_metric (str): The linkage metric for clustering.
```
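A hypothetical call mirroring the renamed parameter, reusing `top` and `binary` from the `define_top_annotations` sketch above (the linkage values are ordinary SciPy choices picked for illustration; this diff does not show the package's defaults):

```python
from risk.neighborhoods.domains import define_domains

domains = define_domains(
    top_annotations=top,  # from the define_top_annotations sketch above
    significant_neighborhoods_significance=binary,
    linkage_criterion="distance",  # standard scipy fcluster criterion
    linkage_method="average",      # standard scipy linkage method
    linkage_metric="euclidean",    # standard scipy distance metric
)
```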
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -40,7 +40,7 @@ def define_domains(
     """
     try:
         # Transpose the matrix to cluster annotations
-        m =
+        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
             m, linkage_criterion, linkage_method, linkage_metric
         )
```
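The restored `m = ...` line does two things at once: a boolean column mask keeps only the annotations flagged `significant_annotations`, and `.T` flips the matrix so each row becomes one annotation's profile across nodes, the orientation the downstream clustering operates on. A toy version:

```python
import numpy as np
import pandas as pd

# nodes x annotations matrix (toy values)
significance = np.array([[0.9, 0.0, 0.4],
                         [0.8, 0.1, 0.0],
                         [0.0, 0.0, 0.5]])
top_annotations = pd.DataFrame({"significant_annotations": [True, False, True]})

# Keep only significant annotation columns, then transpose so each row is
# one annotation's per-node profile.
m = significance[:, top_annotations["significant_annotations"]].T
print(m.shape)  # (2, 3): 2 significant annotations x 3 nodes
```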
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -65,13 +65,13 @@ def define_domains(
         top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains

     # Create DataFrames to store domain information
-
-        data=
+    node_to_significance = pd.DataFrame(
+        data=significant_neighborhoods_significance,
         columns=[top_annotations.index.values, top_annotations["domain"]],
     )
-    node_to_domain =
+    node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()

-    # Find the maximum
+    # Find the maximum significance score for each node
     t_max = node_to_domain.loc[:, 1:].max(axis=1)
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0
```
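`node_to_significance` is built with two-level columns, (annotation index, domain), so `groupby(level="domain", axis=1).sum()` collapses the annotation columns into one column per domain for every node. A toy reproduction; note that `axis=1` in `groupby` is deprecated in recent pandas, where `df.T.groupby(level="domain").sum().T` is the forward-compatible equivalent:

```python
import numpy as np
import pandas as pd

sig = np.array([[1.0, 0.5, 0.0],
                [0.0, 0.2, 0.7]])
# Two-level columns mimicking
# columns=[top_annotations.index.values, top_annotations["domain"]]:
cols = pd.MultiIndex.from_arrays([[0, 1, 2], [1, 1, 2]], names=[None, "domain"])
node_to_significance = pd.DataFrame(sig, columns=cols)

# Sum annotation columns within each domain, per node.
node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()
print(node_to_domain)
# domain    1    2
# 0       1.5  0.0
# 1       0.2  0.7
```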
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -119,27 +119,27 @@ def trim_domains_and_top_annotations(
     top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id

-    # Normalize "num
+    # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
     top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-        "
+        "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    # Modify the lambda function to pass both full_terms and
+    # Modify the lambda function to pass both full_terms and significant_significance_score
     top_annotations["combined_terms"] = top_annotations.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )

-    # Perform the groupby operation while retaining the other columns and adding the weighting with
+    # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
         top_annotations.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
-
+            significance_scores=("significant_significance_score", lambda x: list(x)),
         )
         .reset_index()
     )
     domain_labels["combined_terms"] = domain_labels.apply(
         lambda row: get_weighted_description(
-            pd.Series(row["full_terms"]), pd.Series(row["
+            pd.Series(row["full_terms"]), pd.Series(row["significance_scores"])
         ),
         axis=1,
     )
```
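The `normalized_value` transform converts raw sums into 1-10 integer weights via within-domain percentile rank, and each weight then controls how many times a term is repeated in `combined_terms`. The scaling step in isolation:

```python
import numpy as np
import pandas as pd

x = pd.Series([3, 10, 10, 42])
# Percentile rank within the group, scaled to 1-10 and rounded up,
# exactly as in the .transform(...) call above.
scaled = (x.rank(pct=True) * 10).apply(np.ceil).astype(int)
print(scaled.tolist())  # [3, 7, 7, 10]
```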
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -150,7 +150,7 @@ def trim_domains_and_top_annotations(
             "domain": "id",
             "combined_terms": "normalized_description",
             "full_terms": "full_descriptions",
-            "
+            "significance_scores": "significance_scores",
         }
     ).set_index("id")

```