risk-network 0.0.9b9__py3-none-any.whl → 0.0.9b11__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.9-beta.9"
10
+ __version__ = "0.0.9-beta.11"
risk/neighborhoods/community.py CHANGED
@@ -4,10 +4,12 @@ risk/neighborhoods/community
4
4
  """
5
5
 
6
6
  import community as community_louvain
7
+ import igraph as ig
8
+ import markov_clustering as mc
7
9
  import networkx as nx
8
10
  import numpy as np
9
- import markov_clustering as mc
10
- from networkx.algorithms.community import asyn_lpa_communities, greedy_modularity_communities
11
+ from leidenalg import find_partition, RBConfigurationVertexPartition
12
+ from networkx.algorithms.community import greedy_modularity_communities
11
13
 
12
14
 
13
15
  def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
@@ -21,18 +23,19 @@ def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
21
23
  """
22
24
  # Detect communities using the Greedy Modularity method
23
25
  communities = greedy_modularity_communities(network)
26
+ # Get the list of nodes in the original NetworkX graph
27
+ nodes = list(network.nodes())
28
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
24
29
  # Create a binary neighborhood matrix
25
- n_nodes = network.number_of_nodes()
30
+ n_nodes = len(nodes)
26
31
  neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
27
- # Create a mapping from node to index in the matrix
28
- node_index = {node: i for i, node in enumerate(network.nodes())}
29
32
  # Fill in the neighborhood matrix for nodes in the same community
30
33
  for community in communities:
31
34
  # Iterate through all pairs of nodes in the same community
32
35
  for node_i in community:
33
- idx_i = node_index[node_i]
34
36
  for node_j in community:
35
- idx_j = node_index[node_j]
37
+ idx_i = node_index_map[node_i]
38
+ idx_j = node_index_map[node_j]
36
39
  # Set them as neighbors (1) in the binary matrix
37
40
  neighborhoods[idx_i, idx_j] = 1
38
41
 
@@ -46,21 +49,61 @@ def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
46
49
  network (nx.Graph): The network graph.
47
50
 
48
51
  Returns:
49
- np.ndarray: Binary neighborhood matrix on Label Propagation.
52
+ np.ndarray: A binary neighborhood matrix on Label Propagation.
50
53
  """
51
54
  # Apply Label Propagation for community detection
52
55
  communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
56
+ # Get the list of nodes in the network
57
+ nodes = list(network.nodes())
58
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
53
59
  # Create a binary neighborhood matrix
54
- num_nodes = network.number_of_nodes()
60
+ num_nodes = len(nodes)
55
61
  neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
56
- # Create a mapping from node to index in the matrix
57
- node_index = {node: i for i, node in enumerate(network.nodes())}
58
- # Assign neighborhoods based on community labels
62
+ # Assign neighborhoods based on community labels using the mapped indices
59
63
  for community in communities:
60
64
  for node_i in community:
61
- idx_i = node_index[node_i]
62
65
  for node_j in community:
63
- idx_j = node_index[node_j]
66
+ idx_i = node_index_map[node_i]
67
+ idx_j = node_index_map[node_j]
68
+ neighborhoods[idx_i, idx_j] = 1
69
+
70
+ return neighborhoods
71
+
72
+
73
+ def calculate_leiden_neighborhoods(
74
+ network: nx.Graph, resolution: float = 1.0, random_seed: int = 888
75
+ ) -> np.ndarray:
76
+ """Calculate neighborhoods using the Leiden method.
77
+
78
+ Args:
79
+ network (nx.Graph): The network graph.
80
+ resolution (float, optional): Resolution parameter for the Leiden method. Defaults to 1.0.
81
+ random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
82
+
83
+ Returns:
84
+ np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
85
+ """
86
+ # Convert NetworkX graph to iGraph
87
+ igraph_network = ig.Graph.from_networkx(network)
88
+ # Apply Leiden algorithm using RBConfigurationVertexPartition, which supports resolution
89
+ partition = find_partition(
90
+ igraph_network,
91
+ partition_type=RBConfigurationVertexPartition,
92
+ resolution_parameter=resolution,
93
+ seed=random_seed,
94
+ )
95
+ # Get the list of nodes in the original NetworkX graph
96
+ nodes = list(network.nodes())
97
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
98
+ # Create a binary neighborhood matrix
99
+ num_nodes = len(nodes)
100
+ neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
101
+ # Assign neighborhoods based on community partitions using the mapped indices
102
+ for community in partition:
103
+ for node_i in community:
104
+ for node_j in community:
105
+ idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
106
+ idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
64
107
  neighborhoods[idx_i, idx_j] = 1
65
108
 
66
109
  return neighborhoods
@@ -77,28 +120,29 @@ def calculate_louvain_neighborhoods(
77
120
  random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
78
121
 
79
122
  Returns:
80
- np.ndarray: Binary neighborhood matrix on the Louvain method.
123
+ np.ndarray: A binary neighborhood matrix on the Louvain method.
81
124
  """
82
125
  # Apply Louvain method to partition the network
83
126
  partition = community_louvain.best_partition(
84
127
  network, resolution=resolution, random_state=random_seed
85
128
  )
129
+ # Get the list of nodes in the network and create a mapping to indices
130
+ nodes = list(network.nodes())
131
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
86
132
  # Create a binary neighborhood matrix
87
- num_nodes = network.number_of_nodes()
133
+ num_nodes = len(nodes)
88
134
  neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
89
- # Create a mapping from node to index in the matrix
90
- node_index = {node: i for i, node in enumerate(network.nodes())}
91
135
  # Group nodes by community
92
136
  community_groups = {}
93
137
  for node, community in partition.items():
94
138
  community_groups.setdefault(community, []).append(node)
95
139
 
96
- # Assign neighborhoods based on community partitions
140
+ # Assign neighborhoods based on community partitions using the mapped indices
97
141
  for community, nodes in community_groups.items():
98
142
  for node_i in nodes:
99
- idx_i = node_index[node_i]
100
143
  for node_j in nodes:
101
- idx_j = node_index[node_j]
144
+ idx_i = node_index_map[node_i]
145
+ idx_j = node_index_map[node_j]
102
146
  neighborhoods[idx_i, idx_j] = 1
103
147
 
104
148
  return neighborhoods
@@ -111,53 +155,76 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
111
155
  network (nx.Graph): The network graph.
112
156
 
113
157
  Returns:
114
- np.ndarray: Binary neighborhood matrix on Markov Clustering.
158
+ np.ndarray: A binary neighborhood matrix on Markov Clustering.
115
159
  """
116
- # Convert the graph to an adjacency matrix
117
- adjacency_matrix = nx.to_numpy_array(network)
118
- # Run Markov Clustering (MCL)
119
- result = mc.run_mcl(adjacency_matrix) # MCL with default parameters
120
- # Get clusters (communities) from MCL result
160
+ # Step 1: Convert the graph to an adjacency matrix
161
+ nodes = list(network.nodes())
162
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
163
+ # Step 2: Create a reverse mapping from index to node
164
+ index_node_map = {idx: node for node, idx in node_index_map.items()}
165
+ adjacency_matrix = nx.to_numpy_array(network, nodelist=nodes)
166
+ # Step 3: Run Markov Clustering (MCL) on the adjacency matrix
167
+ result = mc.run_mcl(adjacency_matrix)
168
+ # Step 4: Get clusters (communities) from MCL result
121
169
  clusters = mc.get_clusters(result)
122
- # Create a binary neighborhood matrix
123
- num_nodes = network.number_of_nodes()
170
+ # Step 5: Create a binary neighborhood matrix
171
+ num_nodes = len(nodes)
124
172
  neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
125
- # Create a mapping from node to index in the matrix
126
- node_index = {node: i for i, node in enumerate(network.nodes())}
127
- # Assign neighborhoods based on MCL clusters
173
+ # Step 6: Assign neighborhoods based on MCL clusters using the original node labels
128
174
  for cluster in clusters:
129
175
  for node_i in cluster:
130
- idx_i = node_index[node_i]
131
176
  for node_j in cluster:
132
- idx_j = node_index[node_j]
177
+ # Map the matrix indices back to the original node labels
178
+ original_node_i = index_node_map[node_i]
179
+ original_node_j = index_node_map[node_j]
180
+ idx_i = node_index_map[original_node_i]
181
+ idx_j = node_index_map[original_node_j]
133
182
  neighborhoods[idx_i, idx_j] = 1
134
183
 
135
184
  return neighborhoods
136
185
 
137
186
 
138
187
  def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
139
- """Apply Spin Glass Community Detection to the network.
188
+ """Apply Spinglass Community Detection to the network, handling disconnected components.
140
189
 
141
190
  Args:
142
- network (nx.Graph): The network graph.
191
+ network (nx.Graph): The input network graph with 'x' and 'y' attributes for node positions.
143
192
 
144
193
  Returns:
145
- np.ndarray: Binary neighborhood matrix on Spin Glass communities.
194
+ np.ndarray: A binary neighborhood matrix based on Spinglass communities.
146
195
  """
147
- # Apply Asynchronous Label Propagation (LPA)
148
- communities = asyn_lpa_communities(network)
149
- # Create a binary neighborhood matrix
150
- num_nodes = network.number_of_nodes()
196
+ # Step 1: Find connected components in the graph
197
+ components = list(nx.connected_components(network))
198
+ # Prepare to store community results
199
+ nodes = list(network.nodes())
200
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
201
+ num_nodes = len(nodes)
151
202
  neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
152
- # Create a mapping from node to index in the matrix
153
- node_index = {node: i for i, node in enumerate(network.nodes())}
154
- # Assign neighborhoods based on community labels from LPA
155
- for community in communities:
156
- for node_i in community:
157
- idx_i = node_index[node_i]
158
- for node_j in community:
159
- idx_j = node_index[node_j]
160
- neighborhoods[idx_i, idx_j] = 1
203
+ # Step 2: Run Spinglass on each connected component
204
+ for component in components:
205
+ # Extract the subgraph corresponding to the current component
206
+ subgraph = network.subgraph(component)
207
+ # Convert the subgraph to an iGraph object
208
+ igraph_subgraph = ig.Graph.from_networkx(subgraph)
209
+ # Ensure the subgraph is connected before running Spinglass
210
+ if not igraph_subgraph.is_connected():
211
+ print("Warning: Subgraph is not connected. Skipping...")
212
+ continue
213
+
214
+ # Apply Spinglass community detection
215
+ try:
216
+ communities = igraph_subgraph.community_spinglass()
217
+ except Exception as e:
218
+ print(f"Error running Spinglass on component: {e}")
219
+ continue
220
+
221
+ # Step 3: Assign neighborhoods based on community labels
222
+ for community in communities:
223
+ for node_i in community:
224
+ for node_j in community:
225
+ idx_i = node_index_map[igraph_subgraph.vs[node_i]["_nx_name"]]
226
+ idx_j = node_index_map[igraph_subgraph.vs[node_j]["_nx_name"]]
227
+ neighborhoods[idx_i, idx_j] = 1
161
228
 
162
229
  return neighborhoods
163
230
 
@@ -169,21 +236,24 @@ def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
169
236
  network (nx.Graph): The network graph.
170
237
 
171
238
  Returns:
172
- np.ndarray: Binary neighborhood matrix on Walktrap communities.
239
+ np.ndarray: A binary neighborhood matrix on Walktrap communities.
173
240
  """
174
- # Apply Asynchronous Label Propagation (LPA)
175
- communities = asyn_lpa_communities(network)
241
+ # Convert NetworkX graph to iGraph
242
+ igraph_network = ig.Graph.from_networkx(network)
243
+ # Apply Walktrap community detection
244
+ communities = igraph_network.community_walktrap().as_clustering()
245
+ # Get the list of nodes in the original NetworkX graph
246
+ nodes = list(network.nodes())
247
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
176
248
  # Create a binary neighborhood matrix
177
- num_nodes = network.number_of_nodes()
249
+ num_nodes = len(nodes)
178
250
  neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
179
- # Create a mapping from node to index in the matrix
180
- node_index = {node: i for i, node in enumerate(network.nodes())}
181
- # Assign neighborhoods based on community labels from LPA
251
+ # Assign neighborhoods based on community labels
182
252
  for community in communities:
183
253
  for node_i in community:
184
- idx_i = node_index[node_i]
185
254
  for node_j in community:
186
- idx_j = node_index[node_j]
255
+ idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
256
+ idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
187
257
  neighborhoods[idx_i, idx_j] = 1
188
258
 
189
259
  return neighborhoods
@@ -15,6 +15,7 @@ from sklearn.metrics.pairwise import cosine_similarity
15
15
  from risk.neighborhoods.community import (
16
16
  calculate_greedy_modularity_neighborhoods,
17
17
  calculate_label_propagation_neighborhoods,
18
+ calculate_leiden_neighborhoods,
18
19
  calculate_louvain_neighborhoods,
19
20
  calculate_markov_clustering_neighborhoods,
20
21
  calculate_spinglass_neighborhoods,
@@ -29,22 +30,20 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
29
30
  def get_network_neighborhoods(
30
31
  network: nx.Graph,
31
32
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
32
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 1.0,
33
- louvain_resolution: float = 1.0,
33
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 1.0,
34
+ louvain_resolution: float = 0.1,
35
+ leiden_resolution: float = 1.0,
34
36
  random_seed: int = 888,
35
37
  ) -> np.ndarray:
36
38
  """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
37
39
 
38
40
  Args:
39
41
  network (nx.Graph): The network graph.
40
- distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
41
- metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
42
- 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
43
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
44
- Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
45
- Defaults to 1.0.
46
- louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
47
- random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
42
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
43
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
44
+ louvain_resolution (float, optional): Resolution parameter for the Louvain method.
45
+ leiden_resolution (float, optional): Resolution parameter for the Leiden method.
46
+ random_seed (int, optional): Random seed for methods requiring random initialization.
48
47
 
49
48
  Returns:
50
49
  np.ndarray: Summed neighborhood matrix from all selected algorithms.
@@ -53,14 +52,13 @@ def get_network_neighborhoods(
53
52
  random.seed(random_seed)
54
53
  np.random.seed(random_seed)
55
54
 
56
- # Ensure distance_metric is a list/tuple for multi-algorithm handling
55
+ # Ensure distance_metric and edge_rank_percentile are lists
57
56
  if isinstance(distance_metric, (str, np.ndarray)):
58
57
  distance_metric = [distance_metric]
59
- # Ensure edge_length_threshold is a list/tuple for multi-threshold handling
60
- if isinstance(edge_length_threshold, (float, int)):
61
- edge_length_threshold = [edge_length_threshold] * len(distance_metric)
62
- # Check that the number of distance metrics matches the number of edge length thresholds
63
- if len(distance_metric) != len(edge_length_threshold):
58
+ if isinstance(edge_rank_percentile, (float, int)):
59
+ edge_rank_percentile = [edge_rank_percentile] * len(distance_metric)
60
+
61
+ if len(distance_metric) != len(edge_rank_percentile):
64
62
  raise ValueError(
65
63
  "The number of distance metrics must match the number of edge length thresholds."
66
64
  )
@@ -70,80 +68,114 @@ def get_network_neighborhoods(
70
68
  combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
71
69
 
72
70
  # Loop through each distance metric and corresponding edge length threshold
73
- for metric, threshold in zip(distance_metric, edge_length_threshold):
74
- # Create a subgraph based on the specific edge length threshold for this algorithm
75
- subgraph = _create_percentile_limited_subgraph(network, edge_length_percentile=threshold)
76
- # Call the appropriate neighborhood function based on the metric
77
- if metric == "louvain":
78
- neighborhoods = calculate_louvain_neighborhoods(
79
- subgraph, louvain_resolution, random_seed=random_seed
80
- )
81
- elif metric == "greedy_modularity":
71
+ for metric, threshold in zip(distance_metric, edge_rank_percentile):
72
+ # Create a subgraph based on the edge length threshold
73
+ subgraph = _create_percentile_limited_subgraph(network, edge_rank_percentile=threshold)
74
+ subgraph_nodes = list(subgraph.nodes)
75
+ # Calculate neighborhoods based on the specified metric
76
+ if metric == "greedy_modularity":
82
77
  neighborhoods = calculate_greedy_modularity_neighborhoods(subgraph)
83
78
  elif metric == "label_propagation":
84
79
  neighborhoods = calculate_label_propagation_neighborhoods(subgraph)
80
+ elif metric == "leiden":
81
+ neighborhoods = calculate_leiden_neighborhoods(
82
+ subgraph, leiden_resolution, random_seed=random_seed
83
+ )
84
+ elif metric == "louvain":
85
+ neighborhoods = calculate_louvain_neighborhoods(
86
+ subgraph, louvain_resolution, random_seed=random_seed
87
+ )
85
88
  elif metric == "markov_clustering":
86
89
  neighborhoods = calculate_markov_clustering_neighborhoods(subgraph)
87
- elif metric == "walktrap":
88
- neighborhoods = calculate_walktrap_neighborhoods(subgraph)
89
90
  elif metric == "spinglass":
90
91
  neighborhoods = calculate_spinglass_neighborhoods(subgraph)
92
+ elif metric == "walktrap":
93
+ neighborhoods = calculate_walktrap_neighborhoods(subgraph)
91
94
  else:
92
95
  raise ValueError(
93
- "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
94
- "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
96
+ "Invalid distance metric specified. Please choose from 'greedy_modularity', 'label_propagation',"
97
+ "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
95
98
  )
96
99
 
97
- # Sum the neighborhood matrices
98
- combined_neighborhoods += neighborhoods
100
+ # Expand the neighborhood matrix to match the original network's size
101
+ expanded_neighborhoods = expand_neighborhood_matrix(
102
+ neighborhoods, subgraph_nodes, num_nodes
103
+ )
104
+ # Sum the expanded neighborhood matrices
105
+ combined_neighborhoods += expanded_neighborhoods
99
106
 
100
- # Ensure that the maximum value in each row is set to 1
101
- # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
102
- # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
103
- # focusing on the most significant connection per row.
104
- combined_neighborhoods = _set_max_to_one(combined_neighborhoods)
107
+ # Convert combined_neighborhoods to binary: values > 0 are set to 1
108
+ combined_neighborhoods = (combined_neighborhoods > 0).astype(int)
105
109
 
106
110
  return combined_neighborhoods
107
111
 
108
112
 
109
- def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
110
- """Create a subgraph containing all nodes and edges where the edge length is below the
111
- specified percentile of all edge lengths in the input graph.
113
+ def expand_neighborhood_matrix(
114
+ subgraph_matrix: np.ndarray, subgraph_nodes: list, original_size: int
115
+ ) -> np.ndarray:
116
+ """Expand a subgraph neighborhood matrix back to the size of the original graph.
112
117
 
113
118
  Args:
114
- G (nx.Graph): The input graph with 'length' attributes on edges.
115
- edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
119
+ subgraph_matrix (np.ndarray): The neighborhood matrix for the subgraph.
120
+ subgraph_nodes (list): List of nodes in the subgraph, corresponding to rows/columns in subgraph_matrix.
121
+ original_size (int): The number of nodes in the original graph.
116
122
 
117
123
  Returns:
118
- nx.Graph: A subgraph with all nodes and edges where the edge length is below the
119
- calculated threshold length.
124
+ np.ndarray: The expanded matrix with the original size, with subgraph values mapped correctly.
120
125
  """
121
- # Extract edge lengths and handle missing lengths
122
- edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
123
- if not edge_lengths:
124
- raise ValueError(
125
- "No edge lengths found in the graph. Ensure edges have 'length' attributes."
126
- )
126
+ expanded_matrix = np.zeros((original_size, original_size), dtype=int)
127
+ for i, node_i in enumerate(subgraph_nodes):
128
+ for j, node_j in enumerate(subgraph_nodes):
129
+ expanded_matrix[node_i, node_j] = subgraph_matrix[i, j]
130
+ return expanded_matrix
127
131
 
128
- # Calculate the specific edge length for the given percentile
129
- percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
130
- # Create the subgraph by directly filtering edges during iteration
131
- subgraph = nx.Graph()
132
- subgraph.add_nodes_from(G.nodes(data=True)) # Retain all nodes from the original graph
133
- # Add edges below the specified percentile length in a single pass
134
- for u, v, d in G.edges(data=True):
135
- if d.get("length", 1) <= percentile_length:
136
- subgraph.add_edge(u, v, **d)
137
132
 
138
- # Return the subgraph; optionally check if it's too sparse
133
+ def _create_percentile_limited_subgraph(G: nx.Graph, edge_rank_percentile: float) -> nx.Graph:
134
+ """Create a subgraph containing all nodes and edges where the edge length is within the
135
+ specified rank percentile of all edges in the input graph. Isolated nodes are removed.
136
+
137
+ Args:
138
+ G (nx.Graph): The input graph with 'length' attributes on edges.
139
+ edge_rank_percentile (float): The rank percentile (between 0 and 1) to filter edges.
140
+
141
+ Returns:
142
+ nx.Graph: A subgraph with nodes and edges where the edge length is within the
143
+ specified percentile, with isolated nodes removed, retaining all original attributes.
144
+ """
145
+ # Extract edges with their lengths
146
+ edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
147
+ if not edges_with_length:
148
+ raise ValueError("No edge lengths found. Ensure edges have 'length' attributes.")
149
+
150
+ # Sort edges by length in ascending order
151
+ edges_with_length.sort(key=lambda x: x[2]["length"])
152
+ # Calculate the cutoff based on the specified rank percentile
153
+ cutoff_index = int(edge_rank_percentile * len(edges_with_length))
154
+ if cutoff_index == 0:
155
+ raise ValueError("The rank percentile is too low, resulting in no edges being included.")
156
+
157
+ # Keep only the edges within the specified percentile
158
+ selected_edges = edges_with_length[:cutoff_index]
159
+ # Create a new subgraph with the selected edges, retaining all attributes
160
+ subgraph = nx.Graph()
161
+ subgraph.add_edges_from((u, v, d) for u, v, d in selected_edges)
162
+ # Copy over all node attributes from the original graph
163
+ subgraph.add_nodes_from((node, G.nodes[node]) for node in subgraph.nodes())
164
+
165
+ # Remove isolated nodes (if any)
166
+ isolated_nodes = [node for node, degree in subgraph.degree() if degree == 0]
167
+ subgraph.remove_nodes_from(isolated_nodes)
168
+ # Check if the resulting subgraph has no edges
139
169
  if subgraph.number_of_edges() == 0:
140
- raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
170
+ raise ValueError("The resulting subgraph has no edges. Adjust the rank percentile.")
141
171
 
142
172
  return subgraph
143
173
 
144
174
 
145
- def _set_max_to_one(matrix: np.ndarray) -> np.ndarray:
146
- """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0.
175
+ def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
176
+ """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
177
+ useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
178
+ maximum value in each row represents the most significant relationship per node in the combined neighborhoods.
147
179
 
148
180
  Args:
149
181
  matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.
@@ -617,8 +617,7 @@ class Labels:
617
617
  """
618
618
  # Return custom labels if domain is in ids_to_labels
619
619
  if ids_to_labels and domain in ids_to_labels:
620
- terms = ids_to_labels[domain].replace(" ", TERM_DELIMITER)
621
- return terms
620
+ return ids_to_labels[domain]
622
621
 
623
622
  else:
624
623
  terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
risk/risk.py CHANGED
@@ -52,7 +52,8 @@ class RISK(NetworkIO, AnnotationsIO):
52
52
  annotations: Dict[str, Any],
53
53
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
54
54
  louvain_resolution: float = 0.1,
55
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
55
+ leiden_resolution: float = 1.0,
56
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
56
57
  null_distribution: str = "network",
57
58
  random_seed: int = 888,
58
59
  ) -> Dict[str, Any]:
@@ -65,7 +66,8 @@ class RISK(NetworkIO, AnnotationsIO):
65
66
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
66
67
  'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
67
68
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
68
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
69
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
70
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
69
71
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
70
72
  Defaults to 0.5.
71
73
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
@@ -79,7 +81,8 @@ class RISK(NetworkIO, AnnotationsIO):
79
81
  params.log_neighborhoods(
80
82
  distance_metric=distance_metric,
81
83
  louvain_resolution=louvain_resolution,
82
- edge_length_threshold=edge_length_threshold,
84
+ leiden_resolution=leiden_resolution,
85
+ edge_rank_percentile=edge_rank_percentile,
83
86
  statistical_test_function="hypergeom",
84
87
  null_distribution=null_distribution,
85
88
  random_seed=random_seed,
@@ -93,7 +96,8 @@ class RISK(NetworkIO, AnnotationsIO):
93
96
  network,
94
97
  distance_metric,
95
98
  louvain_resolution=louvain_resolution,
96
- edge_length_threshold=edge_length_threshold,
99
+ leiden_resolution=leiden_resolution,
100
+ edge_rank_percentile=edge_rank_percentile,
97
101
  random_seed=random_seed,
98
102
  )
99
103
  # Run hypergeometric test to compute neighborhood significance
@@ -112,7 +116,8 @@ class RISK(NetworkIO, AnnotationsIO):
112
116
  annotations: Dict[str, Any],
113
117
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
114
118
  louvain_resolution: float = 0.1,
115
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
119
+ leiden_resolution: float = 1.0,
120
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
116
121
  null_distribution: str = "network",
117
122
  random_seed: int = 888,
118
123
  ) -> Dict[str, Any]:
@@ -125,7 +130,8 @@ class RISK(NetworkIO, AnnotationsIO):
125
130
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
126
131
  'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
127
132
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
128
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
133
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
134
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
129
135
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
130
136
  Defaults to 0.5.
131
137
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
@@ -139,7 +145,8 @@ class RISK(NetworkIO, AnnotationsIO):
139
145
  params.log_neighborhoods(
140
146
  distance_metric=distance_metric,
141
147
  louvain_resolution=louvain_resolution,
142
- edge_length_threshold=edge_length_threshold,
148
+ leiden_resolution=leiden_resolution,
149
+ edge_rank_percentile=edge_rank_percentile,
143
150
  statistical_test_function="poisson",
144
151
  null_distribution=null_distribution,
145
152
  random_seed=random_seed,
@@ -153,7 +160,8 @@ class RISK(NetworkIO, AnnotationsIO):
153
160
  network,
154
161
  distance_metric,
155
162
  louvain_resolution=louvain_resolution,
156
- edge_length_threshold=edge_length_threshold,
163
+ leiden_resolution=leiden_resolution,
164
+ edge_rank_percentile=edge_rank_percentile,
157
165
  random_seed=random_seed,
158
166
  )
159
167
  # Run Poisson test to compute neighborhood significance
@@ -172,7 +180,8 @@ class RISK(NetworkIO, AnnotationsIO):
172
180
  annotations: Dict[str, Any],
173
181
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
174
182
  louvain_resolution: float = 0.1,
175
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
183
+ leiden_resolution: float = 1.0,
184
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
176
185
  score_metric: str = "sum",
177
186
  null_distribution: str = "network",
178
187
  num_permutations: int = 1000,
@@ -188,7 +197,8 @@ class RISK(NetworkIO, AnnotationsIO):
188
197
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
189
198
  'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
190
199
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
191
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
200
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
201
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
192
202
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
193
203
  Defaults to 0.5.
194
204
  score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
@@ -205,7 +215,8 @@ class RISK(NetworkIO, AnnotationsIO):
205
215
  params.log_neighborhoods(
206
216
  distance_metric=distance_metric,
207
217
  louvain_resolution=louvain_resolution,
208
- edge_length_threshold=edge_length_threshold,
218
+ leiden_resolution=leiden_resolution,
219
+ edge_rank_percentile=edge_rank_percentile,
209
220
  statistical_test_function="permutation",
210
221
  score_metric=score_metric,
211
222
  null_distribution=null_distribution,
@@ -222,7 +233,8 @@ class RISK(NetworkIO, AnnotationsIO):
222
233
  network,
223
234
  distance_metric,
224
235
  louvain_resolution=louvain_resolution,
225
- edge_length_threshold=edge_length_threshold,
236
+ leiden_resolution=leiden_resolution,
237
+ edge_rank_percentile=edge_rank_percentile,
226
238
  random_seed=random_seed,
227
239
  )
228
240
 
@@ -408,7 +420,8 @@ class RISK(NetworkIO, AnnotationsIO):
408
420
  network: nx.Graph,
409
421
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
410
422
  louvain_resolution: float = 0.1,
411
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
423
+ leiden_resolution: float = 1.0,
424
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
412
425
  random_seed: int = 888,
413
426
  ) -> np.ndarray:
414
427
  """Load significant neighborhoods for the network.
@@ -420,7 +433,8 @@ class RISK(NetworkIO, AnnotationsIO):
420
433
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
421
434
  'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
422
435
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
423
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
436
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
437
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
424
438
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
425
439
  Defaults to 0.5.
426
440
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
@@ -431,19 +445,22 @@ class RISK(NetworkIO, AnnotationsIO):
431
445
  # Display the chosen distance metric
432
446
  if distance_metric == "louvain":
433
447
  for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
448
+ elif distance_metric == "leiden":
449
+ for_print_distance_metric = f"leiden (resolution={leiden_resolution})"
434
450
  else:
435
451
  for_print_distance_metric = distance_metric
436
452
  # Log and display neighborhood settings
437
453
  logger.debug(f"Distance metric: '{for_print_distance_metric}'")
438
- logger.debug(f"Edge length threshold: {edge_length_threshold}")
454
+ logger.debug(f"Edge length threshold: {edge_rank_percentile}")
439
455
  logger.debug(f"Random seed: {random_seed}")
440
456
 
441
457
  # Compute neighborhoods based on the network and distance metric
442
458
  neighborhoods = get_network_neighborhoods(
443
459
  network,
444
460
  distance_metric,
445
- edge_length_threshold,
461
+ edge_rank_percentile,
446
462
  louvain_resolution=louvain_resolution,
463
+ leiden_resolution=leiden_resolution,
447
464
  random_seed=random_seed,
448
465
  )
449
466
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.9b9
3
+ Version: 0.0.9b11
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -695,6 +695,7 @@ Requires-Python: >=3.8
695
695
  Description-Content-Type: text/markdown
696
696
  License-File: LICENSE
697
697
  Requires-Dist: ipywidgets
698
+ Requires-Dist: leidenalg
698
699
  Requires-Dist: markov-clustering
699
700
  Requires-Dist: matplotlib
700
701
  Requires-Dist: networkx
@@ -702,6 +703,7 @@ Requires-Dist: nltk==3.8.1
702
703
  Requires-Dist: numpy
703
704
  Requires-Dist: openpyxl
704
705
  Requires-Dist: pandas
706
+ Requires-Dist: python-igraph
705
707
  Requires-Dist: python-louvain
706
708
  Requires-Dist: scikit-learn
707
709
  Requires-Dist: scipy
@@ -1,6 +1,6 @@
1
- risk/__init__.py,sha256=wOa6WyqiT30Dx0Mk77M-4ilHgFK5rf0eRjG7ceeU8Gw,112
1
+ risk/__init__.py,sha256=lx2px2aUNVzyhWttIeSRddS0TyVt5tkNaoDp26k1OmE,113
2
2
  risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
3
- risk/risk.py,sha256=kntBxYwAEpoAjXN_l6BM3yxFKyuAKmd8OMGl2P00pZ4,22416
3
+ risk/risk.py,sha256=MXu8T93NUgMDl3NaZDbm0j9c4KWwzx-kmp9Rd1ax0N4,23534
4
4
  risk/annotations/__init__.py,sha256=kXgadEXaCh0z8OyhOhTj7c3qXGmWgOhaSZ4gSzSb59U,147
5
5
  risk/annotations/annotations.py,sha256=WVT9wzTm8lTpMw_3SnbyljWR77yExo0rb1zVgJza8nw,14284
6
6
  risk/annotations/io.py,sha256=tk1dAsxIwW5oLxB294ppiuZd4__Y5pj8se8KhitRSNA,10554
@@ -8,9 +8,9 @@ risk/log/__init__.py,sha256=gy7C5L6D222AYUChq5lkc0LsCJ_QMQPaFiBJKbecdac,201
8
8
  risk/log/console.py,sha256=C52s3FgQ2e9kQWcXL8m7rs_pnKXt5Yy8PBHmQkOTiNo,4537
9
9
  risk/log/parameters.py,sha256=o4StqYCa0kt7_Ht4mKa1DwwvhGUwkC_dGBaiUIc0GB0,5683
10
10
  risk/neighborhoods/__init__.py,sha256=C-SD0G-9skSLjLFdAB6v6lAjO8la2v6Fqy63h2MY28k,186
11
- risk/neighborhoods/community.py,sha256=MAgIblbuisEPwVU6mFZd4Yd9NUKlaHK99suw51r1Is0,7065
11
+ risk/neighborhoods/community.py,sha256=z8oZGBUZLriYBbT4-0uKDn8iM8SmN5dubrHb7zxcn4w,10431
12
12
  risk/neighborhoods/domains.py,sha256=t91xSpx9Ty9hSlhRq2_XwyPpBP7sjKhovcPPvkwWtf0,11398
13
- risk/neighborhoods/neighborhoods.py,sha256=0TAP-xi4hgtnrK0cKQPHQHq9IVGHOMF1wYEcx6tsxRA,22241
13
+ risk/neighborhoods/neighborhoods.py,sha256=D-t1v6EcmKn6B8uZK5X5tU9uOV0OfkKzvETGfZWRnic,23561
14
14
  risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
15
15
  risk/network/geometry.py,sha256=gFtYUj9j9aul4paKq_qSGJn39Nazxu_MXv8m-tYYtrk,6840
16
16
  risk/network/io.py,sha256=AWSbZGLZHtl72KSlafQlcYoG00YLSznG7UYDi_wDT7M,22958
@@ -20,7 +20,7 @@ risk/network/graph/summary.py,sha256=h2bpUjfwI1NMflkKwplGQEGPswfAtunormdTIEQYbvs
20
20
  risk/network/plot/__init__.py,sha256=MfmaXJgAZJgXZ2wrhK8pXwzETlcMaLChhWXKAozniAo,98
21
21
  risk/network/plot/canvas.py,sha256=W8dFv4XYTzCWXBchgsc0esOQRn4usM4LkwNGPSDMobE,13357
22
22
  risk/network/plot/contour.py,sha256=VONX9l6owrZvWtR0mWQ6z2GSd1YXIv5wV_sf5ROQLT4,15581
23
- risk/network/plot/labels.py,sha256=eorP80CmAbHmt7de2qHna1tHGKL8YiHknwFW2R3tvjI,45734
23
+ risk/network/plot/labels.py,sha256=aU_ClDGVPHyQ3H5E_ygx8hsMhrpJB0i9Cn65PlLmw7s,45679
24
24
  risk/network/plot/network.py,sha256=_K8Am2y6zSGrm3fAgMbXxzgspbugJi3uK4_tG8qqGoI,14015
25
25
  risk/network/plot/plotter.py,sha256=eS1vHqvOA2O001Rq7WiDcgqcehJ3fg4OPfvkezH4erw,5771
26
26
  risk/network/plot/utils/colors.py,sha256=9zuU2O-Nkpljb1yVGUR_IjqD1y-wH6Bf6Vm1MMVB0Lo,18718
@@ -32,8 +32,8 @@ risk/stats/stats.py,sha256=z8NrhiVj4BzJ250bVLfytpmfC7RzYu7mBuIZD_l0aCA,7222
32
32
  risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
33
33
  risk/stats/permutation/permutation.py,sha256=meBNSrbRa9P8WJ54n485l0H7VQJlMSfHqdN4aCKYCtQ,10105
34
34
  risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
35
- risk_network-0.0.9b9.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
36
- risk_network-0.0.9b9.dist-info/METADATA,sha256=iBZOFuXgFhOXgRkBH90TC3Fkk7r1iAX6Yq0bPuGjxck,47497
37
- risk_network-0.0.9b9.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
38
- risk_network-0.0.9b9.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
39
- risk_network-0.0.9b9.dist-info/RECORD,,
35
+ risk_network-0.0.9b11.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
36
+ risk_network-0.0.9b11.dist-info/METADATA,sha256=38pUWpqTrjTbc5LQ1_JnRqOpXwgSS-J1wVHHBMTTQ6M,47552
37
+ risk_network-0.0.9b11.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
38
+ risk_network-0.0.9b11.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
39
+ risk_network-0.0.9b11.dist-info/RECORD,,