risk-network 0.0.6b10__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {risk_network-0.0.6b10 → risk_network-0.0.7}/PKG-INFO +1 -1
  2. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/__init__.py +1 -1
  3. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/annotations/annotations.py +61 -42
  4. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/annotations/io.py +14 -14
  5. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/log/__init__.py +1 -1
  6. risk_network-0.0.7/risk/log/config.py +139 -0
  7. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/log/params.py +4 -4
  8. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/neighborhoods/community.py +25 -36
  9. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/neighborhoods/domains.py +29 -27
  10. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/neighborhoods/neighborhoods.py +171 -72
  11. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/network/graph.py +92 -41
  12. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/network/io.py +22 -26
  13. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/network/plot.py +132 -19
  14. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/risk.py +81 -78
  15. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/stats/__init__.py +2 -2
  16. risk_network-0.0.7/risk/stats/hypergeom.py +54 -0
  17. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/stats/permutation/permutation.py +23 -17
  18. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/stats/permutation/test_functions.py +2 -2
  19. risk_network-0.0.7/risk/stats/poisson.py +44 -0
  20. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk_network.egg-info/PKG-INFO +1 -1
  21. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk_network.egg-info/SOURCES.txt +2 -2
  22. risk_network-0.0.6b10/risk/log/console.py +0 -16
  23. risk_network-0.0.6b10/risk/stats/fisher_exact.py +0 -132
  24. risk_network-0.0.6b10/risk/stats/hypergeom.py +0 -131
  25. {risk_network-0.0.6b10 → risk_network-0.0.7}/LICENSE +0 -0
  26. {risk_network-0.0.6b10 → risk_network-0.0.7}/MANIFEST.in +0 -0
  27. {risk_network-0.0.6b10 → risk_network-0.0.7}/README.md +0 -0
  28. {risk_network-0.0.6b10 → risk_network-0.0.7}/pyproject.toml +0 -0
  29. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/annotations/__init__.py +0 -0
  30. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/constants.py +0 -0
  31. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/neighborhoods/__init__.py +0 -0
  32. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/network/__init__.py +0 -0
  33. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/network/geometry.py +0 -0
  34. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/stats/permutation/__init__.py +0 -0
  35. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk/stats/stats.py +0 -0
  36. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk_network.egg-info/dependency_links.txt +0 -0
  37. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk_network.egg-info/requires.txt +0 -0
  38. {risk_network-0.0.6b10 → risk_network-0.0.7}/risk_network.egg-info/top_level.txt +0 -0
  39. {risk_network-0.0.6b10 → risk_network-0.0.7}/setup.cfg +0 -0
  40. {risk_network-0.0.6b10 → risk_network-0.0.7}/setup.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.6b10
+Version: 0.0.7
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.6-beta.10"
+__version__ = "0.0.7"
@@ -4,7 +4,7 @@ risk/annotations/annotations
 """
 
 from collections import Counter
-from itertools import compress, permutations
+from itertools import compress
 from typing import Any, Dict, List, Set
 
 import networkx as nx
@@ -39,7 +39,7 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
         annotations_input (dict): A dictionary with annotations.
 
     Returns:
-        dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+        dict: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
@@ -66,7 +66,8 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
     # Extract ordered nodes and annotations
     ordered_nodes = tuple(annotations_pivot.index)
     ordered_annotations = tuple(annotations_pivot.columns)
-    annotations_pivot_numpy = annotations_pivot.fillna(0).to_numpy()
+    # Convert the annotations_pivot matrix to a numpy array and ensure it's binary
+    annotations_pivot_numpy = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
 
     return {
         "ordered_nodes": ordered_nodes,
@@ -132,34 +133,39 @@ def define_top_annotations(
             nx.connected_components(enriched_network), key=len, reverse=True
         )
         size_connected_components = np.array([len(c) for c in connected_components])
+
+        # Filter the size of connected components by min_cluster_size and max_cluster_size
+        filtered_size_connected_components = size_connected_components[
+            (size_connected_components >= min_cluster_size)
+            & (size_connected_components <= max_cluster_size)
+        ]
+        # Calculate the number of connected components and large connected components
         num_connected_components = len(connected_components)
-        num_large_connected_components = np.sum(
-            np.logical_and(
-                size_connected_components >= min_cluster_size,
-                size_connected_components <= max_cluster_size,
-            )
-        )
+        num_large_connected_components = len(filtered_size_connected_components)
+
+        # Assign the number of connected components
         annotations_enrichment_matrix.loc[attribute, "num connected components"] = (
             num_connected_components
         )
-        annotations_enrichment_matrix.at[attribute, "size connected components"] = (
-            size_connected_components
-        )
+        # Filter out attributes with more than one connected component
+        annotations_enrichment_matrix.loc[
+            annotations_enrichment_matrix["num connected components"] > 1, "top attributes"
+        ] = False
+        # Assign the number of large connected components
         annotations_enrichment_matrix.loc[attribute, "num large connected components"] = (
            num_large_connected_components
        )
-
-        # Filter out attributes with more than one connected component
-        annotations_enrichment_matrix.loc[
-            annotations_enrichment_matrix["num connected components"] > 1, "top attributes"
-        ] = False
+        # Assign the size of connected components, ensuring it is always a list
+        annotations_enrichment_matrix.at[attribute, "size connected components"] = (
+            filtered_size_connected_components.tolist()
+        )
 
     return annotations_enrichment_matrix
 
 
 def get_description(words_column: pd.Series) -> str:
-    """Process input Series to identify and return the top N frequent, significant words,
-    filtering based on stopwords and similarity (Jaccard index).
+    """Process input Series to identify and return the top frequent, significant words,
+    filtering based on stopwords and gracefully handling numerical strings.
 
     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
@@ -167,19 +173,29 @@ def get_description(words_column: pd.Series) -> str:
     Returns:
         str: A coherent description formed from the most frequent and significant words.
     """
-    # Define stopwords
-    stop_words = set(stopwords.words("english"))
-    # Tokenize the concatenated string and filter out stopwords and non-alphabetic words
+    # Concatenate all rows into a single string and tokenize into words
+    all_words = words_column.str.cat(sep=" ")
+    tokens = word_tokenize(all_words)
+
+    # Separate numeric tokens
+    numeric_tokens = [token for token in tokens if token.replace(".", "", 1).isdigit()]
+    # If there's only one unique numeric value, return it directly as a string
+    unique_numeric_values = set(numeric_tokens)
+    if len(unique_numeric_values) == 1:
+        return f"{list(unique_numeric_values)[0]}"
+
+    # Ensure that all values in 'words' are strings and include both alphabetic and numeric tokens
     words = [
-        (
+        str(
             word.lower() if word.istitle() else word
-        )  # Lowercase all words except proper nouns (e.g., RNA, mRNA)
-        for word in word_tokenize(words_column.str.cat(sep=" "))
-        if word.isalpha() and word.lower() not in stop_words
+        )  # Convert to string and lowercase all words except proper nouns (e.g., RNA, mRNA)
+        for word in tokens
+        if word.isalpha()
+        or word.replace(".", "", 1).isdigit()  # Keep alphabetic words and numeric strings
     ]
-    # Simplify the word list to remove similar words based on the Jaccard index and generate coherent description
-    simplified_words = _simplify_word_list(words, threshold=0.90)
-    description = _generate_coherent_description(simplified_words)
+    # Generate a coherent description from the processed words
+    description = _generate_coherent_description(words)
+
     return description
 
 
@@ -237,25 +253,28 @@ def _calculate_jaccard_index(set1: Set[Any], set2: Set[Any]) -> float:
 
 
 def _generate_coherent_description(words: List[str]) -> str:
-    """Generate a coherent description from a list of words.
+    """Generate a coherent description from a list of words or numerical string values.
+    If there is only one unique entry, return it directly.
 
     Args:
-        words (list of str): A list of words from which to generate the description.
+        words (list): A list of words or numerical string values.
 
     Returns:
         str: A coherent description formed by arranging the words in a logical sequence.
     """
-    # Count the frequency of each word
+    # If there are no words, return a keyword indicating no data is available
+    if not words:
+        return "N/A"
+
+    # If there's only one unique word, return it directly
+    unique_words = set(words)
+    if len(unique_words) == 1:
+        return list(unique_words)[0]
+
+    # Count the frequency of each word and sort them by frequency
     word_counts = Counter(words)
-    # Get the most common words
    most_common_words = [word for word, _ in word_counts.most_common()]
-    # Filter out common stopwords
-    stop_words = set(stopwords.words("english"))
-    filtered_words = [word for word in most_common_words if word.lower() not in stop_words]
-    # Generate permutations of the filtered words to find a logical order
-    perm = permutations(filtered_words)
-    # Assume the first permutation as the logical sequence (since they're all equally likely without additional context)
-    logical_sequence = next(perm)
-    # Join the words to form a coherent description
-    description = " ".join(logical_sequence)
+    # Join the most common words to form a coherent description based on frequency
+    description = " ".join(most_common_words)
+
     return description
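
Note on the two hunks above: 0.0.7 drops the stopword/Jaccard simplification step (the _simplify_word_list call and the permutations-based ordering) and instead joins tokens in descending frequency order, with special cases for empty, single-unique, and purely numeric inputs. A simplified, illustrative sketch of the frequency-ordered path only (not the package's exact function; assumes NLTK and its 'punkt' tokenizer data are installed):

from collections import Counter

import pandas as pd
from nltk.tokenize import word_tokenize

# Hypothetical input Series of annotation term names
words_column = pd.Series(["ribosome biogenesis", "ribosome assembly", "ribosome export"])

tokens = word_tokenize(words_column.str.cat(sep=" "))
# Keep alphabetic words and numeric strings, lowercasing title-cased words
words = [
    w.lower() if w.istitle() else w
    for w in tokens
    if w.isalpha() or w.replace(".", "", 1).isdigit()
]
# Most frequent words come first in the joined description
print(" ".join(word for word, _ in Counter(words).most_common()))
# -> "ribosome biogenesis assembly export"
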
@@ -12,7 +12,7 @@ import networkx as nx
 import pandas as pd
 
 from risk.annotations.annotations import load_annotations
-from risk.log import params, print_header
+from risk.log import params, logger, log_header
 
 
 class AnnotationsIO:
@@ -25,12 +25,12 @@ class AnnotationsIO:
     def __init__(self):
         pass
 
-    def load_json_annotation(self, filepath: str, network: nx.Graph) -> Dict[str, Any]:
+    def load_json_annotation(self, network: nx.Graph, filepath: str) -> Dict[str, Any]:
         """Load annotations from a JSON file and convert them to a DataFrame.
 
         Args:
-            filepath (str): Path to the JSON annotations file.
             network (NetworkX graph): The network to which the annotations are related.
+            filepath (str): Path to the JSON annotations file.
 
         Returns:
             dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -49,8 +49,8 @@ class AnnotationsIO:
 
     def load_excel_annotation(
         self,
-        filepath: str,
         network: nx.Graph,
+        filepath: str,
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
@@ -59,8 +59,8 @@ class AnnotationsIO:
         """Load annotations from an Excel file and associate them with the network.
 
         Args:
-            filepath (str): Path to the Excel annotations file.
             network (nx.Graph): The NetworkX graph to which the annotations are related.
+            filepath (str): Path to the Excel annotations file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
@@ -87,8 +87,8 @@ class AnnotationsIO:
 
     def load_csv_annotation(
         self,
-        filepath: str,
         network: nx.Graph,
+        filepath: str,
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
@@ -96,8 +96,8 @@ class AnnotationsIO:
         """Load annotations from a CSV file and associate them with the network.
 
         Args:
-            filepath (str): Path to the CSV annotations file.
             network (nx.Graph): The NetworkX graph to which the annotations are related.
+            filepath (str): Path to the CSV annotations file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -121,8 +121,8 @@ class AnnotationsIO:
 
     def load_tsv_annotation(
        self,
-        filepath: str,
         network: nx.Graph,
+        filepath: str,
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
@@ -130,8 +130,8 @@ class AnnotationsIO:
         """Load annotations from a TSV file and associate them with the network.
 
         Args:
-            filepath (str): Path to the TSV annotations file.
             network (nx.Graph): The NetworkX graph to which the annotations are related.
+            filepath (str): Path to the TSV annotations file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -153,12 +153,12 @@ class AnnotationsIO:
         # Load the annotations into the provided network
         return load_annotations(network, annotations_input)
 
-    def load_dict_annotation(self, content: Dict[str, Any], network: nx.Graph) -> Dict[str, Any]:
+    def load_dict_annotation(self, network: nx.Graph, content: Dict[str, Any]) -> Dict[str, Any]:
         """Load annotations from a provided dictionary and convert them to a dictionary annotation.
 
         Args:
-            content (dict): The annotations dictionary to load.
             network (NetworkX graph): The network to which the annotations are related.
+            content (dict): The annotations dictionary to load.
 
         Returns:
             dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -218,7 +218,7 @@ def _log_loading(filetype: str, filepath: str = "") -> None:
         filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
         filepath (str, optional): The path to the file being loaded.
     """
-    print_header("Loading annotations")
-    print(f"Filetype: {filetype}")
+    log_header("Loading annotations")
+    logger.debug(f"Filetype: {filetype}")
     if filepath:
-        print(f"Filepath: {filepath}")
+        logger.debug(f"Filepath: {filepath}")
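
Functionally, the io.py hunks above swap every loader's first two arguments so the network always comes first, and route log output through the new logger. A hedged usage sketch of the 0.0.7 call order (the file name and node labels are placeholders, not from the package):

import networkx as nx

from risk.annotations.io import AnnotationsIO

network = nx.Graph()
network.add_nodes_from(["YAL001C", "YAL002W"])  # placeholder node labels

annotations_io = AnnotationsIO()
# 0.0.7 signature: network first, then filepath (reversed relative to 0.0.6b10)
annotations = annotations_io.load_json_annotation(network, "annotations.json")
# The same ordering applies to load_csv_annotation, load_tsv_annotation,
# load_excel_annotation, and load_dict_annotation.
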
@@ -3,7 +3,7 @@ risk/log
 ~~~~~~~~
 """
 
-from .console import print_header
+from .config import logger, log_header, set_global_verbosity
 from .params import Params
 
 params = Params()
@@ -0,0 +1,139 @@
+"""
+risk/log/config
+~~~~~~~~~~~~~~~
+"""
+
+import logging
+
+
+def in_jupyter():
+    """Check if the code is running in a Jupyter notebook environment.
+
+    Returns:
+        bool: True if running in a Jupyter notebook or QtConsole, False otherwise.
+    """
+    try:
+        shell = get_ipython().__class__.__name__
+        if shell == "ZMQInteractiveShell":  # Jupyter Notebook or QtConsole
+            return True
+        elif shell == "TerminalInteractiveShell":  # Terminal running IPython
+            return False
+    except NameError:
+        return False  # Not in Jupyter
+
+
+# Define the MockLogger class to replicate logging behavior with print statements in Jupyter
+class MockLogger:
+    """MockLogger: A lightweight logger replacement using print statements in Jupyter.
+
+    The MockLogger class replicates the behavior of a standard logger using print statements
+    to display messages. This is primarily used in a Jupyter environment to show outputs
+    directly in the notebook. The class supports logging levels such as `info`, `debug`,
+    `warning`, and `error`, while the `verbose` attribute controls whether to display non-error messages.
+    """
+
+    def __init__(self, verbose: bool = True):
+        """Initialize the MockLogger with verbosity settings.
+
+        Args:
+            verbose (bool): If True, display all log messages (info, debug, warning).
+                If False, only display error messages. Defaults to True.
+        """
+        self.verbose = verbose
+
+    def info(self, message: str) -> None:
+        """Display an informational message.
+
+        Args:
+            message (str): The informational message to be printed.
+        """
+        if self.verbose:
+            print(message)
+
+    def debug(self, message: str) -> None:
+        """Display a debug message.
+
+        Args:
+            message (str): The debug message to be printed.
+        """
+        if self.verbose:
+            print(message)
+
+    def warning(self, message: str) -> None:
+        """Display a warning message.
+
+        Args:
+            message (str): The warning message to be printed.
+        """
+        print(message)
+
+    def error(self, message: str) -> None:
+        """Display an error message.
+
+        Args:
+            message (str): The error message to be printed.
+        """
+        print(message)
+
+    def setLevel(self, level: int) -> None:
+        """Adjust verbosity based on the logging level.
+
+        Args:
+            level (int): Logging level to control message display.
+                - logging.DEBUG sets verbose to True (show all messages).
+                - logging.WARNING sets verbose to False (show only warning, error, and critical messages).
+        """
+        if level == logging.DEBUG:
+            self.verbose = True  # Show all messages
+        elif level == logging.WARNING:
+            self.verbose = False  # Suppress all except warning, error, and critical messages
+
+
+# Set up logger based on environment
+if not in_jupyter():
+    # Set up logger normally for .py files or terminal environments
+    logger = logging.getLogger("risk_logger")
+    logger.setLevel(logging.DEBUG)
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.DEBUG)
+    console_handler.setFormatter(logging.Formatter("%(message)s"))
+
+    if not logger.hasHandlers():
+        logger.addHandler(console_handler)
+else:
+    # If in Jupyter, use the MockLogger
+    logger = MockLogger()
+
+
+def set_global_verbosity(verbose):
+    """Set the global verbosity level for the logger.
+
+    Args:
+        verbose (bool): Whether to display all log messages (True) or only error messages (False).
+
+    Returns:
+        None
+    """
+    if not isinstance(logger, MockLogger):
+        # For the regular logger, adjust logging levels
+        if verbose:
+            logger.setLevel(logging.DEBUG)  # Show all messages
+            console_handler.setLevel(logging.DEBUG)
+        else:
+            logger.setLevel(logging.WARNING)  # Show only warning, error, and critical messages
+            console_handler.setLevel(logging.WARNING)
+    else:
+        # For the MockLogger, set verbosity directly
+        logger.setLevel(logging.DEBUG if verbose else logging.WARNING)
+
+
+def log_header(input_string: str) -> None:
+    """Log the input string as a header with a line of dashes above and below it.
+
+    Args:
+        input_string (str): The string to be printed as a header.
+    """
+    border = "-" * len(input_string)
+    logger.info(border)
+    logger.info(input_string)
+    logger.info(border)
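
The new risk/log/config.py above backs the logger, log_header, and set_global_verbosity names re-exported from risk.log (see the risk/log/__init__.py hunk earlier). A minimal usage sketch based only on the code shown here:

from risk.log import logger, log_header, set_global_verbosity

set_global_verbosity(True)   # verbose: info/debug messages are shown
log_header("Loading annotations")
logger.debug("Filetype: JSON")

set_global_verbosity(False)  # quiet: only warnings and errors are shown
logger.info("This line is suppressed")
logger.warning("Warnings still appear")
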
@@ -12,7 +12,7 @@ from typing import Any, Dict
 
 import numpy as np
 
-from .console import print_header
+from .config import logger, log_header
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
@@ -35,11 +35,11 @@ def _safe_param_export(func):
             filepath = (
                 kwargs.get("filepath") or args[1]
             )  # Assuming filepath is always the second argument
-            print(f"Parameters successfully exported to filepath: {filepath}")
+            logger.info(f"Parameters successfully exported to filepath: {filepath}")
             return result
         except Exception as e:
             filepath = kwargs.get("filepath") or args[1]
-            print(f"An error occurred while exporting parameters to {filepath}: {e}")
+            logger.error(f"An error occurred while exporting parameters to {filepath}: {e}")
             return None
 
     return wrapper
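
For context, the hunk above shows only the inner try/except of the _safe_param_export decorator. A simplified reconstruction of the surrounding decorator structure (the wrapper layout is assumed; the log messages are taken from the hunk):

import functools
import logging

logger = logging.getLogger("risk_logger")


def _safe_param_export(func):
    """Sketch: wrap an export method and log whether it succeeded or failed."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        filepath = kwargs.get("filepath") or args[1]  # filepath assumed to be the second argument
        try:
            result = func(*args, **kwargs)
            logger.info(f"Parameters successfully exported to filepath: {filepath}")
            return result
        except Exception as e:
            logger.error(f"An error occurred while exporting parameters to {filepath}: {e}")
            return None

    return wrapper
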
@@ -161,7 +161,7 @@ class Params:
         Returns:
             dict: A dictionary containing the processed parameters.
         """
-        print_header("Loading parameters")
+        log_header("Loading parameters")
         return _convert_ndarray_to_list(
             {
                 "annotations": self.annotations,
@@ -7,32 +7,29 @@ import community as community_louvain
 import networkx as nx
 import numpy as np
 import markov_clustering as mc
-from networkx.algorithms.community import asyn_lpa_communities
+from networkx.algorithms.community import asyn_lpa_communities, greedy_modularity_communities
 
 
-def calculate_dijkstra_neighborhoods(network: nx.Graph) -> np.ndarray:
-    """Calculate neighborhoods using Dijkstra's shortest path distances.
+def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
+    """Calculate neighborhoods using the Greedy Modularity method.
 
     Args:
-        network (nx.Graph): The network graph.
+        network (nx.Graph): The network graph to analyze for community structure.
 
     Returns:
-        np.ndarray: Neighborhood matrix based on Dijkstra's distances.
+        np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
     """
-    # Compute Dijkstra's distance for all pairs of nodes in the network
-    all_dijkstra_paths = dict(nx.all_pairs_dijkstra_path_length(network, weight="length"))
+    # Detect communities using the Greedy Modularity method
+    communities = greedy_modularity_communities(network)
+    # Create a mapping from node to community
+    community_dict = {node: idx for idx, community in enumerate(communities) for node in community}
+    # Create a binary neighborhood matrix
     neighborhoods = np.zeros((network.number_of_nodes(), network.number_of_nodes()), dtype=int)
-
-    # Populate the neighborhoods matrix based on Dijkstra's distances
-    for source, targets in all_dijkstra_paths.items():
-        max_length = max(targets.values()) if targets else 1  # Handle cases with no targets
-        for target, length in targets.items():
-            if np.isnan(length):
-                neighborhoods[source, target] = max_length  # Use max distance for NaN
-            elif length == 0:
-                neighborhoods[source, target] = 1  # Assign 1 for zero-length paths (self-loops)
-            else:
-                neighborhoods[source, target] = 1 / length  # Inverse of the distance
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    for node_i, community_i in community_dict.items():
+        for node_j, community_j in community_dict.items():
+            if community_i == community_j:
+                neighborhoods[node_index[node_i], node_index[node_j]] = 1
 
     return neighborhoods
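
The hunk above replaces the Dijkstra-based weighted matrix with a binary community co-membership matrix built by a nested loop over all node pairs. An equivalent, vectorized sketch of the same idea (illustrative only, not the package's code):

import networkx as nx
import numpy as np
from networkx.algorithms.community import greedy_modularity_communities

network = nx.karate_club_graph()  # illustrative graph

communities = greedy_modularity_communities(network)
node_index = {node: i for i, node in enumerate(network.nodes())}

# Label each node with its community id, in the network's node order
labels = np.zeros(network.number_of_nodes(), dtype=int)
for community_id, community in enumerate(communities):
    for node in community:
        labels[node_index[node]] = community_id

# Nodes share a neighborhood exactly when their community labels match
neighborhoods = (labels[:, None] == labels[None, :]).astype(int)
print(neighborhoods.shape)  # (34, 34)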
 
@@ -44,21 +41,19 @@ def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
         network (nx.Graph): The network graph.
 
     Returns:
-        np.ndarray: Neighborhood matrix based on Label Propagation.
+        np.ndarray: Binary neighborhood matrix on Label Propagation.
     """
     # Apply Label Propagation
     communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
-
     # Create a mapping from node to community
     community_dict = {}
     for community_id, community in enumerate(communities):
         for node in community:
             community_dict[node] = community_id
 
-    # Create a neighborhood matrix
+    # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
     # Assign neighborhoods based on community labels
     for node_i, community_i in community_dict.items():
         for node_j, community_j in community_dict.items():
@@ -79,14 +74,14 @@ def calculate_louvain_neighborhoods(
         random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
 
     Returns:
-        np.ndarray: Neighborhood matrix based on the Louvain method.
+        np.ndarray: Binary neighborhood matrix on the Louvain method.
     """
     # Apply Louvain method to partition the network
     partition = community_louvain.best_partition(
         network, resolution=resolution, random_state=random_seed
     )
+    # Create a binary neighborhood matrix
     neighborhoods = np.zeros((network.number_of_nodes(), network.number_of_nodes()), dtype=int)
-
     # Assign neighborhoods based on community partitions
     for node_i, community_i in partition.items():
         for node_j, community_j in partition.items():
@@ -103,7 +98,7 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
         network (nx.Graph): The network graph.
 
     Returns:
-        np.ndarray: Neighborhood matrix based on Markov Clustering.
+        np.ndarray: Binary neighborhood matrix on Markov Clustering.
     """
     # Convert the graph to an adjacency matrix
     adjacency_matrix = nx.to_numpy_array(network)
@@ -111,17 +106,15 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
     result = mc.run_mcl(adjacency_matrix)  # Run MCL with default parameters
     # Get clusters
     clusters = mc.get_clusters(result)
-
     # Create a community label for each node
     community_dict = {}
     for community_id, community in enumerate(clusters):
         for node in community:
             community_dict[node] = community_id
 
-    # Create a neighborhood matrix
+    # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
     # Assign neighborhoods based on community labels
     for node_i, community_i in community_dict.items():
         for node_j, community_j in community_dict.items():
@@ -138,21 +131,19 @@ def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
         network (nx.Graph): The network graph.
 
     Returns:
-        np.ndarray: Neighborhood matrix based on Spin Glass communities.
+        np.ndarray: Binary neighborhood matrix on Spin Glass communities.
     """
     # Use the asynchronous label propagation algorithm as a proxy for Spin Glass
     communities = asyn_lpa_communities(network)
-
     # Create a community label for each node
     community_dict = {}
     for community_id, community in enumerate(communities):
         for node in community:
             community_dict[node] = community_id
 
-    # Create a neighborhood matrix
+    # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
     # Assign neighborhoods based on community labels
     for node_i, community_i in community_dict.items():
         for node_j, community_j in community_dict.items():
@@ -169,21 +160,19 @@ def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
         network (nx.Graph): The network graph.
 
     Returns:
-        np.ndarray: Neighborhood matrix based on Walktrap communities.
+        np.ndarray: Binary neighborhood matrix on Walktrap communities.
     """
     # Use the asynchronous label propagation algorithm as a proxy for Walktrap
     communities = asyn_lpa_communities(network)
-
     # Create a community label for each node
     community_dict = {}
    for community_id, community in enumerate(communities):
         for node in community:
             community_dict[node] = community_id
 
-    # Create a neighborhood matrix
+    # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
     # Assign neighborhoods based on community labels
     for node_i, community_i in community_dict.items():
         for node_j, community_j in community_dict.items():