risk-network 0.0.8b1__py3-none-any.whl → 0.0.8b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/network/graph.py +21 -22
- risk/network/plot.py +248 -75
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/METADATA +84 -21
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/RECORD +8 -8
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/WHEEL +0 -0
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/network/graph.py
CHANGED
@@ -148,27 +148,6 @@ class NetworkGraph:
 
         return transformed_colors
 
-    def _get_composite_node_colors(self, domain_colors: np.ndarray) -> np.ndarray:
-        """Generate composite colors for nodes based on domain colors and counts.
-
-        Args:
-            domain_colors (np.ndarray): Array of colors corresponding to each domain.
-
-        Returns:
-            np.ndarray: Array of composite colors for each node.
-        """
-        # Determine the number of nodes
-        num_nodes = len(self.node_coordinates)
-        # Initialize composite colors array with shape (number of nodes, 4) for RGBA
-        composite_colors = np.zeros((num_nodes, 4))
-        # Assign colors to nodes based on domain_colors
-        for domain_id, nodes in self.domain_id_to_node_ids_map.items():
-            color = domain_colors[domain_id]
-            for node in nodes:
-                composite_colors[node] = color
-
-        return composite_colors
-
     def _get_domain_colors(
         self,
         cmap: str = "gist_rainbow",
@@ -193,9 +172,29 @@ class NetworkGraph:
            color=color,
            random_seed=random_seed,
        )
-        self.network, self.domain_id_to_node_ids_map
        return dict(zip(self.domain_id_to_node_ids_map.keys(), domain_colors))
 
+    def _get_composite_node_colors(self, domain_colors: np.ndarray) -> np.ndarray:
+        """Generate composite colors for nodes based on domain colors and counts.
+
+        Args:
+            domain_colors (np.ndarray): Array of colors corresponding to each domain.
+
+        Returns:
+            np.ndarray: Array of composite colors for each node.
+        """
+        # Determine the number of nodes
+        num_nodes = len(self.node_coordinates)
+        # Initialize composite colors array with shape (number of nodes, 4) for RGBA
+        composite_colors = np.zeros((num_nodes, 4))
+        # Assign colors to nodes based on domain_colors
+        for domain_id, nodes in self.domain_id_to_node_ids_map.items():
+            color = domain_colors[domain_id]
+            for node in nodes:
+                composite_colors[node] = color
+
+        return composite_colors
+
 
    def _transform_colors(
        colors: np.ndarray,
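The relocated `_get_composite_node_colors` is otherwise unchanged: it stamps every node with the RGBA color of its domain. A minimal standalone sketch of that mapping, assuming a toy two-domain graph (the map and palette below are made up for illustration; the fancy-indexed assignment is an equivalent vectorized form of the method's inner per-node loop):

```python
import numpy as np

# Hypothetical stand-ins for the NetworkGraph attributes used by the method
node_coordinates = np.zeros((5, 2))                    # 5 nodes; positions don't matter here
domain_id_to_node_ids_map = {0: [0, 1], 1: [2, 3, 4]}  # domain -> member node indices
domain_colors = np.array(
    [[1.0, 0.0, 0.0, 1.0],   # domain 0 -> red
     [0.0, 0.0, 1.0, 1.0]]   # domain 1 -> blue
)

# One RGBA row per node, as in _get_composite_node_colors
composite_colors = np.zeros((len(node_coordinates), 4))
for domain_id, nodes in domain_id_to_node_ids_map.items():
    composite_colors[nodes] = domain_colors[domain_id]  # vectorized per-domain assignment

print(composite_colors)  # rows 0-1 red, rows 2-4 blue
```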
risk/network/plot.py
CHANGED
@@ -3,6 +3,7 @@ risk/network/plot
 ~~~~~~~~~~~~~~~~~
 """
 
+from functools import lru_cache
 from typing import Any, Dict, List, Tuple, Union
 
 import matplotlib.colors as mcolors
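The only import change is `functools.lru_cache`. The decoration itself is not visible in these hunks, but the new `_process_terms` further down passes `tuple(terms)` (hashable, unlike a list) into `_combine_words`, which is the usual shape of a memoized helper. A sketch of that pattern, with a hypothetical stand-in body:

```python
from functools import lru_cache

@lru_cache(maxsize=None)
def combine_words_cached(words: tuple, max_length: int, max_words: int) -> str:
    """Toy stand-in for _combine_words. lru_cache requires hashable arguments,
    which is why a caller would convert its term list to a tuple first."""
    return "::::".join(words[:max_words])

combine_words_cached(("golgi", "transport"), 20, 4)  # computed once
combine_words_cached(("golgi", "transport"), 20, 4)  # served from the cache
```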
@@ -748,15 +749,6 @@ class NetworkPlotter:
        # Set max_labels to the total number of domains if not provided (None)
        if max_labels is None:
            max_labels = len(self.graph.domain_id_to_node_ids_map)
-
-        # Convert colors to RGBA using the _to_rgba helper function
-        fontcolor = _to_rgba(
-            fontcolor, fontalpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
-        )
-        arrow_color = _to_rgba(
-            arrow_color, arrow_alpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
-        )
-
        # Normalize words_to_omit to lowercase
        if words_to_omit:
            words_to_omit = set(word.lower() for word in words_to_omit)
@@ -773,76 +765,42 @@
        filtered_domain_terms = {}
        # Handle the ids_to_keep logic
        if ids_to_keep:
-            # …
-            …
-                if ids_to_replace and domain in ids_to_replace:
-                    terms = ids_to_replace[domain].split(" ")
-                else:
-                    terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
-
-                # Apply words_to_omit, word length constraints, and max_words
-                if words_to_omit:
-                    terms = [term for term in terms if term.lower() not in words_to_omit]
-                terms = [
-                    term for term in terms if min_word_length <= len(term) <= max_word_length
-                ]
-                terms = terms[:max_words]
-
-                # Check if the domain passes the word count condition
-                if len(terms) >= min_words:
-                    filtered_domain_centroids[domain] = domain_centroids[domain]
-                    filtered_domain_terms[domain] = " ".join(terms)
-                    valid_indices.append(
-                        list(domain_centroids.keys()).index(domain)
-                    )  # Track the valid index
+            # Process the ids_to_keep first INPLACE
+            self._process_ids_to_keep(
+                ids_to_keep,
+                max_labels,
+                domain_centroids,
+                ids_to_replace,
+                words_to_omit,
+                min_word_length,
+                max_word_length,
+                max_words,
+                min_words,
+                filtered_domain_centroids,
+                filtered_domain_terms,
+                valid_indices,
+            )
 
        # Calculate remaining labels to plot after processing ids_to_keep
        remaining_labels = (
            max_labels - len(ids_to_keep) if ids_to_keep and max_labels else max_labels
        )
-        # Process remaining domains to fill in additional labels, if there are slots left
+        # Process remaining domains INPLACE to fill in additional labels, if there are slots left
        if remaining_labels and remaining_labels > 0:
-            …
-                if words_to_omit:
-                    terms = [term for term in terms if term.lower() not in words_to_omit]
-
-                terms = [term for term in terms if min_word_length <= len(term) <= max_word_length]
-                terms = terms[:max_words]
-                # Check if the domain passes the word count condition
-                if len(terms) >= min_words:
-                    filtered_domain_centroids[domain] = centroid
-                    filtered_domain_terms[domain] = " ".join(terms)
-                    valid_indices.append(idx)  # Track the valid index
-
-                # Stop once we've reached the max_labels limit
-                if len(filtered_domain_centroids) >= max_labels:
-                    break
+            self._process_remaining_domains(
+                domain_centroids,
+                ids_to_keep,
+                ids_to_replace,
+                words_to_omit,
+                min_word_length,
+                max_word_length,
+                max_words,
+                min_words,
+                max_labels,
+                filtered_domain_centroids,
+                filtered_domain_terms,
+                valid_indices,
+            )
 
        # Calculate the bounding box around the network
        center, radius = _calculate_bounding_box(self.graph.node_coordinates, radius_margin=scale)
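Both new helpers return `None` and follow an output-parameter convention: the caller allocates `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` once, and `_process_ids_to_keep` and `_process_remaining_domains` fill them in place (hence the INPLACE comments above). A minimal sketch of the convention, with hypothetical data:

```python
import numpy as np

def keep_first_n(domain_centroids, n, filtered_centroids, valid_indices):
    """Toy analogue of the new helpers: mutates the output containers, returns nothing."""
    for idx, (domain, centroid) in enumerate(domain_centroids.items()):
        if len(filtered_centroids) >= n:
            break  # Stop once the label budget is spent
        filtered_centroids[domain] = centroid
        valid_indices.append(idx)

domain_centroids = {"golgi": np.array([0.0, 0.0]), "actin": np.array([1.0, 1.0])}
filtered_centroids, valid_indices = {}, []
keep_first_n(domain_centroids, 1, filtered_centroids, valid_indices)
print(filtered_centroids, valid_indices)  # {'golgi': array([0., 0.])} [0]
```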
@@ -850,11 +808,19 @@ class NetworkPlotter:
        best_label_positions = _calculate_best_label_positions(
            filtered_domain_centroids, center, radius, offset
        )
+        # Convert colors to RGBA using the _to_rgba helper function
+        fontcolor = _to_rgba(
+            fontcolor, fontalpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
+        )
+        arrow_color = _to_rgba(
+            arrow_color, arrow_alpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
+        )
 
        # Annotate the network with labels
        for idx, (domain, pos) in zip(valid_indices, best_label_positions.items()):
            centroid = filtered_domain_centroids[domain]
-
+            # Split by special key to split annotation into multiple lines
+            annotations = filtered_domain_terms[domain].split("::::")
            self.ax.annotate(
                "\n".join(annotations),
                xy=centroid,
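Each filtered term string now carries `::::` as a line-break sentinel (emitted by the new `_combine_words` helper further down), so the annotation step only needs a split and a newline join:

```python
label = "golgi transport::::actin nucleation"  # as produced by _combine_words
annotations = label.split("::::")
print("\n".join(annotations))
# golgi transport
# actin nucleation
```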
@@ -1001,6 +967,158 @@ class NetworkPlotter:
        domain_central_node = node_positions[central_node_idx]
        return domain_central_node
 
+    def _process_ids_to_keep(
+        self,
+        ids_to_keep: Union[List[str], Tuple[str], np.ndarray, None],
+        max_labels: Union[int, None],
+        domain_centroids: Dict[str, np.ndarray],
+        ids_to_replace: Union[Dict[str, str], None],
+        words_to_omit: Union[List[str], None],
+        min_word_length: int,
+        max_word_length: int,
+        max_words: int,
+        min_words: int,
+        filtered_domain_centroids: Dict[str, np.ndarray],
+        filtered_domain_terms: Dict[str, str],
+        valid_indices: List[int],
+    ) -> None:
+        """Process the ids_to_keep, apply filtering, and store valid domain centroids and terms.
+
+        Args:
+            ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled.
+            max_labels (int, optional): Maximum number of labels allowed.
+            domain_centroids (dict): Mapping of domains to their centroids.
+            ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
+            words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
+            min_word_length (int): Minimum allowed word length.
+            max_word_length (int): Maximum allowed word length.
+            max_words (int): Maximum number of words allowed.
+            min_words (int): Minimum number of words required for a domain.
+            filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
+            filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
+            valid_indices (list): List to store valid indices (output).
+
+        Note:
+            The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are modified in-place.
+
+        Raises:
+            ValueError: If the number of provided `ids_to_keep` exceeds `max_labels`.
+        """
+        # Convert ids_to_keep to a set for faster, unique lookups
+        ids_to_keep = set(ids_to_keep) if ids_to_keep else set()
+        # Check if the number of provided ids_to_keep exceeds max_labels
+        if max_labels is not None and len(ids_to_keep) > max_labels:
+            raise ValueError(
+                f"Number of provided IDs ({len(ids_to_keep)}) exceeds max_labels ({max_labels})."
+            )
+
+        # Process each domain in ids_to_keep
+        for domain in ids_to_keep:
+            if domain in self.graph.domain_id_to_domain_terms_map and domain in domain_centroids:
+                filtered_domain_terms[domain] = self._process_terms(
+                    domain=domain,
+                    ids_to_replace=ids_to_replace,
+                    words_to_omit=words_to_omit,
+                    min_word_length=min_word_length,
+                    max_word_length=max_word_length,
+                    max_words=max_words,
+                )
+                filtered_domain_centroids[domain] = domain_centroids[domain]
+                valid_indices.append(list(domain_centroids.keys()).index(domain))
+
+    def _process_remaining_domains(
+        self,
+        domain_centroids: Dict[str, np.ndarray],
+        ids_to_keep: Union[List[str], Tuple[str], np.ndarray, None],
+        ids_to_replace: Union[Dict[str, str], None],
+        words_to_omit: Union[List[str], None],
+        min_word_length: int,
+        max_word_length: int,
+        max_words: int,
+        min_words: int,
+        max_labels: Union[int, None],
+        filtered_domain_centroids: Dict[str, np.ndarray],
+        filtered_domain_terms: Dict[str, str],
+        valid_indices: List[int],
+    ) -> None:
+        """Process remaining domains to fill in additional labels, if there are slots left.
+
+        Args:
+            domain_centroids (dict): Mapping of domains to their centroids.
+            ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled. Defaults to None.
+            ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
+            words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
+            min_word_length (int): Minimum allowed word length.
+            max_word_length (int): Maximum allowed word length.
+            max_words (int): Maximum number of words allowed.
+            min_words (int): Minimum number of words required for a domain.
+            max_labels (int, optional): Maximum number of labels allowed. Defaults to None.
+            filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
+            filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
+            valid_indices (list): List to store valid indices (output).
+
+        Note:
+            The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are modified in-place.
+        """
+        for idx, (domain, centroid) in enumerate(domain_centroids.items()):
+            # Check if the domain is NaN and continue if true
+            if pd.isna(domain) or (isinstance(domain, float) and np.isnan(domain)):
+                continue  # Skip NaN domains
+            if ids_to_keep and domain in ids_to_keep:
+                continue  # Skip domains already handled by ids_to_keep
+
+            filtered_domain_terms[domain] = self._process_terms(
+                domain=domain,
+                ids_to_replace=ids_to_replace,
+                words_to_omit=words_to_omit,
+                min_word_length=min_word_length,
+                max_word_length=max_word_length,
+                max_words=max_words,
+            )
+            filtered_domain_centroids[domain] = centroid
+            valid_indices.append(idx)
+
+    def _process_terms(
+        self,
+        domain: str,
+        ids_to_replace: Union[Dict[str, str], None],
+        words_to_omit: Union[List[str], None],
+        min_word_length: int,
+        max_word_length: int,
+        max_words: int,
+    ) -> List[str]:
+        """Process terms for a domain, applying word length constraints and combining words where appropriate.
+
+        Args:
+            domain (str): The domain being processed.
+            ids_to_replace (dict, optional): Dictionary mapping domain IDs to custom labels.
+            words_to_omit (list, optional): List of words to omit from the labels.
+            min_word_length (int): Minimum allowed word length.
+            max_word_length (int): Maximum allowed word length.
+            max_words (int): Maximum number of words allowed.
+
+        Returns:
+            list: Processed terms, with words combined if necessary to fit within constraints.
+        """
+        # Handle ids_to_replace logic
+        if ids_to_replace and domain in ids_to_replace:
+            terms = ids_to_replace[domain].split(" ")
+        else:
+            terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
+
+        # Apply words_to_omit and word length constraints
+        if words_to_omit:
+            terms = [
+                term
+                for term in terms
+                if term.lower() not in words_to_omit and len(term) >= min_word_length
+            ]
+
+        # Use the combine_words function directly to handle word combinations and length constraints
+        compressed_terms = _combine_words(tuple(terms), max_word_length, max_words)
+
+        return compressed_terms
+
    def get_annotated_node_colors(
        self,
        cmap: str = "gist_rainbow",
@@ -1254,7 +1372,9 @@ def _to_rgba(
    # Handle array of colors case (including strings, RGB, and RGBA)
    elif isinstance(color, (list, tuple, np.ndarray)):
        rgba_colors = []
-        for …
+        for i in range(num_repeats):
+            # Reiterate over the colors if the number of repeats exceeds the number of colors
+            c = color[i % len(color)]
            # Ensure each element is either a valid string or a list/tuple of length 3 (RGB) or 4 (RGBA)
            if isinstance(c, str) or (
                isinstance(c, (list, tuple, np.ndarray)) and len(c) in [3, 4]
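The previously truncated loop in `_to_rgba` is completed with a modulo index, so a short palette cycles to cover any `num_repeats`. The cycling in isolation:

```python
colors = ["red", "green", "blue"]
num_repeats = 7
# Reiterate over the colors once num_repeats exceeds the palette length
cycled = [colors[i % len(colors)] for i in range(num_repeats)]
print(cycled)  # ['red', 'green', 'blue', 'red', 'green', 'blue', 'red']
```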
@@ -1313,6 +1433,59 @@ def _calculate_bounding_box(
    return center, radius
 
 
+def _combine_words(words: List[str], max_length: int, max_words: int) -> str:
+    """Combine words to fit within the max_length and max_words constraints,
+    and separate the final output by ':' for plotting.
+
+    Args:
+        words (List[str]): List of words to combine.
+        max_length (int): Maximum allowed length for a combined line.
+        max_words (int): Maximum number of lines (words) allowed.
+
+    Returns:
+        str: String of combined words separated by ':' for line breaks.
+    """
+
+    def try_combinations(words_batch: List[str]) -> List[str]:
+        """Try to combine words within a batch and return them with combined words separated by ':'."""
+        combined_lines = []
+        i = 0
+        while i < len(words_batch):
+            current_word = words_batch[i]
+            combined_word = current_word  # Start with the current word
+            # Try to combine more words if possible, and ensure the combination fits within max_length
+            for j in range(i + 1, len(words_batch)):
+                next_word = words_batch[j]
+                if len(combined_word) + len(next_word) + 2 <= max_length:  # +2 for ', '
+                    combined_word = f"{combined_word} {next_word}"
+                    i += 1  # Move past the combined word
+                else:
+                    break  # Stop combining if the length is exceeded
+
+            combined_lines.append(combined_word)  # Add the combined word or single word
+            i += 1  # Move to the next word
+
+            # Stop if we've reached the max_words limit
+            if len(combined_lines) >= max_words:
+                break
+
+        return combined_lines
+
+    # Main logic: start with max_words number of words
+    combined_lines = try_combinations(words[:max_words])
+    remaining_words = words[max_words:]  # Remaining words after the initial batch
+
+    # Continue pulling more words until we fill the lines
+    while remaining_words and len(combined_lines) < max_words:
+        available_slots = max_words - len(combined_lines)
+        words_to_add = remaining_words[:available_slots]
+        remaining_words = remaining_words[available_slots:]
+        combined_lines += try_combinations(words_to_add)
+
+    # Join the final combined lines with '::::', a special separator for line breaks
+    return "::::".join(combined_lines[:max_words])
+
+
 def _calculate_best_label_positions(
    filtered_domain_centroids: Dict[str, Any], center: np.ndarray, radius: float, offset: float
 ) -> Dict[str, Any]:
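A quick trace of the new helper's greedy packing: the `+ 2` budgets the joining space when two words share a line, and once `max_words` lines exist, leftover words are dropped. `_combine_words` is private, so this import is for illustration only (module path as released in 0.0.8b2):

```python
from risk.network.plot import _combine_words  # private helper, shown for illustration

label = _combine_words(
    ("ribosome", "assembly", "mitochondrial", "organization"),
    max_length=20,
    max_words=2,
)
# "ribosome assembly" (17 chars) fits one 20-char line; "mitochondrial" takes
# the second and final line, so "organization" is dropped.
print(label)  # ribosome assembly::::mitochondrial
```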
{risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.8b1
+Version: 0.0.8b2
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
 Requires-Dist: threadpoolctl
 Requires-Dist: tqdm
 
-
-<img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
-</p>
+# RISK
 
 <p align="center">
-    <…
-    <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
-    <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
+    <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
 </p>
 
-
+<br>
+
+![]()
+[![]()](https://pypi.python.org/pypi/risk-network)
+![]()
+[![]()](https://doi.org/10.5281/zenodo.xxxxxxx)
+![]()
+![]()
+
+**RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
 
-
+## Documentation and Tutorial
+
+- **Documentation**: Comprehensive documentation is available at [Documentation link].
+- **Tutorial**: An interactive Jupyter notebook tutorial can be found at [Tutorial link].
+
+We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
+
+## Installation
 
-RISK is …
+RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
+
+```bash
+pip install risk-network
+```
 
 ## Features
 
-- …
-- …
-- …
+- **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
+- **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
+- **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
+- **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
+- **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
+- **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
 
-## Example
+## Example Usage
 
-*Saccharomyces cerevisiae* …
+We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
 
-![]()
 
-
+RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
+
+## Citation
+
+If you use RISK in your research, please cite the following:
+
+**Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
+
+## Software Architecture and Implementation
 
-
+RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
 
-
+### Supported Data Formats
 
-
+- **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
+- **Visualization Outputs**: SVG, PNG, PDF.
+
+### Clustering Algorithms
+
+- **Available Algorithms**:
+  - Greedy Modularity
+  - Label Propagation
+  - Louvain
+  - Markov Clustering
+  - Spinglass
+  - Walktrap
+- **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
+
+### Statistical Tests
+
+- **Hypergeometric Test**
+- **Permutation Test** (single- or multi-process modes)
+- **Poisson Test**
+
+## Performance and Efficiency
+
+In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
+
+## Contributing
+
+We welcome contributions from the community. Please use the following resources:
+
+- [Issues Tracker](https://github.com/irahorecka/risk/issues)
+- [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
+
+## Support
+
+If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
 
 ## License
 
-
+RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
+
+---
+
+**Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
{risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-risk/__init__.py,sha256=…
+risk/__init__.py,sha256=DqyXwLfxTtmQed3FAS_3IKRthc1pFIdFBZ2kbnNC7VY,112
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
 risk/risk.py,sha256=FaQhDCBZxZSAXJsScH0rSbjjCTNZA5vgf9rJj1GHW44,20924
 risk/annotations/__init__.py,sha256=vUpVvMRE5if01Ic8QY6M2Ae3EFGJHdugEe9PdEkAW4Y,138
@@ -13,9 +13,9 @@ risk/neighborhoods/domains.py,sha256=Ov52EEr-tWqy96y8_0tJ9f1K8FI-8tZQxHR7a59A1k8
 risk/neighborhoods/neighborhoods.py,sha256=M-wL4xB_BUTlSZg90swygO5NdrZ6hFUFqs6jsiZaqHk,18260
 risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
 risk/network/geometry.py,sha256=H1yGVVqgbfpzBzJwEheDLfvGLSA284jGQQTn612L4Vc,6759
-risk/network/graph.py,sha256=…
+risk/network/graph.py,sha256=EwD4-1THC5YNdP6PY01Oe35k2QYYqtZpxWraPVH6wa4,16426
 risk/network/io.py,sha256=kY7HqmL3wa1NnqHu61_G8IpT21qpBijpAZ4ixmsseJA,22911
-risk/network/plot.py,sha256=…
+risk/network/plot.py,sha256=5YBq2AxqCW2aK5dQBvUtNMqGweJPoqwsKpCm0t2ldps,74646
 risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
 risk/stats/hypergeom.py,sha256=o6Qnj31gCAKxr2uQirXrbv7XvdDJGEq69MFW-ubx_hA,2272
 risk/stats/poisson.py,sha256=8x9hB4DCukq4gNIlIKO-c_jYG1-BTwTX53oLauFyfj8,1793
@@ -23,8 +23,8 @@ risk/stats/stats.py,sha256=kvShov-94W6ffgDUTb522vB9hDJQSyTsYif_UIaFfSM,7059
 risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
 risk/stats/permutation/permutation.py,sha256=D84Rcpt6iTQniK0PfQGcw9bLcHbMt9p-ARcurUnIXZQ,10095
 risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
-risk_network-0.0.8b1.dist-info/LICENSE,sha256=…
-risk_network-0.0.8b1.dist-info/METADATA,sha256=…
-risk_network-0.0.8b1.dist-info/WHEEL,sha256=…
-risk_network-0.0.8b1.dist-info/top_level.txt,sha256=…
-risk_network-0.0.8b1.dist-info/RECORD,,
+risk_network-0.0.8b2.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.8b2.dist-info/METADATA,sha256=h8kWeUQSzCUAV8PQfOde5UvBZa0Vz0ggUonwu33w4R4,47450
+risk_network-0.0.8b2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+risk_network-0.0.8b2.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.8b2.dist-info/RECORD,,
{risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/LICENSE
File without changes
{risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/WHEEL
File without changes
{risk_network-0.0.8b1.dist-info → risk_network-0.0.8b2.dist-info}/top_level.txt
File without changes