risk-network 0.0.8b1__py3-none-any.whl → 0.0.8b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/network/graph.py +21 -22
- risk/network/plot.py +318 -91
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b3.dist-info}/METADATA +84 -21
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b3.dist-info}/RECORD +8 -8
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b3.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b3.dist-info}/WHEEL +0 -0
- {risk_network-0.0.8b1.dist-info → risk_network-0.0.8b3.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/network/graph.py
CHANGED
@@ -148,27 +148,6 @@ class NetworkGraph:
|
|
148
148
|
|
149
149
|
return transformed_colors
|
150
150
|
|
151
|
-
def _get_composite_node_colors(self, domain_colors: np.ndarray) -> np.ndarray:
|
152
|
-
"""Generate composite colors for nodes based on domain colors and counts.
|
153
|
-
|
154
|
-
Args:
|
155
|
-
domain_colors (np.ndarray): Array of colors corresponding to each domain.
|
156
|
-
|
157
|
-
Returns:
|
158
|
-
np.ndarray: Array of composite colors for each node.
|
159
|
-
"""
|
160
|
-
# Determine the number of nodes
|
161
|
-
num_nodes = len(self.node_coordinates)
|
162
|
-
# Initialize composite colors array with shape (number of nodes, 4) for RGBA
|
163
|
-
composite_colors = np.zeros((num_nodes, 4))
|
164
|
-
# Assign colors to nodes based on domain_colors
|
165
|
-
for domain_id, nodes in self.domain_id_to_node_ids_map.items():
|
166
|
-
color = domain_colors[domain_id]
|
167
|
-
for node in nodes:
|
168
|
-
composite_colors[node] = color
|
169
|
-
|
170
|
-
return composite_colors
|
171
|
-
|
172
151
|
def _get_domain_colors(
|
173
152
|
self,
|
174
153
|
cmap: str = "gist_rainbow",
|
@@ -193,9 +172,29 @@ class NetworkGraph:
|
|
193
172
|
color=color,
|
194
173
|
random_seed=random_seed,
|
195
174
|
)
|
196
|
-
self.network, self.domain_id_to_node_ids_map
|
197
175
|
return dict(zip(self.domain_id_to_node_ids_map.keys(), domain_colors))
|
198
176
|
|
177
|
+
def _get_composite_node_colors(self, domain_colors: np.ndarray) -> np.ndarray:
|
178
|
+
"""Generate composite colors for nodes based on domain colors and counts.
|
179
|
+
|
180
|
+
Args:
|
181
|
+
domain_colors (np.ndarray): Array of colors corresponding to each domain.
|
182
|
+
|
183
|
+
Returns:
|
184
|
+
np.ndarray: Array of composite colors for each node.
|
185
|
+
"""
|
186
|
+
# Determine the number of nodes
|
187
|
+
num_nodes = len(self.node_coordinates)
|
188
|
+
# Initialize composite colors array with shape (number of nodes, 4) for RGBA
|
189
|
+
composite_colors = np.zeros((num_nodes, 4))
|
190
|
+
# Assign colors to nodes based on domain_colors
|
191
|
+
for domain_id, nodes in self.domain_id_to_node_ids_map.items():
|
192
|
+
color = domain_colors[domain_id]
|
193
|
+
for node in nodes:
|
194
|
+
composite_colors[node] = color
|
195
|
+
|
196
|
+
return composite_colors
|
197
|
+
|
199
198
|
|
200
199
|
def _transform_colors(
|
201
200
|
colors: np.ndarray,
|
risk/network/plot.py
CHANGED
@@ -17,6 +17,8 @@ from scipy.stats import gaussian_kde
|
|
17
17
|
from risk.log import params, logger
|
18
18
|
from risk.network.graph import NetworkGraph
|
19
19
|
|
20
|
+
TERM_DELIMITER = "::::" # String used to separate multiple domain terms when constructing composite domain labels
|
21
|
+
|
20
22
|
|
21
23
|
class NetworkPlotter:
|
22
24
|
"""A class for visualizing network graphs with customizable options.
|
@@ -678,10 +680,10 @@ class NetworkPlotter:
|
|
678
680
|
arrow_base_shrink: float = 0.0,
|
679
681
|
arrow_tip_shrink: float = 0.0,
|
680
682
|
max_labels: Union[int, None] = None,
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
683
|
+
max_label_lines: Union[int, None] = None,
|
684
|
+
min_label_lines: int = 1,
|
685
|
+
max_chars_per_line: Union[int, None] = None,
|
686
|
+
min_chars_per_line: int = 1,
|
685
687
|
words_to_omit: Union[List, None] = None,
|
686
688
|
overlay_ids: bool = False,
|
687
689
|
ids_to_keep: Union[List, Tuple, np.ndarray, None] = None,
|
@@ -703,10 +705,10 @@ class NetworkPlotter:
|
|
703
705
|
arrow_base_shrink (float, optional): Distance between the text and the base of the arrow. Defaults to 0.0.
|
704
706
|
arrow_tip_shrink (float, optional): Distance between the arrow tip and the centroid. Defaults to 0.0.
|
705
707
|
max_labels (int, optional): Maximum number of labels to plot. Defaults to None (no limit).
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
708
|
+
max_label_lines (int, optional): Maximum number of lines in a label. Defaults to None (no limit).
|
709
|
+
min_label_lines (int, optional): Minimum number of lines in a label. Defaults to 1.
|
710
|
+
max_chars_per_line (int, optional): Maximum number of characters in a line to display. Defaults to None (no limit).
|
711
|
+
min_chars_per_line (int, optional): Minimum number of characters in a line to display. Defaults to 1.
|
710
712
|
words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
|
711
713
|
overlay_ids (bool, optional): Whether to overlay domain IDs in the center of the centroids. Defaults to False.
|
712
714
|
ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled. To discover domain IDs,
|
@@ -735,28 +737,26 @@ class NetworkPlotter:
|
|
735
737
|
label_arrow_base_shrink=arrow_base_shrink,
|
736
738
|
label_arrow_tip_shrink=arrow_tip_shrink,
|
737
739
|
label_max_labels=max_labels,
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
740
|
+
label_min_label_lines=min_label_lines,
|
741
|
+
label_max_label_lines=max_label_lines,
|
742
|
+
label_max_chars_per_line=max_chars_per_line,
|
743
|
+
label_min_chars_per_line=min_chars_per_line,
|
742
744
|
label_words_to_omit=words_to_omit,
|
743
745
|
label_overlay_ids=overlay_ids,
|
744
746
|
label_ids_to_keep=ids_to_keep,
|
745
747
|
label_ids_to_replace=ids_to_replace,
|
746
748
|
)
|
747
749
|
|
750
|
+
# Normalize ids_to_keep to a tuple (an empty tuple when it is None or empty)
|
751
|
+
ids_to_keep = tuple(ids_to_keep) if ids_to_keep else tuple()
|
748
752
|
# Set max_labels to the total number of domains if not provided (None)
|
749
753
|
if max_labels is None:
|
750
754
|
max_labels = len(self.graph.domain_id_to_node_ids_map)
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
arrow_color = _to_rgba(
|
757
|
-
arrow_color, arrow_alpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
|
758
|
-
)
|
759
|
-
|
755
|
+
# Set max_label_lines and max_chars_per_line to large numbers if not provided (None)
|
756
|
+
if max_label_lines is None:
|
757
|
+
max_label_lines = int(1e6)
|
758
|
+
if max_chars_per_line is None:
|
759
|
+
max_chars_per_line = int(1e6)
|
760
760
|
# Normalize words_to_omit to lowercase
|
761
761
|
if words_to_omit:
|
762
762
|
words_to_omit = set(word.lower() for word in words_to_omit)
|
@@ -768,81 +768,47 @@ class NetworkPlotter:
|
|
768
768
|
domain_centroids[domain_id] = self._calculate_domain_centroid(node_ids)
|
769
769
|
|
770
770
|
# Initialize dictionaries and lists for valid indices
|
771
|
-
valid_indices = []
|
772
|
-
filtered_domain_centroids = {}
|
773
|
-
filtered_domain_terms = {}
|
771
|
+
valid_indices = [] # List of valid indices to plot colors and arrows
|
772
|
+
filtered_domain_centroids = {} # Filtered domain centroids to plot
|
773
|
+
filtered_domain_terms = {} # Filtered domain terms to plot
|
774
774
|
# Handle the ids_to_keep logic
|
775
775
|
if ids_to_keep:
|
776
|
-
#
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
if ids_to_replace and domain in ids_to_replace:
|
792
|
-
terms = ids_to_replace[domain].split(" ")
|
793
|
-
else:
|
794
|
-
terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
|
795
|
-
|
796
|
-
# Apply words_to_omit, word length constraints, and max_words
|
797
|
-
if words_to_omit:
|
798
|
-
terms = [term for term in terms if term.lower() not in words_to_omit]
|
799
|
-
terms = [
|
800
|
-
term for term in terms if min_word_length <= len(term) <= max_word_length
|
801
|
-
]
|
802
|
-
terms = terms[:max_words]
|
803
|
-
|
804
|
-
# Check if the domain passes the word count condition
|
805
|
-
if len(terms) >= min_words:
|
806
|
-
filtered_domain_centroids[domain] = domain_centroids[domain]
|
807
|
-
filtered_domain_terms[domain] = " ".join(terms)
|
808
|
-
valid_indices.append(
|
809
|
-
list(domain_centroids.keys()).index(domain)
|
810
|
-
) # Track the valid index
|
776
|
+
# Process the ids_to_keep first INPLACE
|
777
|
+
self._process_ids_to_keep(
|
778
|
+
ids_to_keep=ids_to_keep,
|
779
|
+
domain_centroids=domain_centroids,
|
780
|
+
ids_to_replace=ids_to_replace,
|
781
|
+
words_to_omit=words_to_omit,
|
782
|
+
max_labels=max_labels,
|
783
|
+
min_label_lines=min_label_lines,
|
784
|
+
max_label_lines=max_label_lines,
|
785
|
+
min_chars_per_line=min_chars_per_line,
|
786
|
+
max_chars_per_line=max_chars_per_line,
|
787
|
+
filtered_domain_centroids=filtered_domain_centroids,
|
788
|
+
filtered_domain_terms=filtered_domain_terms,
|
789
|
+
valid_indices=valid_indices,
|
790
|
+
)
|
811
791
|
|
812
792
|
# Calculate remaining labels to plot after processing ids_to_keep
|
813
793
|
remaining_labels = (
|
814
|
-
max_labels - len(
|
794
|
+
max_labels - len(valid_indices) if valid_indices and max_labels else max_labels
|
815
795
|
)
|
816
|
-
# Process remaining domains to fill in additional labels, if there are slots left
|
796
|
+
# Process remaining domains INPLACE to fill in additional labels, if there are slots left
|
817
797
|
if remaining_labels and remaining_labels > 0:
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
if words_to_omit:
|
833
|
-
terms = [term for term in terms if term.lower() not in words_to_omit]
|
834
|
-
|
835
|
-
terms = [term for term in terms if min_word_length <= len(term) <= max_word_length]
|
836
|
-
terms = terms[:max_words]
|
837
|
-
# Check if the domain passes the word count condition
|
838
|
-
if len(terms) >= min_words:
|
839
|
-
filtered_domain_centroids[domain] = centroid
|
840
|
-
filtered_domain_terms[domain] = " ".join(terms)
|
841
|
-
valid_indices.append(idx) # Track the valid index
|
842
|
-
|
843
|
-
# Stop once we've reached the max_labels limit
|
844
|
-
if len(filtered_domain_centroids) >= max_labels:
|
845
|
-
break
|
798
|
+
self._process_remaining_domains(
|
799
|
+
domain_centroids=domain_centroids,
|
800
|
+
ids_to_keep=ids_to_keep,
|
801
|
+
ids_to_replace=ids_to_replace,
|
802
|
+
words_to_omit=words_to_omit,
|
803
|
+
remaining_labels=remaining_labels,
|
804
|
+
min_chars_per_line=min_chars_per_line,
|
805
|
+
max_chars_per_line=max_chars_per_line,
|
806
|
+
max_label_lines=max_label_lines,
|
807
|
+
min_label_lines=min_label_lines,
|
808
|
+
filtered_domain_centroids=filtered_domain_centroids,
|
809
|
+
filtered_domain_terms=filtered_domain_terms,
|
810
|
+
valid_indices=valid_indices,
|
811
|
+
)
|
846
812
|
|
847
813
|
# Calculate the bounding box around the network
|
848
814
|
center, radius = _calculate_bounding_box(self.graph.node_coordinates, radius_margin=scale)
|
@@ -850,11 +816,19 @@ class NetworkPlotter:
|
|
850
816
|
best_label_positions = _calculate_best_label_positions(
|
851
817
|
filtered_domain_centroids, center, radius, offset
|
852
818
|
)
|
819
|
+
# Convert colors to RGBA using the _to_rgba helper function
|
820
|
+
fontcolor = _to_rgba(
|
821
|
+
fontcolor, fontalpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
|
822
|
+
)
|
823
|
+
arrow_color = _to_rgba(
|
824
|
+
arrow_color, arrow_alpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
|
825
|
+
)
|
853
826
|
|
854
827
|
# Annotate the network with labels
|
855
828
|
for idx, (domain, pos) in zip(valid_indices, best_label_positions.items()):
|
856
829
|
centroid = filtered_domain_centroids[domain]
|
857
|
-
|
830
|
+
# Split by special key TERM_DELIMITER to split annotation into multiple lines
|
831
|
+
annotations = filtered_domain_terms[domain].split(TERM_DELIMITER)
|
858
832
|
self.ax.annotate(
|
859
833
|
"\n".join(annotations),
|
860
834
|
xy=centroid,
|
@@ -1001,6 +975,204 @@ class NetworkPlotter:
|
|
1001
975
|
domain_central_node = node_positions[central_node_idx]
|
1002
976
|
return domain_central_node
|
1003
977
|
|
978
|
+
def _process_ids_to_keep(
|
979
|
+
self,
|
980
|
+
ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
|
981
|
+
domain_centroids: Dict[str, np.ndarray],
|
982
|
+
ids_to_replace: Union[Dict[str, str], None],
|
983
|
+
words_to_omit: Union[List[str], None],
|
984
|
+
max_labels: Union[int, None],
|
985
|
+
min_label_lines: int,
|
986
|
+
max_label_lines: int,
|
987
|
+
min_chars_per_line: int,
|
988
|
+
max_chars_per_line: int,
|
989
|
+
filtered_domain_centroids: Dict[str, np.ndarray],
|
990
|
+
filtered_domain_terms: Dict[str, str],
|
991
|
+
valid_indices: List[int],
|
992
|
+
) -> None:
|
993
|
+
"""Process the ids_to_keep, apply filtering, and store valid domain centroids and terms.
|
994
|
+
|
995
|
+
Args:
|
996
|
+
ids_to_keep (list, tuple, or np.ndarray, optional): IDs of domains that must be labeled.
|
997
|
+
domain_centroids (dict): Mapping of domains to their centroids.
|
998
|
+
ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
|
999
|
+
words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
|
1000
|
+
max_labels (int, optional): Maximum number of labels allowed.
|
1001
|
+
min_label_lines (int): Minimum number of lines in a label.
|
1002
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1003
|
+
min_chars_per_line (int): Minimum number of characters in a line to display.
|
1004
|
+
max_chars_per_line (int): Maximum number of characters in a line to display.
|
1005
|
+
filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
|
1006
|
+
filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
|
1007
|
+
valid_indices (list): List to store valid indices (output).
|
1008
|
+
|
1009
|
+
Note:
|
1010
|
+
The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are modified in-place.
|
1011
|
+
|
1012
|
+
Raises:
|
1013
|
+
ValueError: If the number of provided `ids_to_keep` exceeds `max_labels`.
|
1014
|
+
"""
|
1015
|
+
# Check if the number of provided ids_to_keep exceeds max_labels
|
1016
|
+
if max_labels is not None and len(ids_to_keep) > max_labels:
|
1017
|
+
raise ValueError(
|
1018
|
+
f"Number of provided IDs ({len(ids_to_keep)}) exceeds max_labels ({max_labels})."
|
1019
|
+
)
|
1020
|
+
|
1021
|
+
# Process each domain in ids_to_keep
|
1022
|
+
for domain in ids_to_keep:
|
1023
|
+
if domain in self.graph.domain_id_to_domain_terms_map and domain in domain_centroids:
|
1024
|
+
domain_terms = self._process_terms(
|
1025
|
+
domain=domain,
|
1026
|
+
ids_to_replace=ids_to_replace,
|
1027
|
+
words_to_omit=words_to_omit,
|
1028
|
+
max_label_lines=max_label_lines,
|
1029
|
+
min_chars_per_line=min_chars_per_line,
|
1030
|
+
max_chars_per_line=max_chars_per_line,
|
1031
|
+
)
|
1032
|
+
num_domain_lines = len(domain_terms.split(TERM_DELIMITER))
|
1033
|
+
# Check if the number of lines in the label is greater than or equal to the minimum
|
1034
|
+
if num_domain_lines >= min_label_lines:
|
1035
|
+
filtered_domain_terms[domain] = domain_terms
|
1036
|
+
filtered_domain_centroids[domain] = domain_centroids[domain]
|
1037
|
+
valid_indices.append(list(domain_centroids.keys()).index(domain))
|
1038
|
+
|
1039
|
+
def _process_remaining_domains(
|
1040
|
+
self,
|
1041
|
+
domain_centroids: Dict[str, np.ndarray],
|
1042
|
+
ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
|
1043
|
+
ids_to_replace: Union[Dict[str, str], None],
|
1044
|
+
words_to_omit: Union[List[str], None],
|
1045
|
+
remaining_labels: int,
|
1046
|
+
min_label_lines: int,
|
1047
|
+
max_label_lines: int,
|
1048
|
+
min_chars_per_line: int,
|
1049
|
+
max_chars_per_line: int,
|
1050
|
+
filtered_domain_centroids: Dict[str, np.ndarray],
|
1051
|
+
filtered_domain_terms: Dict[str, str],
|
1052
|
+
valid_indices: List[int],
|
1053
|
+
) -> None:
|
1054
|
+
"""Process remaining domains to fill in additional labels, respecting the remaining_labels limit.
|
1055
|
+
|
1056
|
+
Args:
|
1057
|
+
domain_centroids (dict): Mapping of domains to their centroids.
|
1058
|
+
ids_to_keep (list, tuple, or np.ndarray, optional): IDs of domains that must be labeled.
|
1059
|
+
ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
|
1060
|
+
words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
|
1061
|
+
remaining_labels (int): The remaining number of labels that can be generated.
|
1062
|
+
min_label_lines (int): Minimum number of lines in a label.
|
1063
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1064
|
+
min_chars_per_line (int): Minimum number of characters in a line to display.
|
1065
|
+
max_chars_per_line (int): Maximum number of characters in a line to display.
|
1066
|
+
filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
|
1067
|
+
filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
|
1068
|
+
valid_indices (list): List to store valid indices (output).
|
1069
|
+
"""
|
1070
|
+
# Counter to track how many labels have been created
|
1071
|
+
label_count = 0
|
1072
|
+
# Collect domains not in ids_to_keep
|
1073
|
+
remaining_domains = {
|
1074
|
+
domain: centroid
|
1075
|
+
for domain, centroid in domain_centroids.items()
|
1076
|
+
if domain not in ids_to_keep and not pd.isna(domain)
|
1077
|
+
}
|
1078
|
+
|
1079
|
+
# Function to calculate distance between two centroids
|
1080
|
+
def calculate_distance(centroid1, centroid2):
|
1081
|
+
return np.linalg.norm(centroid1 - centroid2)
|
1082
|
+
|
1083
|
+
# Find the farthest apart domains using centroids
|
1084
|
+
if remaining_domains and remaining_labels:
|
1085
|
+
selected_domains = []
|
1086
|
+
first_domain = next(iter(remaining_domains)) # Pick the first domain to start
|
1087
|
+
selected_domains.append(first_domain)
|
1088
|
+
|
1089
|
+
while len(selected_domains) < remaining_labels:
|
1090
|
+
farthest_domain = None
|
1091
|
+
max_distance = -1
|
1092
|
+
# Find the domain farthest from any already selected domain
|
1093
|
+
for candidate_domain, candidate_centroid in remaining_domains.items():
|
1094
|
+
if candidate_domain in selected_domains:
|
1095
|
+
continue
|
1096
|
+
|
1097
|
+
# Calculate the minimum distance to any selected domain
|
1098
|
+
min_distance = min(
|
1099
|
+
calculate_distance(candidate_centroid, remaining_domains[dom])
|
1100
|
+
for dom in selected_domains
|
1101
|
+
)
|
1102
|
+
# Update the farthest domain if the minimum distance is greater
|
1103
|
+
if min_distance > max_distance:
|
1104
|
+
max_distance = min_distance
|
1105
|
+
farthest_domain = candidate_domain
|
1106
|
+
|
1107
|
+
# Add the farthest domain to the selected domains
|
1108
|
+
if farthest_domain:
|
1109
|
+
selected_domains.append(farthest_domain)
|
1110
|
+
else:
|
1111
|
+
break # No more domains to select
|
1112
|
+
|
1113
|
+
# Process the selected domains and add to filtered lists
|
1114
|
+
for domain in selected_domains:
|
1115
|
+
centroid = remaining_domains[domain]
|
1116
|
+
domain_terms = self._process_terms(
|
1117
|
+
domain=domain,
|
1118
|
+
ids_to_replace=ids_to_replace,
|
1119
|
+
words_to_omit=words_to_omit,
|
1120
|
+
max_label_lines=max_label_lines,
|
1121
|
+
min_chars_per_line=min_chars_per_line,
|
1122
|
+
max_chars_per_line=max_chars_per_line,
|
1123
|
+
)
|
1124
|
+
num_domain_lines = len(domain_terms.split(TERM_DELIMITER))
|
1125
|
+
# Check if the number of lines in the label is greater than or equal to the minimum
|
1126
|
+
if num_domain_lines >= min_label_lines:
|
1127
|
+
filtered_domain_centroids[domain] = centroid
|
1128
|
+
filtered_domain_terms[domain] = domain_terms
|
1129
|
+
valid_indices.append(list(domain_centroids.keys()).index(domain))
|
1130
|
+
|
1131
|
+
label_count += 1
|
1132
|
+
if label_count >= remaining_labels:
|
1133
|
+
break
|
1134
|
+
|
1135
|
+
def _process_terms(
|
1136
|
+
self,
|
1137
|
+
domain: str,
|
1138
|
+
ids_to_replace: Union[Dict[str, str], None],
|
1139
|
+
words_to_omit: Union[List[str], None],
|
1140
|
+
max_label_lines: int,
|
1141
|
+
min_chars_per_line: int,
|
1142
|
+
max_chars_per_line: int,
|
1143
|
+
) -> List[str]:
|
1144
|
+
"""Process terms for a domain, applying word length constraints and combining words where appropriate.
|
1145
|
+
|
1146
|
+
Args:
|
1147
|
+
domain (str): The domain being processed.
|
1148
|
+
ids_to_replace (dict, optional): Dictionary mapping domain IDs to custom labels.
|
1149
|
+
words_to_omit (list, optional): List of words to omit from the labels.
|
1150
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1151
|
+
min_chars_per_line (int): Minimum number of characters in a line to display.
|
1152
|
+
max_chars_per_line (int): Maximum number of characters in a line to display.
|
1153
|
+
|
1154
|
+
Returns:
|
1155
|
+
str: Processed terms combined into lines that fit the constraints, joined by TERM_DELIMITER.
|
1156
|
+
"""
|
1157
|
+
# Handle ids_to_replace logic
|
1158
|
+
if ids_to_replace and domain in ids_to_replace:
|
1159
|
+
terms = ids_to_replace[domain].split(" ")
|
1160
|
+
else:
|
1161
|
+
terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
|
1162
|
+
|
1163
|
+
# Apply words_to_omit and word length constraints
|
1164
|
+
if words_to_omit:
|
1165
|
+
terms = [
|
1166
|
+
term
|
1167
|
+
for term in terms
|
1168
|
+
if term.lower() not in words_to_omit and len(term) >= min_chars_per_line
|
1169
|
+
]
|
1170
|
+
|
1171
|
+
# Use the _combine_words helper to merge words into lines within the length and line-count constraints
|
1172
|
+
compressed_terms = _combine_words(tuple(terms), max_chars_per_line, max_label_lines)
|
1173
|
+
|
1174
|
+
return compressed_terms
|
1175
|
+
|
1004
1176
|
def get_annotated_node_colors(
|
1005
1177
|
self,
|
1006
1178
|
cmap: str = "gist_rainbow",
|
@@ -1254,7 +1426,9 @@ def _to_rgba(
|
|
1254
1426
|
# Handle array of colors case (including strings, RGB, and RGBA)
|
1255
1427
|
elif isinstance(color, (list, tuple, np.ndarray)):
|
1256
1428
|
rgba_colors = []
|
1257
|
-
for
|
1429
|
+
for i in range(num_repeats):
|
1430
|
+
# Reiterate over the colors if the number of repeats exceeds the number of colors
|
1431
|
+
c = color[i % len(color)]
|
1258
1432
|
# Ensure each element is either a valid string or a list/tuple of length 3 (RGB) or 4 (RGBA)
|
1259
1433
|
if isinstance(c, str) or (
|
1260
1434
|
isinstance(c, (list, tuple, np.ndarray)) and len(c) in [3, 4]
|
@@ -1313,6 +1487,59 @@ def _calculate_bounding_box(
|
|
1313
1487
|
return center, radius
|
1314
1488
|
|
1315
1489
|
|
1490
|
+
def _combine_words(words: List[str], max_length: int, max_label_lines: int) -> str:
|
1491
|
+
"""Combine words to fit within the max_length and max_label_lines constraints,
|
1492
|
+
and join the resulting lines with TERM_DELIMITER so they can be split into line breaks for plotting.
|
1493
|
+
|
1494
|
+
Args:
|
1495
|
+
words (List[str]): List of words to combine.
|
1496
|
+
max_length (int): Maximum allowed length for a combined line.
|
1497
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1498
|
+
|
1499
|
+
Returns:
|
1500
|
+
str: Combined lines joined by TERM_DELIMITER, which marks where line breaks go.
|
1501
|
+
"""
|
1502
|
+
|
1503
|
+
def try_combinations(words_batch: List[str]) -> List[str]:
|
1504
|
+
"""Try to combine words within a batch and return them with combined words separated by ':'."""
|
1505
|
+
combined_lines = []
|
1506
|
+
i = 0
|
1507
|
+
while i < len(words_batch):
|
1508
|
+
current_word = words_batch[i]
|
1509
|
+
combined_word = current_word # Start with the current word
|
1510
|
+
# Try to combine more words if possible, and ensure the combination fits within max_length
|
1511
|
+
for j in range(i + 1, len(words_batch)):
|
1512
|
+
next_word = words_batch[j]
|
1513
|
+
if len(combined_word) + len(next_word) + 2 <= max_length: # +2 for ', '
|
1514
|
+
combined_word = f"{combined_word} {next_word}"
|
1515
|
+
i += 1 # Move past the combined word
|
1516
|
+
else:
|
1517
|
+
break # Stop combining if the length is exceeded
|
1518
|
+
|
1519
|
+
combined_lines.append(combined_word) # Add the combined word or single word
|
1520
|
+
i += 1 # Move to the next word
|
1521
|
+
|
1522
|
+
# Stop if we've reached the max_label_lines limit
|
1523
|
+
if len(combined_lines) >= max_label_lines:
|
1524
|
+
break
|
1525
|
+
|
1526
|
+
return combined_lines
|
1527
|
+
|
1528
|
+
# Main logic: start with max_label_lines number of words
|
1529
|
+
combined_lines = try_combinations(words[:max_label_lines])
|
1530
|
+
remaining_words = words[max_label_lines:] # Remaining words after the initial batch
|
1531
|
+
|
1532
|
+
# Continue pulling more words until we fill the lines
|
1533
|
+
while remaining_words and len(combined_lines) < max_label_lines:
|
1534
|
+
available_slots = max_label_lines - len(combined_lines)
|
1535
|
+
words_to_add = remaining_words[:available_slots]
|
1536
|
+
remaining_words = remaining_words[available_slots:]
|
1537
|
+
combined_lines += try_combinations(words_to_add)
|
1538
|
+
|
1539
|
+
# Join the final combined lines with TERM_DELIMITER, a special separator for line breaks
|
1540
|
+
return TERM_DELIMITER.join(combined_lines[:max_label_lines])
|
1541
|
+
|
1542
|
+
|
1316
1543
|
def _calculate_best_label_positions(
|
1317
1544
|
filtered_domain_centroids: Dict[str, Any], center: np.ndarray, radius: float, offset: float
|
1318
1545
|
) -> Dict[str, Any]:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: risk-network
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.8b3
|
4
4
|
Summary: A Python package for biological network analysis
|
5
5
|
Author: Ira Horecka
|
6
6
|
Author-email: Ira Horecka <ira89@icloud.com>
|
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
|
|
709
709
|
Requires-Dist: threadpoolctl
|
710
710
|
Requires-Dist: tqdm
|
711
711
|
|
712
|
-
|
713
|
-
<img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
|
714
|
-
</p>
|
712
|
+
# RISK
|
715
713
|
|
716
714
|
<p align="center">
|
717
|
-
<
|
718
|
-
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
|
719
|
-
<a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
|
715
|
+
<img src="https://i.imgur.com/8TleEJs.png" width="50%" />
|
720
716
|
</p>
|
721
717
|
|
722
|
-
|
718
|
+
<br>
|
719
|
+
|
720
|
+

|
721
|
+
[](https://pypi.python.org/pypi/risk-network)
|
722
|
+

|
723
|
+
[](https://doi.org/10.5281/zenodo.xxxxxxx)
|
724
|
+

|
725
|
+

|
726
|
+
|
727
|
+
**RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
|
723
728
|
|
724
|
-
|
729
|
+
## Documentation and Tutorial
|
730
|
+
|
731
|
+
- **Documentation**: Comprehensive documentation is available at [Documentation link].
|
732
|
+
- **Tutorial**: An interactive Jupyter notebook tutorial can be found at [Tutorial link].
|
733
|
+
We strongly recommend that new users consult the documentation and tutorial early on to take full advantage of RISK's capabilities.
|
734
|
+
|
735
|
+
## Installation
|
725
736
|
|
726
|
-
RISK is
|
737
|
+
RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
|
738
|
+
|
739
|
+
```bash
|
740
|
+
pip install risk-network
|
741
|
+
```
|
727
742
|
|
728
743
|
## Features
|
729
744
|
|
730
|
-
-
|
731
|
-
-
|
732
|
-
-
|
745
|
+
- **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
|
746
|
+
- **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
|
747
|
+
- **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
|
748
|
+
- **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
|
749
|
+
- **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
|
750
|
+
- **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
|
733
751
|
|
734
|
-
## Example
|
752
|
+
## Example Usage
|
735
753
|
|
736
|
-
*Saccharomyces cerevisiae*
|
754
|
+
We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
|
737
755
|
|
738
|
-

|
739
757
|
|
740
|
-
|
758
|
+
RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
|
759
|
+
|
760
|
+
## Citation
|
761
|
+
|
762
|
+
If you use RISK in your research, please cite the following:
|
763
|
+
|
764
|
+
**Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
|
765
|
+
|
766
|
+
## Software Architecture and Implementation
|
741
767
|
|
742
|
-
|
768
|
+
RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
|
743
769
|
|
744
|
-
|
770
|
+
### Supported Data Formats
|
745
771
|
|
746
|
-
|
772
|
+
- **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
|
773
|
+
- **Visualization Outputs**: SVG, PNG, PDF.
|
774
|
+
|
775
|
+
### Clustering Algorithms
|
776
|
+
|
777
|
+
- **Available Algorithms**:
|
778
|
+
- Greedy Modularity
|
779
|
+
- Label Propagation
|
780
|
+
- Louvain
|
781
|
+
- Markov Clustering
|
782
|
+
- Spinglass
|
783
|
+
- Walktrap
|
784
|
+
- **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
|
785
|
+
|
786
|
+
### Statistical Tests
|
787
|
+
|
788
|
+
- **Hypergeometric Test**
|
789
|
+
- **Permutation Test** (single- or multi-process modes)
|
790
|
+
- **Poisson Test**
|
791
|
+
|
792
|
+
## Performance and Efficiency
|
793
|
+
|
794
|
+
In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
|
795
|
+
|
796
|
+
## Contributing
|
797
|
+
|
798
|
+
We welcome contributions from the community. Please use the following resources:
|
799
|
+
|
800
|
+
- [Issues Tracker](https://github.com/irahorecka/risk/issues)
|
801
|
+
- [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
|
802
|
+
|
803
|
+
## Support
|
804
|
+
|
805
|
+
If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
|
747
806
|
|
748
807
|
## License
|
749
808
|
|
750
|
-
|
809
|
+
RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
810
|
+
|
811
|
+
---
|
812
|
+
|
813
|
+
**Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
1
|
+
risk/__init__.py,sha256=qjjV3tZUr6CjlV98T9q2oJFgjLB5qxwKFQm6MkwQc2s,112
|
2
2
|
risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
|
3
3
|
risk/risk.py,sha256=FaQhDCBZxZSAXJsScH0rSbjjCTNZA5vgf9rJj1GHW44,20924
|
4
4
|
risk/annotations/__init__.py,sha256=vUpVvMRE5if01Ic8QY6M2Ae3EFGJHdugEe9PdEkAW4Y,138
|
@@ -13,9 +13,9 @@ risk/neighborhoods/domains.py,sha256=Ov52EEr-tWqy96y8_0tJ9f1K8FI-8tZQxHR7a59A1k8
|
|
13
13
|
risk/neighborhoods/neighborhoods.py,sha256=M-wL4xB_BUTlSZg90swygO5NdrZ6hFUFqs6jsiZaqHk,18260
|
14
14
|
risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
|
15
15
|
risk/network/geometry.py,sha256=H1yGVVqgbfpzBzJwEheDLfvGLSA284jGQQTn612L4Vc,6759
|
16
|
-
risk/network/graph.py,sha256=
|
16
|
+
risk/network/graph.py,sha256=EwD4-1THC5YNdP6PY01Oe35k2QYYqtZpxWraPVH6wa4,16426
|
17
17
|
risk/network/io.py,sha256=kY7HqmL3wa1NnqHu61_G8IpT21qpBijpAZ4ixmsseJA,22911
|
18
|
-
risk/network/plot.py,sha256=
|
18
|
+
risk/network/plot.py,sha256=uDRQTza5scBJKFTlcayFgA7nzWfz-c075J_V7k8eyBI,78285
|
19
19
|
risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
|
20
20
|
risk/stats/hypergeom.py,sha256=o6Qnj31gCAKxr2uQirXrbv7XvdDJGEq69MFW-ubx_hA,2272
|
21
21
|
risk/stats/poisson.py,sha256=8x9hB4DCukq4gNIlIKO-c_jYG1-BTwTX53oLauFyfj8,1793
|
@@ -23,8 +23,8 @@ risk/stats/stats.py,sha256=kvShov-94W6ffgDUTb522vB9hDJQSyTsYif_UIaFfSM,7059
|
|
23
23
|
risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
|
24
24
|
risk/stats/permutation/permutation.py,sha256=D84Rcpt6iTQniK0PfQGcw9bLcHbMt9p-ARcurUnIXZQ,10095
|
25
25
|
risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
|
26
|
-
risk_network-0.0.
|
27
|
-
risk_network-0.0.
|
28
|
-
risk_network-0.0.
|
29
|
-
risk_network-0.0.
|
30
|
-
risk_network-0.0.
|
26
|
+
risk_network-0.0.8b3.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
27
|
+
risk_network-0.0.8b3.dist-info/METADATA,sha256=cUY2Uidk8Bqhj1sWs25aIACjI2QrMXhL42oZQdHSBMo,47450
|
28
|
+
risk_network-0.0.8b3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
29
|
+
risk_network-0.0.8b3.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
30
|
+
risk_network-0.0.8b3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|