risk-network 0.0.8b1__py3-none-any.whl → 0.0.8b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.8-beta.1"
10
+ __version__ = "0.0.8-beta.3"
risk/network/graph.py CHANGED
@@ -148,27 +148,6 @@ class NetworkGraph:
148
148
 
149
149
  return transformed_colors
150
150
 
151
- def _get_composite_node_colors(self, domain_colors: np.ndarray) -> np.ndarray:
152
- """Generate composite colors for nodes based on domain colors and counts.
153
-
154
- Args:
155
- domain_colors (np.ndarray): Array of colors corresponding to each domain.
156
-
157
- Returns:
158
- np.ndarray: Array of composite colors for each node.
159
- """
160
- # Determine the number of nodes
161
- num_nodes = len(self.node_coordinates)
162
- # Initialize composite colors array with shape (number of nodes, 4) for RGBA
163
- composite_colors = np.zeros((num_nodes, 4))
164
- # Assign colors to nodes based on domain_colors
165
- for domain_id, nodes in self.domain_id_to_node_ids_map.items():
166
- color = domain_colors[domain_id]
167
- for node in nodes:
168
- composite_colors[node] = color
169
-
170
- return composite_colors
171
-
172
151
  def _get_domain_colors(
173
152
  self,
174
153
  cmap: str = "gist_rainbow",
@@ -193,9 +172,29 @@ class NetworkGraph:
193
172
  color=color,
194
173
  random_seed=random_seed,
195
174
  )
196
- self.network, self.domain_id_to_node_ids_map
197
175
  return dict(zip(self.domain_id_to_node_ids_map.keys(), domain_colors))
198
176
 
177
+ def _get_composite_node_colors(self, domain_colors: np.ndarray) -> np.ndarray:
178
+ """Generate composite colors for nodes based on domain colors and counts.
179
+
180
+ Args:
181
+ domain_colors (np.ndarray): Array of colors corresponding to each domain.
182
+
183
+ Returns:
184
+ np.ndarray: Array of composite colors for each node.
185
+ """
186
+ # Determine the number of nodes
187
+ num_nodes = len(self.node_coordinates)
188
+ # Initialize composite colors array with shape (number of nodes, 4) for RGBA
189
+ composite_colors = np.zeros((num_nodes, 4))
190
+ # Assign colors to nodes based on domain_colors
191
+ for domain_id, nodes in self.domain_id_to_node_ids_map.items():
192
+ color = domain_colors[domain_id]
193
+ for node in nodes:
194
+ composite_colors[node] = color
195
+
196
+ return composite_colors
197
+
199
198
 
200
199
  def _transform_colors(
201
200
  colors: np.ndarray,
risk/network/plot.py CHANGED
@@ -17,6 +17,8 @@ from scipy.stats import gaussian_kde
17
17
  from risk.log import params, logger
18
18
  from risk.network.graph import NetworkGraph
19
19
 
20
+ TERM_DELIMITER = "::::" # String used to separate multiple domain terms when constructing composite domain labels
21
+
20
22
 
21
23
  class NetworkPlotter:
22
24
  """A class for visualizing network graphs with customizable options.
@@ -678,10 +680,10 @@ class NetworkPlotter:
678
680
  arrow_base_shrink: float = 0.0,
679
681
  arrow_tip_shrink: float = 0.0,
680
682
  max_labels: Union[int, None] = None,
681
- max_words: int = 10,
682
- min_words: int = 1,
683
- max_word_length: int = 20,
684
- min_word_length: int = 1,
683
+ max_label_lines: Union[int, None] = None,
684
+ min_label_lines: int = 1,
685
+ max_chars_per_line: Union[int, None] = None,
686
+ min_chars_per_line: int = 1,
685
687
  words_to_omit: Union[List, None] = None,
686
688
  overlay_ids: bool = False,
687
689
  ids_to_keep: Union[List, Tuple, np.ndarray, None] = None,
@@ -703,10 +705,10 @@ class NetworkPlotter:
703
705
  arrow_base_shrink (float, optional): Distance between the text and the base of the arrow. Defaults to 0.0.
704
706
  arrow_tip_shrink (float, optional): Distance between the arrow tip and the centroid. Defaults to 0.0.
705
707
  max_labels (int, optional): Maximum number of labels to plot. Defaults to None (no limit).
706
- max_words (int, optional): Maximum number of words in a label. Defaults to 10.
707
- min_words (int, optional): Minimum number of words required to display a label. Defaults to 1.
708
- max_word_length (int, optional): Maximum number of characters in a word to display. Defaults to 20.
709
- min_word_length (int, optional): Minimum number of characters in a word to display. Defaults to 1.
708
+ max_label_lines (int, optional): Maximum number of lines in a label. Defaults to None (no limit).
709
+ min_label_lines (int, optional): Minimum number of lines in a label. Defaults to 1.
710
+ max_chars_per_line (int, optional): Maximum number of characters in a line to display. Defaults to None (no limit).
711
+ min_chars_per_line (int, optional): Minimum number of characters in a line to display. Defaults to 1.
710
712
  words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
711
713
  overlay_ids (bool, optional): Whether to overlay domain IDs in the center of the centroids. Defaults to False.
712
714
  ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled. To discover domain IDs,
@@ -735,28 +737,26 @@ class NetworkPlotter:
735
737
  label_arrow_base_shrink=arrow_base_shrink,
736
738
  label_arrow_tip_shrink=arrow_tip_shrink,
737
739
  label_max_labels=max_labels,
738
- label_max_words=max_words,
739
- label_min_words=min_words,
740
- label_max_word_length=max_word_length,
741
- label_min_word_length=min_word_length,
740
+ label_min_label_lines=min_label_lines,
741
+ label_max_label_lines=max_label_lines,
742
+ label_max_chars_per_line=max_chars_per_line,
743
+ label_min_chars_per_line=min_chars_per_line,
742
744
  label_words_to_omit=words_to_omit,
743
745
  label_overlay_ids=overlay_ids,
744
746
  label_ids_to_keep=ids_to_keep,
745
747
  label_ids_to_replace=ids_to_replace,
746
748
  )
747
749
 
750
+ # Convert ids_to_keep to a tuple if it is not None
751
+ ids_to_keep = tuple(ids_to_keep) if ids_to_keep else tuple()
748
752
  # Set max_labels to the total number of domains if not provided (None)
749
753
  if max_labels is None:
750
754
  max_labels = len(self.graph.domain_id_to_node_ids_map)
751
-
752
- # Convert colors to RGBA using the _to_rgba helper function
753
- fontcolor = _to_rgba(
754
- fontcolor, fontalpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
755
- )
756
- arrow_color = _to_rgba(
757
- arrow_color, arrow_alpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
758
- )
759
-
755
+ # Set max_label_lines and max_chars_per_line to large numbers if not provided (None)
756
+ if max_label_lines is None:
757
+ max_label_lines = int(1e6)
758
+ if max_chars_per_line is None:
759
+ max_chars_per_line = int(1e6)
760
760
  # Normalize words_to_omit to lowercase
761
761
  if words_to_omit:
762
762
  words_to_omit = set(word.lower() for word in words_to_omit)
@@ -768,81 +768,47 @@ class NetworkPlotter:
768
768
  domain_centroids[domain_id] = self._calculate_domain_centroid(node_ids)
769
769
 
770
770
  # Initialize dictionaries and lists for valid indices
771
- valid_indices = []
772
- filtered_domain_centroids = {}
773
- filtered_domain_terms = {}
771
+ valid_indices = [] # List of valid indices to plot colors and arrows
772
+ filtered_domain_centroids = {} # Filtered domain centroids to plot
773
+ filtered_domain_terms = {} # Filtered domain terms to plot
774
774
  # Handle the ids_to_keep logic
775
775
  if ids_to_keep:
776
- # Convert ids_to_keep to remove accidental duplicates
777
- ids_to_keep = set(ids_to_keep)
778
- # Check if the number of provided ids_to_keep exceeds max_labels
779
- if max_labels is not None and len(ids_to_keep) > max_labels:
780
- raise ValueError(
781
- f"Number of provided IDs ({len(ids_to_keep)}) exceeds max_labels ({max_labels})."
782
- )
783
-
784
- # Process the specified IDs first
785
- for domain in ids_to_keep:
786
- if (
787
- domain in self.graph.domain_id_to_domain_terms_map
788
- and domain in domain_centroids
789
- ):
790
- # Handle ids_to_replace logic here for ids_to_keep
791
- if ids_to_replace and domain in ids_to_replace:
792
- terms = ids_to_replace[domain].split(" ")
793
- else:
794
- terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
795
-
796
- # Apply words_to_omit, word length constraints, and max_words
797
- if words_to_omit:
798
- terms = [term for term in terms if term.lower() not in words_to_omit]
799
- terms = [
800
- term for term in terms if min_word_length <= len(term) <= max_word_length
801
- ]
802
- terms = terms[:max_words]
803
-
804
- # Check if the domain passes the word count condition
805
- if len(terms) >= min_words:
806
- filtered_domain_centroids[domain] = domain_centroids[domain]
807
- filtered_domain_terms[domain] = " ".join(terms)
808
- valid_indices.append(
809
- list(domain_centroids.keys()).index(domain)
810
- ) # Track the valid index
776
+ # Process the ids_to_keep first INPLACE
777
+ self._process_ids_to_keep(
778
+ ids_to_keep=ids_to_keep,
779
+ domain_centroids=domain_centroids,
780
+ ids_to_replace=ids_to_replace,
781
+ words_to_omit=words_to_omit,
782
+ max_labels=max_labels,
783
+ min_label_lines=min_label_lines,
784
+ max_label_lines=max_label_lines,
785
+ min_chars_per_line=min_chars_per_line,
786
+ max_chars_per_line=max_chars_per_line,
787
+ filtered_domain_centroids=filtered_domain_centroids,
788
+ filtered_domain_terms=filtered_domain_terms,
789
+ valid_indices=valid_indices,
790
+ )
811
791
 
812
792
  # Calculate remaining labels to plot after processing ids_to_keep
813
793
  remaining_labels = (
814
- max_labels - len(ids_to_keep) if ids_to_keep and max_labels else max_labels
794
+ max_labels - len(valid_indices) if valid_indices and max_labels else max_labels
815
795
  )
816
- # Process remaining domains to fill in additional labels, if there are slots left
796
+ # Process remaining domains INPLACE to fill in additional labels, if there are slots left
817
797
  if remaining_labels and remaining_labels > 0:
818
- for idx, (domain, centroid) in enumerate(domain_centroids.items()):
819
- # Check if the domain is NaN and continue if true
820
- if pd.isna(domain) or (isinstance(domain, float) and np.isnan(domain)):
821
- continue # Skip NaN domains
822
- if ids_to_keep and domain in ids_to_keep:
823
- continue # Skip domains already handled by ids_to_keep
824
-
825
- # Handle ids_to_replace logic first
826
- if ids_to_replace and domain in ids_to_replace:
827
- terms = ids_to_replace[domain].split(" ")
828
- else:
829
- terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
830
-
831
- # Apply words_to_omit, word length constraints, and max_words
832
- if words_to_omit:
833
- terms = [term for term in terms if term.lower() not in words_to_omit]
834
-
835
- terms = [term for term in terms if min_word_length <= len(term) <= max_word_length]
836
- terms = terms[:max_words]
837
- # Check if the domain passes the word count condition
838
- if len(terms) >= min_words:
839
- filtered_domain_centroids[domain] = centroid
840
- filtered_domain_terms[domain] = " ".join(terms)
841
- valid_indices.append(idx) # Track the valid index
842
-
843
- # Stop once we've reached the max_labels limit
844
- if len(filtered_domain_centroids) >= max_labels:
845
- break
798
+ self._process_remaining_domains(
799
+ domain_centroids=domain_centroids,
800
+ ids_to_keep=ids_to_keep,
801
+ ids_to_replace=ids_to_replace,
802
+ words_to_omit=words_to_omit,
803
+ remaining_labels=remaining_labels,
804
+ min_chars_per_line=min_chars_per_line,
805
+ max_chars_per_line=max_chars_per_line,
806
+ max_label_lines=max_label_lines,
807
+ min_label_lines=min_label_lines,
808
+ filtered_domain_centroids=filtered_domain_centroids,
809
+ filtered_domain_terms=filtered_domain_terms,
810
+ valid_indices=valid_indices,
811
+ )
846
812
 
847
813
  # Calculate the bounding box around the network
848
814
  center, radius = _calculate_bounding_box(self.graph.node_coordinates, radius_margin=scale)
@@ -850,11 +816,19 @@ class NetworkPlotter:
850
816
  best_label_positions = _calculate_best_label_positions(
851
817
  filtered_domain_centroids, center, radius, offset
852
818
  )
819
+ # Convert colors to RGBA using the _to_rgba helper function
820
+ fontcolor = _to_rgba(
821
+ fontcolor, fontalpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
822
+ )
823
+ arrow_color = _to_rgba(
824
+ arrow_color, arrow_alpha, num_repeats=len(self.graph.domain_id_to_node_ids_map)
825
+ )
853
826
 
854
827
  # Annotate the network with labels
855
828
  for idx, (domain, pos) in zip(valid_indices, best_label_positions.items()):
856
829
  centroid = filtered_domain_centroids[domain]
857
- annotations = filtered_domain_terms[domain].split(" ")[:max_words]
830
+ # Split by special key TERM_DELIMITER to split annotation into multiple lines
831
+ annotations = filtered_domain_terms[domain].split(TERM_DELIMITER)
858
832
  self.ax.annotate(
859
833
  "\n".join(annotations),
860
834
  xy=centroid,
@@ -1001,6 +975,204 @@ class NetworkPlotter:
1001
975
  domain_central_node = node_positions[central_node_idx]
1002
976
  return domain_central_node
1003
977
 
978
def _process_ids_to_keep(
    self,
    ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
    domain_centroids: Dict[str, np.ndarray],
    ids_to_replace: Union[Dict[str, str], None],
    words_to_omit: Union[List[str], None],
    max_labels: Union[int, None],
    min_label_lines: int,
    max_label_lines: int,
    min_chars_per_line: int,
    max_chars_per_line: int,
    filtered_domain_centroids: Dict[str, np.ndarray],
    filtered_domain_terms: Dict[str, str],
    valid_indices: List[int],
) -> None:
    """Label the domains the caller explicitly asked to keep.

    Every requested domain that is present in both the graph's domain-terms map and
    `domain_centroids` gets its label built by `_process_terms`. Labels with at least
    `min_label_lines` lines are recorded in the three output containers.

    Args:
        ids_to_keep (list, tuple, or np.ndarray): IDs of domains that must be labeled.
            Repeated IDs are processed once.
        domain_centroids (dict): Mapping of domains to their centroids.
        ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels.
        words_to_omit (list, optional): List of words to omit from the labels.
        max_labels (int, optional): Maximum number of labels allowed.
        min_label_lines (int): Minimum number of lines in a label.
        max_label_lines (int): Maximum number of lines in a label.
        min_chars_per_line (int): Minimum number of characters in a line to display.
        max_chars_per_line (int): Maximum number of characters in a line to display.
        filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
        filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
        valid_indices (list): List to store valid indices (output).

    Note:
        The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are
        modified in-place.

    Raises:
        ValueError: If the number of unique `ids_to_keep` exceeds `max_labels`.
    """
    # Drop accidental duplicates but keep the caller's order. A repeated ID would otherwise
    # append the same index to valid_indices twice while the output dicts hold it only once.
    ids_to_keep = list(dict.fromkeys(ids_to_keep))
    if max_labels is not None and len(ids_to_keep) > max_labels:
        raise ValueError(
            f"Number of provided IDs ({len(ids_to_keep)}) exceeds max_labels ({max_labels})."
        )

    # Position of each domain within domain_centroids, computed once instead of per domain
    domain_positions = {domain: idx for idx, domain in enumerate(domain_centroids)}
    domain_terms_map = self.graph.domain_id_to_domain_terms_map
    for domain in ids_to_keep:
        # Silently skip IDs that are unknown to the graph or have no centroid
        if domain not in domain_terms_map or domain not in domain_positions:
            continue

        domain_terms = self._process_terms(
            domain=domain,
            ids_to_replace=ids_to_replace,
            words_to_omit=words_to_omit,
            max_label_lines=max_label_lines,
            min_chars_per_line=min_chars_per_line,
            max_chars_per_line=max_chars_per_line,
        )
        # The label is a single string whose lines are separated by TERM_DELIMITER
        num_domain_lines = len(domain_terms.split(TERM_DELIMITER))
        if num_domain_lines >= min_label_lines:
            filtered_domain_terms[domain] = domain_terms
            filtered_domain_centroids[domain] = domain_centroids[domain]
            valid_indices.append(domain_positions[domain])
1038
+
1039
def _process_remaining_domains(
    self,
    domain_centroids: Dict[str, np.ndarray],
    ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
    ids_to_replace: Union[Dict[str, str], None],
    words_to_omit: Union[List[str], None],
    remaining_labels: int,
    min_label_lines: int,
    max_label_lines: int,
    min_chars_per_line: int,
    max_chars_per_line: int,
    filtered_domain_centroids: Dict[str, np.ndarray],
    filtered_domain_terms: Dict[str, str],
    valid_indices: List[int],
) -> None:
    """Fill the remaining label slots with domains that are spread far apart.

    Domains not listed in `ids_to_keep` (and whose ID is not NaN/None) are candidates. Starting
    from the first candidate, the domain whose centroid is farthest from its nearest
    already-selected centroid is added repeatedly until `remaining_labels` domains are selected
    or no candidates are left. Each selected domain's label is built by `_process_terms` and
    recorded if it has at least `min_label_lines` lines.

    Args:
        domain_centroids (dict): Mapping of domains to their centroids.
        ids_to_keep (list, tuple, or np.ndarray, optional): IDs of domains that must be labeled;
            these are excluded here because they are handled separately.
        ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels.
        words_to_omit (list, optional): List of words to omit from the labels.
        remaining_labels (int): The remaining number of labels that can be generated.
        min_label_lines (int): Minimum number of lines in a label.
        max_label_lines (int): Maximum number of lines in a label.
        min_chars_per_line (int): Minimum number of characters in a line to display.
        max_chars_per_line (int): Maximum number of characters in a line to display.
        filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
        filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
        valid_indices (list): List to store valid indices (output).

    Note:
        The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are
        modified in-place. A selected domain whose label is too short still uses up one of
        the `remaining_labels` slots.
    """
    # Nothing to add without a positive label budget
    if not remaining_labels or remaining_labels < 0:
        return

    # Collect candidate domains: not already kept and not NaN/None
    kept_ids = set(ids_to_keep) if ids_to_keep is not None else set()
    remaining_domains = {
        domain: centroid
        for domain, centroid in domain_centroids.items()
        if domain not in kept_ids and not pd.isna(domain)
    }
    if not remaining_domains:
        return

    # Greedy farthest-point selection, seeded with the first candidate
    first_domain = next(iter(remaining_domains))
    selected_domains = [first_domain]
    selected_lookup = {first_domain}
    while len(selected_domains) < remaining_labels:
        farthest_domain = None
        max_distance = -1
        for candidate_domain, candidate_centroid in remaining_domains.items():
            if candidate_domain in selected_lookup:
                continue

            # Distance from the candidate to its closest already-selected domain
            min_distance = min(
                np.linalg.norm(candidate_centroid - remaining_domains[dom])
                for dom in selected_domains
            )
            if min_distance > max_distance:
                max_distance = min_distance
                farthest_domain = candidate_domain

        # None is a safe "no candidate" marker because None IDs were filtered out above;
        # a truthiness test would wrongly stop on falsy IDs such as 0
        if farthest_domain is None:
            break
        selected_domains.append(farthest_domain)
        selected_lookup.add(farthest_domain)

    # Position of each domain within domain_centroids, computed once instead of per domain
    domain_positions = {domain: idx for idx, domain in enumerate(domain_centroids)}
    # selected_domains never holds more than remaining_labels entries, so no extra cap is needed
    for domain in selected_domains:
        domain_terms = self._process_terms(
            domain=domain,
            ids_to_replace=ids_to_replace,
            words_to_omit=words_to_omit,
            max_label_lines=max_label_lines,
            min_chars_per_line=min_chars_per_line,
            max_chars_per_line=max_chars_per_line,
        )
        # The label is a single string whose lines are separated by TERM_DELIMITER
        num_domain_lines = len(domain_terms.split(TERM_DELIMITER))
        if num_domain_lines >= min_label_lines:
            filtered_domain_centroids[domain] = remaining_domains[domain]
            filtered_domain_terms[domain] = domain_terms
            valid_indices.append(domain_positions[domain])
1134
+
1135
def _process_terms(
    self,
    domain: str,
    ids_to_replace: Union[Dict[str, str], None],
    words_to_omit: Union[List[str], None],
    max_label_lines: int,
    min_chars_per_line: int,
    max_chars_per_line: int,
) -> str:
    """Build the label text for a domain.

    The domain's terms (or its custom replacement label) are split on spaces, filtered, and
    handed to `_combine_words`, which packs them into lines.

    Args:
        domain (str): The domain being processed.
        ids_to_replace (dict, optional): Dictionary mapping domain IDs to custom labels.
        words_to_omit (list, optional): Words to omit from the labels. Terms are lowercased
            before the membership test, so entries are expected to be lowercase.
        max_label_lines (int): Maximum number of lines in a label.
        min_chars_per_line (int): Minimum number of characters a word needs to be kept.
        max_chars_per_line (int): Maximum number of characters in a line to display.

    Returns:
        str: The label as returned by `_combine_words`, a single string in which lines are
            separated by TERM_DELIMITER.
    """
    # A custom label, when provided for this domain, takes precedence over the graph's terms
    if ids_to_replace and domain in ids_to_replace:
        terms = ids_to_replace[domain].split(" ")
    else:
        terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")

    # Apply the minimum length to every term, whether or not words_to_omit was given
    omitted = words_to_omit or ()
    terms = [
        term
        for term in terms
        if len(term) >= min_chars_per_line and term.lower() not in omitted
    ]

    return _combine_words(tuple(terms), max_chars_per_line, max_label_lines)
1175
+
1004
1176
  def get_annotated_node_colors(
1005
1177
  self,
1006
1178
  cmap: str = "gist_rainbow",
@@ -1254,7 +1426,9 @@ def _to_rgba(
1254
1426
  # Handle array of colors case (including strings, RGB, and RGBA)
1255
1427
  elif isinstance(color, (list, tuple, np.ndarray)):
1256
1428
  rgba_colors = []
1257
- for c in color:
1429
+ for i in range(num_repeats):
1430
+ # Reiterate over the colors if the number of repeats exceeds the number of colors
1431
+ c = color[i % len(color)]
1258
1432
  # Ensure each element is either a valid string or a list/tuple of length 3 (RGB) or 4 (RGBA)
1259
1433
  if isinstance(c, str) or (
1260
1434
  isinstance(c, (list, tuple, np.ndarray)) and len(c) in [3, 4]
@@ -1313,6 +1487,59 @@ def _calculate_bounding_box(
1313
1487
  return center, radius
1314
1488
 
1315
1489
 
1490
def _combine_words(
    words: Union[List[str], Tuple[str, ...]], max_length: int, max_label_lines: int
) -> str:
    """Pack words into at most `max_label_lines` lines of at most `max_length` characters.

    Words are taken in batches: first the leading `max_label_lines` words, then as many more
    words as there are unused lines, until the lines are filled or the words run out. Within a
    batch, consecutive words are joined with a single space for as long as the line stays
    within `max_length`. A single word longer than `max_length` is kept on its own line.

    Args:
        words (list or tuple of str): Words to combine, in display order.
        max_length (int): Maximum allowed length for a combined line.
        max_label_lines (int): Maximum number of lines in a label.

    Returns:
        str: The lines joined by TERM_DELIMITER, which marks the line breaks. Empty when there
            are no words or `max_label_lines` is not positive.
    """
    # No lines allowed means no label text
    if max_label_lines <= 0:
        return ""

    def try_combinations(words_batch: Union[List[str], Tuple[str, ...]]) -> List[str]:
        """Greedily pack one batch of words into lines, capped at max_label_lines lines."""
        packed_lines = []
        idx = 0
        while idx < len(words_batch) and len(packed_lines) < max_label_lines:
            line = words_batch[idx]
            idx += 1
            # Keep appending words while the line, including the joining space, still fits
            while idx < len(words_batch) and len(line) + 1 + len(words_batch[idx]) <= max_length:
                line = f"{line} {words_batch[idx]}"
                idx += 1
            packed_lines.append(line)

        return packed_lines

    # Start with as many words as there are lines, then top up the unused lines
    combined_lines = try_combinations(words[:max_label_lines])
    remaining_words = words[max_label_lines:]
    while remaining_words and len(combined_lines) < max_label_lines:
        available_slots = max_label_lines - len(combined_lines)
        words_to_add = remaining_words[:available_slots]
        remaining_words = remaining_words[available_slots:]
        combined_lines += try_combinations(words_to_add)

    # Join the lines with TERM_DELIMITER, the special separator used for line breaks
    return TERM_DELIMITER.join(combined_lines[:max_label_lines])
1541
+
1542
+
1316
1543
  def _calculate_best_label_positions(
1317
1544
  filtered_domain_centroids: Dict[str, Any], center: np.ndarray, radius: float, offset: float
1318
1545
  ) -> Dict[str, Any]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.8b1
3
+ Version: 0.0.8b3
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
709
709
  Requires-Dist: threadpoolctl
710
710
  Requires-Dist: tqdm
711
711
 
712
- <p align="center">
713
- <img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
714
- </p>
712
+ # RISK
715
713
 
716
714
  <p align="center">
717
- <a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
718
- <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
719
- <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
715
+ <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
720
716
  </p>
721
717
 
722
- ## RISK
718
+ <br>
719
+
720
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
721
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
722
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
723
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
724
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
725
+ ![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20macos%20%7C%20windows-lightgrey)
726
+
727
+ **RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
723
728
 
724
- #### RISK Infers Spatial Kinships
729
+ ## Documentation and Tutorial
730
+
731
+ - **Documentation**: Comprehensive documentation is available at [Documentation link].
732
+ - **Tutorial**: An interactive Jupyter notebook tutorial can be found at [Tutorial link].
733
+ We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
734
+
735
+ ## Installation
725
736
 
726
- RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.
737
+ RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
738
+
739
+ ```bash
740
+ pip install risk-network
741
+ ```
727
742
 
728
743
  ## Features
729
744
 
730
- - Spatial analysis of biological networks
731
- - Functional enrichment detection
732
- - Optimized performance
745
+ - **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
746
+ - **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
747
+ - **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
748
+ - **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
749
+ - **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
750
+ - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
733
751
 
734
- ## Example
752
+ ## Example Usage
735
753
 
736
- *Saccharomyces cerevisiae* proteins oriented by physical interactions discovered through affinity enrichment and mass spectrometry (Michaelis et al., 2023).
754
+ We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
737
755
 
738
- ![PPI Network Demo](https://i.imgur.com/NnyK6nO.png)
756
+ ![RISK Main Figure](https://i.imgur.com/TUVfvfH.jpeg)
739
757
 
740
- ## Installation
758
+ RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
759
+
760
+ ## Citation
761
+
762
+ If you use RISK in your research, please cite the following:
763
+
764
+ **Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
765
+
766
+ ## Software Architecture and Implementation
741
767
 
742
- Coming soon...
768
+ RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
743
769
 
744
- ## Usage
770
+ ### Supported Data Formats
745
771
 
746
- Coming soon...
772
+ - **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
773
+ - **Visualization Outputs**: SVG, PNG, PDF.
774
+
775
+ ### Clustering Algorithms
776
+
777
+ - **Available Algorithms**:
778
+ - Greedy Modularity
779
+ - Label Propagation
780
+ - Louvain
781
+ - Markov Clustering
782
+ - Spinglass
783
+ - Walktrap
784
+ - **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
785
+
786
+ ### Statistical Tests
787
+
788
+ - **Hypergeometric Test**
789
+ - **Permutation Test** (single- or multi-process modes)
790
+ - **Poisson Test**
791
+
792
+ ## Performance and Efficiency
793
+
794
+ In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
795
+
796
+ ## Contributing
797
+
798
+ We welcome contributions from the community. Please use the following resources:
799
+
800
+ - [Issues Tracker](https://github.com/irahorecka/risk/issues)
801
+ - [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
802
+
803
+ ## Support
804
+
805
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
747
806
 
748
807
  ## License
749
808
 
750
- This project is licensed under the GPL-3.0 license.
809
+ RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
810
+
811
+ ---
812
+
813
+ **Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
@@ -1,4 +1,4 @@
1
- risk/__init__.py,sha256=UNSdF3ch5eG5kY1NsOrjzDZOS-HRak9ASoBVXBkvCAM,112
1
+ risk/__init__.py,sha256=qjjV3tZUr6CjlV98T9q2oJFgjLB5qxwKFQm6MkwQc2s,112
2
2
  risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
3
3
  risk/risk.py,sha256=FaQhDCBZxZSAXJsScH0rSbjjCTNZA5vgf9rJj1GHW44,20924
4
4
  risk/annotations/__init__.py,sha256=vUpVvMRE5if01Ic8QY6M2Ae3EFGJHdugEe9PdEkAW4Y,138
@@ -13,9 +13,9 @@ risk/neighborhoods/domains.py,sha256=Ov52EEr-tWqy96y8_0tJ9f1K8FI-8tZQxHR7a59A1k8
13
13
  risk/neighborhoods/neighborhoods.py,sha256=M-wL4xB_BUTlSZg90swygO5NdrZ6hFUFqs6jsiZaqHk,18260
14
14
  risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
15
15
  risk/network/geometry.py,sha256=H1yGVVqgbfpzBzJwEheDLfvGLSA284jGQQTn612L4Vc,6759
16
- risk/network/graph.py,sha256=_LEoom4EEowGALuJKSXcev9RAAHu2FqIeq3u7mkifW0,16479
16
+ risk/network/graph.py,sha256=EwD4-1THC5YNdP6PY01Oe35k2QYYqtZpxWraPVH6wa4,16426
17
17
  risk/network/io.py,sha256=kY7HqmL3wa1NnqHu61_G8IpT21qpBijpAZ4ixmsseJA,22911
18
- risk/network/plot.py,sha256=9GcLKkH3CMEtraYnfdLXNJCi04rBQCjw4T6Q8k5yNOI,67091
18
+ risk/network/plot.py,sha256=uDRQTza5scBJKFTlcayFgA7nzWfz-c075J_V7k8eyBI,78285
19
19
  risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
20
20
  risk/stats/hypergeom.py,sha256=o6Qnj31gCAKxr2uQirXrbv7XvdDJGEq69MFW-ubx_hA,2272
21
21
  risk/stats/poisson.py,sha256=8x9hB4DCukq4gNIlIKO-c_jYG1-BTwTX53oLauFyfj8,1793
@@ -23,8 +23,8 @@ risk/stats/stats.py,sha256=kvShov-94W6ffgDUTb522vB9hDJQSyTsYif_UIaFfSM,7059
23
23
  risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
24
24
  risk/stats/permutation/permutation.py,sha256=D84Rcpt6iTQniK0PfQGcw9bLcHbMt9p-ARcurUnIXZQ,10095
25
25
  risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
26
- risk_network-0.0.8b1.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
27
- risk_network-0.0.8b1.dist-info/METADATA,sha256=E0T1xFQXaQfe3oH0ZW7fAukDoB3QbvzswcLc0oczpqA,43142
28
- risk_network-0.0.8b1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
29
- risk_network-0.0.8b1.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
30
- risk_network-0.0.8b1.dist-info/RECORD,,
26
+ risk_network-0.0.8b3.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
27
+ risk_network-0.0.8b3.dist-info/METADATA,sha256=cUY2Uidk8Bqhj1sWs25aIACjI2QrMXhL42oZQdHSBMo,47450
28
+ risk_network-0.0.8b3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
29
+ risk_network-0.0.8b3.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
30
+ risk_network-0.0.8b3.dist-info/RECORD,,