risk-network 0.0.8b2__tar.gz → 0.0.8b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/PKG-INFO +1 -1
  2. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/__init__.py +1 -1
  3. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/plot.py +252 -123
  4. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/PKG-INFO +1 -1
  5. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/LICENSE +0 -0
  6. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/MANIFEST.in +0 -0
  7. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/README.md +0 -0
  8. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/pyproject.toml +0 -0
  9. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/annotations/__init__.py +0 -0
  10. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/annotations/annotations.py +0 -0
  11. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/annotations/io.py +0 -0
  12. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/constants.py +0 -0
  13. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/log/__init__.py +0 -0
  14. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/log/config.py +0 -0
  15. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/log/params.py +0 -0
  16. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/__init__.py +0 -0
  17. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/community.py +0 -0
  18. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/domains.py +0 -0
  19. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/neighborhoods.py +0 -0
  20. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/__init__.py +0 -0
  21. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/geometry.py +0 -0
  22. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/graph.py +0 -0
  23. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/io.py +0 -0
  24. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/risk.py +0 -0
  25. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/__init__.py +0 -0
  26. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/hypergeom.py +0 -0
  27. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/permutation/__init__.py +0 -0
  28. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/permutation/permutation.py +0 -0
  29. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/permutation/test_functions.py +0 -0
  30. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/poisson.py +0 -0
  31. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/stats.py +0 -0
  32. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/SOURCES.txt +0 -0
  33. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/dependency_links.txt +0 -0
  34. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/requires.txt +0 -0
  35. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/top_level.txt +0 -0
  36. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/setup.cfg +0 -0
  37. {risk_network-0.0.8b2 → risk_network-0.0.8b4}/setup.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: risk-network
- Version: 0.0.8b2
+ Version: 0.0.8b4
  Summary: A Python package for biological network analysis
  Author: Ira Horecka
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships

  from risk.risk import RISK

- __version__ = "0.0.8-beta.2"
+ __version__ = "0.0.8-beta.4"
@@ -3,7 +3,6 @@ risk/network/plot
  ~~~~~~~~~~~~~~~~~
  """

- from functools import lru_cache
  from typing import Any, Dict, List, Tuple, Union

  import matplotlib.colors as mcolors
@@ -18,6 +17,8 @@ from scipy.stats import gaussian_kde
  from risk.log import params, logger
  from risk.network.graph import NetworkGraph

+ TERM_DELIMITER = "::::" # String used to separate multiple domain terms when constructing composite domain labels
+

  class NetworkPlotter:
  """A class for visualizing network graphs with customizable options.
@@ -679,10 +680,10 @@ class NetworkPlotter:
  arrow_base_shrink: float = 0.0,
  arrow_tip_shrink: float = 0.0,
  max_labels: Union[int, None] = None,
- max_words: int = 10,
- min_words: int = 1,
- max_word_length: int = 20,
- min_word_length: int = 1,
+ max_label_lines: Union[int, None] = None,
+ min_label_lines: int = 1,
+ max_chars_per_line: Union[int, None] = None,
+ min_chars_per_line: int = 1,
  words_to_omit: Union[List, None] = None,
  overlay_ids: bool = False,
  ids_to_keep: Union[List, Tuple, np.ndarray, None] = None,
@@ -704,10 +705,10 @@ class NetworkPlotter:
  arrow_base_shrink (float, optional): Distance between the text and the base of the arrow. Defaults to 0.0.
  arrow_tip_shrink (float, optional): Distance between the arrow tip and the centroid. Defaults to 0.0.
  max_labels (int, optional): Maximum number of labels to plot. Defaults to None (no limit).
- max_words (int, optional): Maximum number of words in a label. Defaults to 10.
- min_words (int, optional): Minimum number of words required to display a label. Defaults to 1.
- max_word_length (int, optional): Maximum number of characters in a word to display. Defaults to 20.
- min_word_length (int, optional): Minimum number of characters in a word to display. Defaults to 1.
+ min_label_lines (int, optional): Minimum number of lines in a label. Defaults to 1.
+ max_label_lines (int, optional): Maximum number of lines in a label. Defaults to None (no limit).
+ min_chars_per_line (int, optional): Minimum number of characters in a line to display. Defaults to 1.
+ max_chars_per_line (int, optional): Maximum number of characters in a line to display. Defaults to None (no limit).
  words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
  overlay_ids (bool, optional): Whether to overlay domain IDs in the center of the centroids. Defaults to False.
  ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled. To discover domain IDs,
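The four word-based keyword arguments are replaced by line-based ones in this release. A hedged usage sketch of the renamed parameters; the labeling method name `plot_labels` and the plotter setup are assumptions, since neither appears in this hunk, and the values are purely illustrative:

    from typing import Any

    def label_network(plotter: Any) -> None:
        # `plotter` is assumed to be a NetworkPlotter instance, and `plot_labels` is
        # the assumed name of the labeling method whose signature this hunk modifies.
        plotter.plot_labels(
            max_labels=10,           # unchanged
            min_label_lines=2,       # was: min_words
            max_label_lines=4,       # was: max_words
            min_chars_per_line=3,    # was: min_word_length
            max_chars_per_line=20,   # was: max_word_length
            words_to_omit=["of", "the"],
        )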
@@ -736,70 +737,77 @@ class NetworkPlotter:
  label_arrow_base_shrink=arrow_base_shrink,
  label_arrow_tip_shrink=arrow_tip_shrink,
  label_max_labels=max_labels,
- label_max_words=max_words,
- label_min_words=min_words,
- label_max_word_length=max_word_length,
- label_min_word_length=min_word_length,
+ label_min_label_lines=min_label_lines,
+ label_max_label_lines=max_label_lines,
+ label_max_chars_per_line=max_chars_per_line,
+ label_min_chars_per_line=min_chars_per_line,
  label_words_to_omit=words_to_omit,
  label_overlay_ids=overlay_ids,
  label_ids_to_keep=ids_to_keep,
  label_ids_to_replace=ids_to_replace,
  )

+ # Convert ids_to_keep to a tuple if it is not None
+ ids_to_keep = tuple(ids_to_keep) if ids_to_keep else tuple()
  # Set max_labels to the total number of domains if not provided (None)
  if max_labels is None:
  max_labels = len(self.graph.domain_id_to_node_ids_map)
+ # Set max_label_lines and max_chars_per_line to large numbers if not provided (None)
+ if max_label_lines is None:
+ max_label_lines = int(1e6)
+ if max_chars_per_line is None:
+ max_chars_per_line = int(1e6)
  # Normalize words_to_omit to lowercase
  if words_to_omit:
  words_to_omit = set(word.lower() for word in words_to_omit)

- # Calculate the center and radius of the network
- domain_centroids = {}
+ # Calculate the center and radius of domains to position labels around the network
+ domain_id_to_centroid_map = {}
  for domain_id, node_ids in self.graph.domain_id_to_node_ids_map.items():
  if node_ids: # Skip if the domain has no nodes
- domain_centroids[domain_id] = self._calculate_domain_centroid(node_ids)
+ domain_id_to_centroid_map[domain_id] = self._calculate_domain_centroid(node_ids)

  # Initialize dictionaries and lists for valid indices
- valid_indices = []
- filtered_domain_centroids = {}
- filtered_domain_terms = {}
+ valid_indices = [] # List of valid indices to plot colors and arrows
+ filtered_domain_centroids = {} # Filtered domain centroids to plot
+ filtered_domain_terms = {} # Filtered domain terms to plot
  # Handle the ids_to_keep logic
  if ids_to_keep:
  # Process the ids_to_keep first INPLACE
  self._process_ids_to_keep(
- ids_to_keep,
- max_labels,
- domain_centroids,
- ids_to_replace,
- words_to_omit,
- min_word_length,
- max_word_length,
- max_words,
- min_words,
- filtered_domain_centroids,
- filtered_domain_terms,
- valid_indices,
+ domain_id_to_centroid_map=domain_id_to_centroid_map,
+ ids_to_keep=ids_to_keep,
+ ids_to_replace=ids_to_replace,
+ words_to_omit=words_to_omit,
+ max_labels=max_labels,
+ min_label_lines=min_label_lines,
+ max_label_lines=max_label_lines,
+ min_chars_per_line=min_chars_per_line,
+ max_chars_per_line=max_chars_per_line,
+ filtered_domain_centroids=filtered_domain_centroids,
+ filtered_domain_terms=filtered_domain_terms,
+ valid_indices=valid_indices,
  )

  # Calculate remaining labels to plot after processing ids_to_keep
  remaining_labels = (
- max_labels - len(ids_to_keep) if ids_to_keep and max_labels else max_labels
+ max_labels - len(valid_indices) if valid_indices and max_labels else max_labels
  )
  # Process remaining domains INPLACE to fill in additional labels, if there are slots left
  if remaining_labels and remaining_labels > 0:
  self._process_remaining_domains(
- domain_centroids,
- ids_to_keep,
- ids_to_replace,
- words_to_omit,
- min_word_length,
- max_word_length,
- max_words,
- min_words,
- max_labels,
- filtered_domain_centroids,
- filtered_domain_terms,
- valid_indices,
+ domain_id_to_centroid_map=domain_id_to_centroid_map,
+ ids_to_keep=ids_to_keep,
+ ids_to_replace=ids_to_replace,
+ words_to_omit=words_to_omit,
+ remaining_labels=remaining_labels,
+ min_chars_per_line=min_chars_per_line,
+ max_chars_per_line=max_chars_per_line,
+ max_label_lines=max_label_lines,
+ min_label_lines=min_label_lines,
+ filtered_domain_centroids=filtered_domain_centroids,
+ filtered_domain_terms=filtered_domain_terms,
+ valid_indices=valid_indices,
  )

  # Calculate the bounding box around the network
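Before filtering begins, the new limits are normalized so that None means "no limit", and ids_to_keep is converted to a tuple so it is always safe to measure and iterate. A minimal sketch of that normalization, assuming illustrative input values:

    ids_to_keep = None
    max_label_lines = None
    max_chars_per_line = 30

    ids_to_keep = tuple(ids_to_keep) if ids_to_keep else tuple()
    if max_label_lines is None:
        max_label_lines = int(1e6)    # effectively unlimited lines
    if max_chars_per_line is None:
        max_chars_per_line = int(1e6) # effectively unlimited characters per line

    print(ids_to_keep, max_label_lines, max_chars_per_line)  # () 1000000 30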
@@ -819,8 +827,8 @@ class NetworkPlotter:
  # Annotate the network with labels
  for idx, (domain, pos) in zip(valid_indices, best_label_positions.items()):
  centroid = filtered_domain_centroids[domain]
- # Split by special key to split annotation into multiple lines
- annotations = filtered_domain_terms[domain].split("::::")
+ # Split by special key TERM_DELIMITER to split annotation into multiple lines
+ annotations = filtered_domain_terms[domain].split(TERM_DELIMITER)
  self.ax.annotate(
  "\n".join(annotations),
  xy=centroid,
@@ -969,15 +977,15 @@ class NetworkPlotter:

  def _process_ids_to_keep(
  self,
- ids_to_keep: Union[List[str], Tuple[str], np.ndarray, None],
- max_labels: Union[int, None],
- domain_centroids: Dict[str, np.ndarray],
+ domain_id_to_centroid_map: Dict[str, np.ndarray],
+ ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
  ids_to_replace: Union[Dict[str, str], None],
  words_to_omit: Union[List[str], None],
- min_word_length: int,
- max_word_length: int,
- max_words: int,
- min_words: int,
+ max_labels: Union[int, None],
+ min_label_lines: int,
+ max_label_lines: int,
+ min_chars_per_line: int,
+ max_chars_per_line: int,
  filtered_domain_centroids: Dict[str, np.ndarray],
  filtered_domain_terms: Dict[str, str],
  valid_indices: List[int],
@@ -985,15 +993,15 @@ class NetworkPlotter:
  """Process the ids_to_keep, apply filtering, and store valid domain centroids and terms.

  Args:
- ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled.
- max_labels (int, optional): Maximum number of labels allowed.
- domain_centroids (dict): Mapping of domains to their centroids.
+ domain_id_to_centroid_map (dict): Mapping of domain IDs to their centroids.
+ ids_to_keep (list, tuple, or np.ndarray, optional): IDs of domains that must be labeled.
  ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
  words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
- min_word_length (int): Minimum allowed word length.
- max_word_length (int): Maximum allowed word length.
- max_words (int): Maximum number of words allowed.
- min_words (int): Minimum number of words required for a domain.
+ max_labels (int, optional): Maximum number of labels allowed.
+ min_label_lines (int): Minimum number of lines in a label.
+ max_label_lines (int): Maximum number of lines in a label.
+ min_chars_per_line (int): Minimum number of characters in a line to display.
+ max_chars_per_line (int): Maximum number of characters in a line to display.
  filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
  filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
  valid_indices (list): List to store valid indices (output).
@@ -1004,8 +1012,6 @@ class NetworkPlotter:
  Raises:
  ValueError: If the number of provided `ids_to_keep` exceeds `max_labels`.
  """
- # Convert ids_to_keep to a set for faster, unique lookups
- ids_to_keep = set(ids_to_keep) if ids_to_keep else set()
  # Check if the number of provided ids_to_keep exceeds max_labels
  if max_labels is not None and len(ids_to_keep) > max_labels:
  raise ValueError(
@@ -1014,45 +1020,54 @@ class NetworkPlotter:

  # Process each domain in ids_to_keep
  for domain in ids_to_keep:
- if domain in self.graph.domain_id_to_domain_terms_map and domain in domain_centroids:
- filtered_domain_terms[domain] = self._process_terms(
+ if (
+ domain in self.graph.domain_id_to_domain_terms_map
+ and domain in domain_id_to_centroid_map
+ ):
+ domain_centroid = domain_id_to_centroid_map[domain]
+ # No need to filter the domain terms if it is in ids_to_keep
+ _ = self._validate_and_update_domain(
  domain=domain,
+ domain_centroid=domain_centroid,
+ domain_id_to_centroid_map=domain_id_to_centroid_map,
  ids_to_replace=ids_to_replace,
  words_to_omit=words_to_omit,
- min_word_length=min_word_length,
- max_word_length=max_word_length,
- max_words=max_words,
+ min_label_lines=min_label_lines,
+ max_label_lines=max_label_lines,
+ min_chars_per_line=min_chars_per_line,
+ max_chars_per_line=max_chars_per_line,
+ filtered_domain_centroids=filtered_domain_centroids,
+ filtered_domain_terms=filtered_domain_terms,
+ valid_indices=valid_indices,
  )
- filtered_domain_centroids[domain] = domain_centroids[domain]
- valid_indices.append(list(domain_centroids.keys()).index(domain))

  def _process_remaining_domains(
  self,
- domain_centroids: Dict[str, np.ndarray],
- ids_to_keep: Union[List[str], Tuple[str], np.ndarray, None],
+ domain_id_to_centroid_map: Dict[str, np.ndarray],
+ ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
  ids_to_replace: Union[Dict[str, str], None],
  words_to_omit: Union[List[str], None],
- min_word_length: int,
- max_word_length: int,
- max_words: int,
- min_words: int,
- max_labels: Union[int, None],
+ remaining_labels: int,
+ min_label_lines: int,
+ max_label_lines: int,
+ min_chars_per_line: int,
+ max_chars_per_line: int,
  filtered_domain_centroids: Dict[str, np.ndarray],
  filtered_domain_terms: Dict[str, str],
  valid_indices: List[int],
  ) -> None:
- """Process remaining domains to fill in additional labels, if there are slots left.
+ """Process remaining domains to fill in additional labels, respecting the remaining_labels limit.

  Args:
- domain_centroids (dict): Mapping of domains to their centroids.
- ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled. Defaults to None.
+ domain_id_to_centroid_map (dict): Mapping of domain IDs to their centroids.
+ ids_to_keep (list, tuple, or np.ndarray, optional): IDs of domains that must be labeled.
  ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
  words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
- min_word_length (int): Minimum allowed word length.
- max_word_length (int): Maximum allowed word length.
- max_words (int): Maximum number of words allowed.
- min_words (int): Minimum number of words required for a domain.
- max_labels (int, optional): Maximum number of labels allowed. Defaults to None.
+ remaining_labels (int): The remaining number of labels that can be generated.
+ min_label_lines (int): Minimum number of lines in a label.
+ max_label_lines (int): Maximum number of lines in a label.
+ min_chars_per_line (int): Minimum number of characters in a line to display.
+ max_chars_per_line (int): Maximum number of characters in a line to display.
  filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
  filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
  valid_indices (list): List to store valid indices (output).
@@ -1060,32 +1075,142 @@ class NetworkPlotter:
  Note:
  The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are modified in-place.
  """
- for idx, (domain, centroid) in enumerate(domain_centroids.items()):
- # Check if the domain is NaN and continue if true
- if pd.isna(domain) or (isinstance(domain, float) and np.isnan(domain)):
- continue # Skip NaN domains
- if ids_to_keep and domain in ids_to_keep:
- continue # Skip domains already handled by ids_to_keep
-
- filtered_domain_terms[domain] = self._process_terms(
+ # Counter to track how many labels have been created
+ label_count = 0
+ # Collect domains not in ids_to_keep
+ remaining_domains = {
+ domain: centroid
+ for domain, centroid in domain_id_to_centroid_map.items()
+ if domain not in ids_to_keep and not pd.isna(domain)
+ }
+
+ # Function to calculate distance between two centroids
+ def calculate_distance(centroid1, centroid2):
+ return np.linalg.norm(centroid1 - centroid2)
+
+ # Find the farthest apart domains using centroids
+ if remaining_domains and remaining_labels:
+ selected_domains = []
+ first_domain = next(iter(remaining_domains)) # Pick the first domain to start
+ selected_domains.append(first_domain)
+
+ while len(selected_domains) < remaining_labels:
+ farthest_domain = None
+ max_distance = -1
+ # Find the domain farthest from any already selected domain
+ for candidate_domain, candidate_centroid in remaining_domains.items():
+ if candidate_domain in selected_domains:
+ continue
+
+ # Calculate the minimum distance to any selected domain
+ min_distance = min(
+ calculate_distance(candidate_centroid, remaining_domains[dom])
+ for dom in selected_domains
+ )
+ # Update the farthest domain if the minimum distance is greater
+ if min_distance > max_distance:
+ max_distance = min_distance
+ farthest_domain = candidate_domain
+
+ # Add the farthest domain to the selected domains
+ if farthest_domain:
+ selected_domains.append(farthest_domain)
+ else:
+ break # No more domains to select
+
+ # Process the selected domains and add to filtered lists
+ for domain in selected_domains:
+ domain_centroid = remaining_domains[domain]
+ is_domain_valid = self._validate_and_update_domain(
  domain=domain,
+ domain_centroid=domain_centroid,
+ domain_id_to_centroid_map=domain_id_to_centroid_map,
  ids_to_replace=ids_to_replace,
  words_to_omit=words_to_omit,
- min_word_length=min_word_length,
- max_word_length=max_word_length,
- max_words=max_words,
+ min_label_lines=min_label_lines,
+ max_label_lines=max_label_lines,
+ min_chars_per_line=min_chars_per_line,
+ max_chars_per_line=max_chars_per_line,
+ filtered_domain_centroids=filtered_domain_centroids,
+ filtered_domain_terms=filtered_domain_terms,
+ valid_indices=valid_indices,
  )
- filtered_domain_centroids[domain] = centroid
- valid_indices.append(idx)
+ # Increment the label count if the domain is valid
+ if is_domain_valid:
+ label_count += 1
+ if label_count >= remaining_labels:
+ break
+
+ def _validate_and_update_domain(
+ self,
+ domain: str,
+ domain_centroid: np.ndarray,
+ domain_id_to_centroid_map: Dict[str, np.ndarray],
+ ids_to_replace: Union[Dict[str, str], None],
+ words_to_omit: Union[List[str], None],
+ min_label_lines: int,
+ max_label_lines: int,
+ min_chars_per_line: int,
+ max_chars_per_line: int,
+ filtered_domain_centroids: Dict[str, np.ndarray],
+ filtered_domain_terms: Dict[str, str],
+ valid_indices: List[int],
+ ) -> bool:
+ """Validate and process the domain terms, updating relevant dictionaries if valid.
+
+ Args:
+ domain (str): Domain ID to process.
+ domain_centroid (np.ndarray): Centroid position of the domain.
+ domain_id_to_centroid_map (dict): Mapping of domain IDs to their centroids.
+ ids_to_replace (Union[Dict[str, str], None]): A dictionary mapping domain IDs to custom labels.
+ words_to_omit (Union[List[str], None]): List of words to omit from the labels.
+ min_label_lines (int): Minimum number of lines required in a label.
+ max_label_lines (int): Maximum number of lines allowed in a label.
+ min_chars_per_line (int): Minimum number of characters allowed per line.
+ max_chars_per_line (int): Maximum number of characters allowed per line.
+ filtered_domain_centroids (Dict[str, np.ndarray]): Dictionary to store valid domain centroids.
+ filtered_domain_terms (Dict[str, str]): Dictionary to store valid domain terms.
+ valid_indices (List[int]): List of valid domain indices.
+
+ Returns:
+ bool: True if the domain is valid and added to the filtered dictionaries, False otherwise.
+
+ Note:
+ The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are modified in-place.
+ """
+ # Process the domain terms
+ domain_terms = self._process_terms(
+ domain=domain,
+ ids_to_replace=ids_to_replace,
+ words_to_omit=words_to_omit,
+ max_label_lines=max_label_lines,
+ min_chars_per_line=min_chars_per_line,
+ max_chars_per_line=max_chars_per_line,
+ )
+ # If domain_terms is empty, skip further processing
+ if not domain_terms:
+ return False
+
+ # Split the terms by TERM_DELIMITER and count the number of lines
+ num_domain_lines = len(domain_terms.split(TERM_DELIMITER))
+ # Check if the number of lines is greater than or equal to the minimum
+ if num_domain_lines >= min_label_lines:
+ filtered_domain_centroids[domain] = domain_centroid
+ filtered_domain_terms[domain] = domain_terms
+ # Add the index of the domain to the valid indices list
+ valid_indices.append(list(domain_id_to_centroid_map.keys()).index(domain))
+ return True
+
+ return False

  def _process_terms(
  self,
  domain: str,
  ids_to_replace: Union[Dict[str, str], None],
  words_to_omit: Union[List[str], None],
- min_word_length: int,
- max_word_length: int,
- max_words: int,
+ max_label_lines: int,
+ min_chars_per_line: int,
+ max_chars_per_line: int,
  ) -> List[str]:
  """Process terms for a domain, applying word length constraints and combining words where appropriate.

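The selection loop added to _process_remaining_domains is a greedy farthest-point (max-min) heuristic: instead of labeling the first remaining domains in dictionary order, it repeatedly picks the domain whose closest already-selected centroid is farthest away, spreading labels across the layout. A standalone sketch of the same idea, with made-up centroids for illustration:

    import numpy as np

    def select_spread_out_domains(centroids: dict, k: int) -> list:
        """Greedy max-min (farthest-point) selection mirroring the loop above:
        start from an arbitrary domain, then repeatedly add the domain whose
        nearest already-selected centroid is farthest away."""
        if not centroids or k <= 0:
            return []

        selected = [next(iter(centroids))]  # arbitrary starting domain
        while len(selected) < min(k, len(centroids)):
            best_domain, best_dist = None, -1.0
            for domain, centroid in centroids.items():
                if domain in selected:
                    continue
                # Distance from this candidate to its closest selected centroid
                nearest = min(np.linalg.norm(centroid - centroids[s]) for s in selected)
                if nearest > best_dist:
                    best_domain, best_dist = domain, nearest
            selected.append(best_domain)
        return selected

    # Toy example with invented 2D centroids
    toy_centroids = {
        "A": np.array([0.0, 0.0]),
        "B": np.array([1.0, 0.0]),
        "C": np.array([10.0, 10.0]),
        "D": np.array([10.0, 9.0]),
    }
    print(select_spread_out_domains(toy_centroids, k=2))  # ['A', 'C']

Each selected domain is then passed through _validate_and_update_domain, which only counts toward remaining_labels if its processed terms yield at least min_label_lines lines.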
@@ -1093,12 +1218,12 @@ class NetworkPlotter:
  domain (str): The domain being processed.
  ids_to_replace (dict, optional): Dictionary mapping domain IDs to custom labels.
  words_to_omit (list, optional): List of words to omit from the labels.
- min_word_length (int): Minimum allowed word length.
- max_word_length (int): Maximum allowed word length.
- max_words (int): Maximum number of words allowed.
+ max_label_lines (int): Maximum number of lines in a label.
+ min_chars_per_line (int): Minimum number of characters in a line to display.
+ max_chars_per_line (int): Maximum number of characters in a line to display.

  Returns:
- list: Processed terms, with words combined if necessary to fit within constraints.
+ str: Processed terms separated by TERM_DELIMITER, with words combined if necessary to fit within constraints.
  """
  # Handle ids_to_replace logic
  if ids_to_replace and domain in ids_to_replace:
@@ -1111,11 +1236,11 @@ class NetworkPlotter:
  terms = [
  term
  for term in terms
- if term.lower() not in words_to_omit and len(term) >= min_word_length
+ if term.lower() not in words_to_omit and len(term) >= min_chars_per_line
  ]

  # Use the combine_words function directly to handle word combinations and length constraints
- compressed_terms = _combine_words(tuple(terms), max_word_length, max_words)
+ compressed_terms = _combine_words(tuple(terms), max_chars_per_line, max_label_lines)

  return compressed_terms

@@ -1433,14 +1558,14 @@ def _calculate_bounding_box(
  return center, radius


- def _combine_words(words: List[str], max_length: int, max_words: int) -> str:
- """Combine words to fit within the max_length and max_words constraints,
- and separate the final output by ':' for plotting.
+ def _combine_words(words: List[str], max_chars_per_line: int, max_label_lines: int) -> str:
+ """Combine words to fit within the max_chars_per_line and max_label_lines constraints,
+ and separate the final output by TERM_DELIMITER for plotting.

  Args:
  words (List[str]): List of words to combine.
- max_length (int): Maximum allowed length for a combined line.
- max_words (int): Maximum number of lines (words) allowed.
+ max_chars_per_line (int): Maximum number of characters in a line to display.
+ max_label_lines (int): Maximum number of lines in a label.

  Returns:
  str: String of combined words separated by ':' for line breaks.
@@ -1456,34 +1581,38 @@ def _combine_words(words: List[str], max_length: int, max_words: int) -> str:
  # Try to combine more words if possible, and ensure the combination fits within max_length
  for j in range(i + 1, len(words_batch)):
  next_word = words_batch[j]
- if len(combined_word) + len(next_word) + 2 <= max_length: # +2 for ', '
+ # Ensure that the combined word fits within the max_chars_per_line limit
+ if len(combined_word) + len(next_word) + 1 <= max_chars_per_line: # +1 for space
  combined_word = f"{combined_word} {next_word}"
  i += 1 # Move past the combined word
  else:
  break # Stop combining if the length is exceeded

- combined_lines.append(combined_word) # Add the combined word or single word
- i += 1 # Move to the next word
+ # Add the combined word only if it fits within the max_chars_per_line limit
+ if len(combined_word) <= max_chars_per_line:
+ combined_lines.append(combined_word) # Add the combined word
+ # Move to the next word
+ i += 1

- # Stop if we've reached the max_words limit
- if len(combined_lines) >= max_words:
+ # Stop if we've reached the max_label_lines limit
+ if len(combined_lines) >= max_label_lines:
  break

  return combined_lines

- # Main logic: start with max_words number of words
- combined_lines = try_combinations(words[:max_words])
- remaining_words = words[max_words:] # Remaining words after the initial batch
+ # Main logic: start with max_label_lines number of words
+ combined_lines = try_combinations(words[:max_label_lines])
+ remaining_words = words[max_label_lines:] # Remaining words after the initial batch

  # Continue pulling more words until we fill the lines
- while remaining_words and len(combined_lines) < max_words:
- available_slots = max_words - len(combined_lines)
+ while remaining_words and len(combined_lines) < max_label_lines:
+ available_slots = max_label_lines - len(combined_lines)
  words_to_add = remaining_words[:available_slots]
  remaining_words = remaining_words[available_slots:]
  combined_lines += try_combinations(words_to_add)

- # Join the final combined lines with '::::', a special separator for line breaks
- return "::::".join(combined_lines[:max_words])
+ # Join the final combined lines with TERM_DELIMITER, a special separator for line breaks
+ return TERM_DELIMITER.join(combined_lines[:max_label_lines])


  def _calculate_best_label_positions(
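The reworked _combine_words greedily packs words into at most max_label_lines lines of at most max_chars_per_line characters, now budgeting a single space between joined words and dropping any over-long line. A simplified, hedged sketch of that packing behavior (an approximation for illustration, not the package's exact algorithm):

    def combine_words_sketch(words, max_chars_per_line, max_label_lines, delimiter="::::"):
        """Greedy line packing: join words with single spaces, never exceed
        max_chars_per_line per line, keep at most max_label_lines lines."""
        lines, current = [], ""
        for word in words:
            candidate = word if not current else f"{current} {word}"  # +1 for the space
            if len(candidate) <= max_chars_per_line:
                current = candidate
            else:
                if current and len(current) <= max_chars_per_line:
                    lines.append(current)
                current = word if len(word) <= max_chars_per_line else ""
            if len(lines) >= max_label_lines:
                break
        if current and len(lines) < max_label_lines and len(current) <= max_chars_per_line:
            lines.append(current)
        return delimiter.join(lines[:max_label_lines])

    print(combine_words_sketch(["protein", "folding", "chaperone", "activity"], 16, 2))
    # protein folding::::chaperone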
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: risk-network
- Version: 0.0.8b2
+ Version: 0.0.8b4
  Summary: A Python package for biological network analysis
  Author: Ira Horecka
  Author-email: Ira Horecka <ira89@icloud.com>