risk-network 0.0.8b2__tar.gz → 0.0.8b4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/PKG-INFO +1 -1
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/__init__.py +1 -1
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/plot.py +252 -123
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/PKG-INFO +1 -1
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/LICENSE +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/MANIFEST.in +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/README.md +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/pyproject.toml +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/annotations/__init__.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/annotations/annotations.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/annotations/io.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/constants.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/log/__init__.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/log/config.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/log/params.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/__init__.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/community.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/domains.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/neighborhoods/neighborhoods.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/__init__.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/geometry.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/graph.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/network/io.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/risk.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/__init__.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/hypergeom.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/permutation/__init__.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/permutation/permutation.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/permutation/test_functions.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/poisson.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk/stats/stats.py +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/SOURCES.txt +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/setup.cfg +0 -0
- {risk_network-0.0.8b2 → risk_network-0.0.8b4}/setup.py +0 -0
@@ -3,7 +3,6 @@ risk/network/plot
|
|
3
3
|
~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from functools import lru_cache
|
7
6
|
from typing import Any, Dict, List, Tuple, Union
|
8
7
|
|
9
8
|
import matplotlib.colors as mcolors
|
@@ -18,6 +17,8 @@ from scipy.stats import gaussian_kde
|
|
18
17
|
from risk.log import params, logger
|
19
18
|
from risk.network.graph import NetworkGraph
|
20
19
|
|
20
|
+
TERM_DELIMITER = "::::" # String used to separate multiple domain terms when constructing composite domain labels
|
21
|
+
|
21
22
|
|
22
23
|
class NetworkPlotter:
|
23
24
|
"""A class for visualizing network graphs with customizable options.
|
@@ -679,10 +680,10 @@ class NetworkPlotter:
|
|
679
680
|
arrow_base_shrink: float = 0.0,
|
680
681
|
arrow_tip_shrink: float = 0.0,
|
681
682
|
max_labels: Union[int, None] = None,
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
683
|
+
max_label_lines: Union[int, None] = None,
|
684
|
+
min_label_lines: int = 1,
|
685
|
+
max_chars_per_line: Union[int, None] = None,
|
686
|
+
min_chars_per_line: int = 1,
|
686
687
|
words_to_omit: Union[List, None] = None,
|
687
688
|
overlay_ids: bool = False,
|
688
689
|
ids_to_keep: Union[List, Tuple, np.ndarray, None] = None,
|
@@ -704,10 +705,10 @@ class NetworkPlotter:
|
|
704
705
|
arrow_base_shrink (float, optional): Distance between the text and the base of the arrow. Defaults to 0.0.
|
705
706
|
arrow_tip_shrink (float, optional): Distance between the arrow tip and the centroid. Defaults to 0.0.
|
706
707
|
max_labels (int, optional): Maximum number of labels to plot. Defaults to None (no limit).
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
708
|
+
min_label_lines (int, optional): Minimum number of lines in a label. Defaults to 1.
|
709
|
+
max_label_lines (int, optional): Maximum number of lines in a label. Defaults to None (no limit).
|
710
|
+
min_chars_per_line (int, optional): Minimum number of characters in a line to display. Defaults to 1.
|
711
|
+
max_chars_per_line (int, optional): Maximum number of characters in a line to display. Defaults to None (no limit).
|
711
712
|
words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
|
712
713
|
overlay_ids (bool, optional): Whether to overlay domain IDs in the center of the centroids. Defaults to False.
|
713
714
|
ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled. To discover domain IDs,
|
@@ -736,70 +737,77 @@ class NetworkPlotter:
|
|
736
737
|
label_arrow_base_shrink=arrow_base_shrink,
|
737
738
|
label_arrow_tip_shrink=arrow_tip_shrink,
|
738
739
|
label_max_labels=max_labels,
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
740
|
+
label_min_label_lines=min_label_lines,
|
741
|
+
label_max_label_lines=max_label_lines,
|
742
|
+
label_max_chars_per_line=max_chars_per_line,
|
743
|
+
label_min_chars_per_line=min_chars_per_line,
|
743
744
|
label_words_to_omit=words_to_omit,
|
744
745
|
label_overlay_ids=overlay_ids,
|
745
746
|
label_ids_to_keep=ids_to_keep,
|
746
747
|
label_ids_to_replace=ids_to_replace,
|
747
748
|
)
|
748
749
|
|
750
|
+
# Convert ids_to_keep to a tuple (an empty tuple if it is None or empty)
|
751
|
+
ids_to_keep = tuple(ids_to_keep) if ids_to_keep else tuple()
|
749
752
|
# Set max_labels to the total number of domains if not provided (None)
|
750
753
|
if max_labels is None:
|
751
754
|
max_labels = len(self.graph.domain_id_to_node_ids_map)
|
755
|
+
# Set max_label_lines and max_chars_per_line to large numbers if not provided (None)
|
756
|
+
if max_label_lines is None:
|
757
|
+
max_label_lines = int(1e6)
|
758
|
+
if max_chars_per_line is None:
|
759
|
+
max_chars_per_line = int(1e6)
|
752
760
|
# Normalize words_to_omit to lowercase
|
753
761
|
if words_to_omit:
|
754
762
|
words_to_omit = set(word.lower() for word in words_to_omit)
|
755
763
|
|
756
|
-
# Calculate the center and radius of the network
|
757
|
-
|
764
|
+
# Calculate the centroid of each non-empty domain to position labels around the network
|
765
|
+
domain_id_to_centroid_map = {}
|
758
766
|
for domain_id, node_ids in self.graph.domain_id_to_node_ids_map.items():
|
759
767
|
if node_ids: # Skip if the domain has no nodes
|
760
|
-
|
768
|
+
domain_id_to_centroid_map[domain_id] = self._calculate_domain_centroid(node_ids)
|
761
769
|
|
762
770
|
# Initialize dictionaries and lists for valid indices
|
763
|
-
valid_indices = []
|
764
|
-
filtered_domain_centroids = {}
|
765
|
-
filtered_domain_terms = {}
|
771
|
+
valid_indices = [] # List of valid indices to plot colors and arrows
|
772
|
+
filtered_domain_centroids = {} # Filtered domain centroids to plot
|
773
|
+
filtered_domain_terms = {} # Filtered domain terms to plot
|
766
774
|
# Handle the ids_to_keep logic
|
767
775
|
if ids_to_keep:
|
768
776
|
# Process the ids_to_keep first INPLACE
|
769
777
|
self._process_ids_to_keep(
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
filtered_domain_centroids,
|
780
|
-
filtered_domain_terms,
|
781
|
-
valid_indices,
|
778
|
+
domain_id_to_centroid_map=domain_id_to_centroid_map,
|
779
|
+
ids_to_keep=ids_to_keep,
|
780
|
+
ids_to_replace=ids_to_replace,
|
781
|
+
words_to_omit=words_to_omit,
|
782
|
+
max_labels=max_labels,
|
783
|
+
min_label_lines=min_label_lines,
|
784
|
+
max_label_lines=max_label_lines,
|
785
|
+
min_chars_per_line=min_chars_per_line,
|
786
|
+
max_chars_per_line=max_chars_per_line,
|
787
|
+
filtered_domain_centroids=filtered_domain_centroids,
|
788
|
+
filtered_domain_terms=filtered_domain_terms,
|
789
|
+
valid_indices=valid_indices,
|
782
790
|
)
|
783
791
|
|
784
792
|
# Calculate remaining labels to plot after processing ids_to_keep
|
785
793
|
remaining_labels = (
|
786
|
-
max_labels - len(
|
794
|
+
max_labels - len(valid_indices) if valid_indices and max_labels else max_labels
|
787
795
|
)
|
788
796
|
# Process remaining domains INPLACE to fill in additional labels, if there are slots left
|
789
797
|
if remaining_labels and remaining_labels > 0:
|
790
798
|
self._process_remaining_domains(
|
791
|
-
|
792
|
-
ids_to_keep,
|
793
|
-
ids_to_replace,
|
794
|
-
words_to_omit,
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
filtered_domain_centroids,
|
801
|
-
filtered_domain_terms,
|
802
|
-
valid_indices,
|
799
|
+
domain_id_to_centroid_map=domain_id_to_centroid_map,
|
800
|
+
ids_to_keep=ids_to_keep,
|
801
|
+
ids_to_replace=ids_to_replace,
|
802
|
+
words_to_omit=words_to_omit,
|
803
|
+
remaining_labels=remaining_labels,
|
804
|
+
min_chars_per_line=min_chars_per_line,
|
805
|
+
max_chars_per_line=max_chars_per_line,
|
806
|
+
max_label_lines=max_label_lines,
|
807
|
+
min_label_lines=min_label_lines,
|
808
|
+
filtered_domain_centroids=filtered_domain_centroids,
|
809
|
+
filtered_domain_terms=filtered_domain_terms,
|
810
|
+
valid_indices=valid_indices,
|
803
811
|
)
|
804
812
|
|
805
813
|
# Calculate the bounding box around the network
|
@@ -819,8 +827,8 @@ class NetworkPlotter:
|
|
819
827
|
# Annotate the network with labels
|
820
828
|
for idx, (domain, pos) in zip(valid_indices, best_label_positions.items()):
|
821
829
|
centroid = filtered_domain_centroids[domain]
|
822
|
-
# Split by special key to split annotation into multiple lines
|
823
|
-
annotations = filtered_domain_terms[domain].split(
|
830
|
+
# Split by special key TERM_DELIMITER to split annotation into multiple lines
|
831
|
+
annotations = filtered_domain_terms[domain].split(TERM_DELIMITER)
|
824
832
|
self.ax.annotate(
|
825
833
|
"\n".join(annotations),
|
826
834
|
xy=centroid,
|
@@ -969,15 +977,15 @@ class NetworkPlotter:
|
|
969
977
|
|
970
978
|
def _process_ids_to_keep(
|
971
979
|
self,
|
972
|
-
|
973
|
-
|
974
|
-
domain_centroids: Dict[str, np.ndarray],
|
980
|
+
domain_id_to_centroid_map: Dict[str, np.ndarray],
|
981
|
+
ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
|
975
982
|
ids_to_replace: Union[Dict[str, str], None],
|
976
983
|
words_to_omit: Union[List[str], None],
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
984
|
+
max_labels: Union[int, None],
|
985
|
+
min_label_lines: int,
|
986
|
+
max_label_lines: int,
|
987
|
+
min_chars_per_line: int,
|
988
|
+
max_chars_per_line: int,
|
981
989
|
filtered_domain_centroids: Dict[str, np.ndarray],
|
982
990
|
filtered_domain_terms: Dict[str, str],
|
983
991
|
valid_indices: List[int],
|
@@ -985,15 +993,15 @@ class NetworkPlotter:
|
|
985
993
|
"""Process the ids_to_keep, apply filtering, and store valid domain centroids and terms.
|
986
994
|
|
987
995
|
Args:
|
988
|
-
|
989
|
-
|
990
|
-
domain_centroids (dict): Mapping of domains to their centroids.
|
996
|
+
domain_id_to_centroid_map (dict): Mapping of domain IDs to their centroids.
|
997
|
+
ids_to_keep (list, tuple, or np.ndarray, optional): IDs of domains that must be labeled.
|
991
998
|
ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
|
992
999
|
words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
1000
|
+
max_labels (int, optional): Maximum number of labels allowed.
|
1001
|
+
min_label_lines (int): Minimum number of lines in a label.
|
1002
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1003
|
+
min_chars_per_line (int): Minimum number of characters in a line to display.
|
1004
|
+
max_chars_per_line (int): Maximum number of characters in a line to display.
|
997
1005
|
filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
|
998
1006
|
filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
|
999
1007
|
valid_indices (list): List to store valid indices (output).
|
@@ -1004,8 +1012,6 @@ class NetworkPlotter:
|
|
1004
1012
|
Raises:
|
1005
1013
|
ValueError: If the number of provided `ids_to_keep` exceeds `max_labels`.
|
1006
1014
|
"""
|
1007
|
-
# Convert ids_to_keep to a set for faster, unique lookups
|
1008
|
-
ids_to_keep = set(ids_to_keep) if ids_to_keep else set()
|
1009
1015
|
# Check if the number of provided ids_to_keep exceeds max_labels
|
1010
1016
|
if max_labels is not None and len(ids_to_keep) > max_labels:
|
1011
1017
|
raise ValueError(
|
@@ -1014,45 +1020,54 @@ class NetworkPlotter:
|
|
1014
1020
|
|
1015
1021
|
# Process each domain in ids_to_keep
|
1016
1022
|
for domain in ids_to_keep:
|
1017
|
-
if
|
1018
|
-
|
1023
|
+
if (
|
1024
|
+
domain in self.graph.domain_id_to_domain_terms_map
|
1025
|
+
and domain in domain_id_to_centroid_map
|
1026
|
+
):
|
1027
|
+
domain_centroid = domain_id_to_centroid_map[domain]
|
1028
|
+
# No need to filter the domain terms if it is in ids_to_keep
|
1029
|
+
_ = self._validate_and_update_domain(
|
1019
1030
|
domain=domain,
|
1031
|
+
domain_centroid=domain_centroid,
|
1032
|
+
domain_id_to_centroid_map=domain_id_to_centroid_map,
|
1020
1033
|
ids_to_replace=ids_to_replace,
|
1021
1034
|
words_to_omit=words_to_omit,
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1035
|
+
min_label_lines=min_label_lines,
|
1036
|
+
max_label_lines=max_label_lines,
|
1037
|
+
min_chars_per_line=min_chars_per_line,
|
1038
|
+
max_chars_per_line=max_chars_per_line,
|
1039
|
+
filtered_domain_centroids=filtered_domain_centroids,
|
1040
|
+
filtered_domain_terms=filtered_domain_terms,
|
1041
|
+
valid_indices=valid_indices,
|
1025
1042
|
)
|
1026
|
-
filtered_domain_centroids[domain] = domain_centroids[domain]
|
1027
|
-
valid_indices.append(list(domain_centroids.keys()).index(domain))
|
1028
1043
|
|
1029
1044
|
def _process_remaining_domains(
|
1030
1045
|
self,
|
1031
|
-
|
1032
|
-
ids_to_keep: Union[List[str], Tuple[str], np.ndarray
|
1046
|
+
domain_id_to_centroid_map: Dict[str, np.ndarray],
|
1047
|
+
ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
|
1033
1048
|
ids_to_replace: Union[Dict[str, str], None],
|
1034
1049
|
words_to_omit: Union[List[str], None],
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1050
|
+
remaining_labels: int,
|
1051
|
+
min_label_lines: int,
|
1052
|
+
max_label_lines: int,
|
1053
|
+
min_chars_per_line: int,
|
1054
|
+
max_chars_per_line: int,
|
1040
1055
|
filtered_domain_centroids: Dict[str, np.ndarray],
|
1041
1056
|
filtered_domain_terms: Dict[str, str],
|
1042
1057
|
valid_indices: List[int],
|
1043
1058
|
) -> None:
|
1044
|
-
"""Process remaining domains to fill in additional labels,
|
1059
|
+
"""Process remaining domains to fill in additional labels, respecting the remaining_labels limit.
|
1045
1060
|
|
1046
1061
|
Args:
|
1047
|
-
|
1048
|
-
ids_to_keep (list, tuple, np.ndarray,
|
1062
|
+
domain_id_to_centroid_map (dict): Mapping of domain IDs to their centroids.
|
1063
|
+
ids_to_keep (list, tuple, or np.ndarray, optional): IDs of domains that must be labeled.
|
1049
1064
|
ids_to_replace (dict, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
|
1050
1065
|
words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
|
1051
|
-
|
1052
|
-
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1066
|
+
remaining_labels (int): The remaining number of labels that can be generated.
|
1067
|
+
min_label_lines (int): Minimum number of lines in a label.
|
1068
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1069
|
+
min_chars_per_line (int): Minimum number of characters in a line to display.
|
1070
|
+
max_chars_per_line (int): Maximum number of characters in a line to display.
|
1056
1071
|
filtered_domain_centroids (dict): Dictionary to store filtered domain centroids (output).
|
1057
1072
|
filtered_domain_terms (dict): Dictionary to store filtered domain terms (output).
|
1058
1073
|
valid_indices (list): List to store valid indices (output).
|
@@ -1060,32 +1075,142 @@ class NetworkPlotter:
|
|
1060
1075
|
Note:
|
1061
1076
|
The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are modified in-place.
|
1062
1077
|
"""
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1078
|
+
# Counter to track how many labels have been created
|
1079
|
+
label_count = 0
|
1080
|
+
# Collect domains not in ids_to_keep
|
1081
|
+
remaining_domains = {
|
1082
|
+
domain: centroid
|
1083
|
+
for domain, centroid in domain_id_to_centroid_map.items()
|
1084
|
+
if domain not in ids_to_keep and not pd.isna(domain)
|
1085
|
+
}
|
1086
|
+
|
1087
|
+
# Function to calculate distance between two centroids
|
1088
|
+
def calculate_distance(centroid1, centroid2):
|
1089
|
+
return np.linalg.norm(centroid1 - centroid2)
|
1090
|
+
|
1091
|
+
# Find the farthest apart domains using centroids
|
1092
|
+
if remaining_domains and remaining_labels:
|
1093
|
+
selected_domains = []
|
1094
|
+
first_domain = next(iter(remaining_domains)) # Pick the first domain to start
|
1095
|
+
selected_domains.append(first_domain)
|
1096
|
+
|
1097
|
+
while len(selected_domains) < remaining_labels:
|
1098
|
+
farthest_domain = None
|
1099
|
+
max_distance = -1
|
1100
|
+
# Find the candidate whose nearest already-selected domain is farthest away (max-min distance)
|
1101
|
+
for candidate_domain, candidate_centroid in remaining_domains.items():
|
1102
|
+
if candidate_domain in selected_domains:
|
1103
|
+
continue
|
1104
|
+
|
1105
|
+
# Calculate the minimum distance to any selected domain
|
1106
|
+
min_distance = min(
|
1107
|
+
calculate_distance(candidate_centroid, remaining_domains[dom])
|
1108
|
+
for dom in selected_domains
|
1109
|
+
)
|
1110
|
+
# Update the farthest domain if the minimum distance is greater
|
1111
|
+
if min_distance > max_distance:
|
1112
|
+
max_distance = min_distance
|
1113
|
+
farthest_domain = candidate_domain
|
1114
|
+
|
1115
|
+
# Add the farthest domain to the selected domains
|
1116
|
+
if farthest_domain:
|
1117
|
+
selected_domains.append(farthest_domain)
|
1118
|
+
else:
|
1119
|
+
break # No more domains to select
|
1120
|
+
|
1121
|
+
# Process the selected domains and add to filtered lists
|
1122
|
+
for domain in selected_domains:
|
1123
|
+
domain_centroid = remaining_domains[domain]
|
1124
|
+
is_domain_valid = self._validate_and_update_domain(
|
1071
1125
|
domain=domain,
|
1126
|
+
domain_centroid=domain_centroid,
|
1127
|
+
domain_id_to_centroid_map=domain_id_to_centroid_map,
|
1072
1128
|
ids_to_replace=ids_to_replace,
|
1073
1129
|
words_to_omit=words_to_omit,
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1130
|
+
min_label_lines=min_label_lines,
|
1131
|
+
max_label_lines=max_label_lines,
|
1132
|
+
min_chars_per_line=min_chars_per_line,
|
1133
|
+
max_chars_per_line=max_chars_per_line,
|
1134
|
+
filtered_domain_centroids=filtered_domain_centroids,
|
1135
|
+
filtered_domain_terms=filtered_domain_terms,
|
1136
|
+
valid_indices=valid_indices,
|
1077
1137
|
)
|
1078
|
-
|
1079
|
-
|
1138
|
+
# Increment the label count if the domain is valid
|
1139
|
+
if is_domain_valid:
|
1140
|
+
label_count += 1
|
1141
|
+
if label_count >= remaining_labels:
|
1142
|
+
break
|
1143
|
+
|
1144
|
+
def _validate_and_update_domain(
|
1145
|
+
self,
|
1146
|
+
domain: str,
|
1147
|
+
domain_centroid: np.ndarray,
|
1148
|
+
domain_id_to_centroid_map: Dict[str, np.ndarray],
|
1149
|
+
ids_to_replace: Union[Dict[str, str], None],
|
1150
|
+
words_to_omit: Union[List[str], None],
|
1151
|
+
min_label_lines: int,
|
1152
|
+
max_label_lines: int,
|
1153
|
+
min_chars_per_line: int,
|
1154
|
+
max_chars_per_line: int,
|
1155
|
+
filtered_domain_centroids: Dict[str, np.ndarray],
|
1156
|
+
filtered_domain_terms: Dict[str, str],
|
1157
|
+
valid_indices: List[int],
|
1158
|
+
) -> bool:
|
1159
|
+
"""Validate and process the domain terms, updating relevant dictionaries if valid.
|
1160
|
+
|
1161
|
+
Args:
|
1162
|
+
domain (str): Domain ID to process.
|
1163
|
+
domain_centroid (np.ndarray): Centroid position of the domain.
|
1164
|
+
domain_id_to_centroid_map (dict): Mapping of domain IDs to their centroids.
|
1165
|
+
ids_to_replace (Union[Dict[str, str], None]): A dictionary mapping domain IDs to custom labels.
|
1166
|
+
words_to_omit (Union[List[str], None]): List of words to omit from the labels.
|
1167
|
+
min_label_lines (int): Minimum number of lines required in a label.
|
1168
|
+
max_label_lines (int): Maximum number of lines allowed in a label.
|
1169
|
+
min_chars_per_line (int): Minimum number of characters allowed per line.
|
1170
|
+
max_chars_per_line (int): Maximum number of characters allowed per line.
|
1171
|
+
filtered_domain_centroids (Dict[str, np.ndarray]): Dictionary to store valid domain centroids.
|
1172
|
+
filtered_domain_terms (Dict[str, str]): Dictionary to store valid domain terms.
|
1173
|
+
valid_indices (List[int]): List of valid domain indices.
|
1174
|
+
|
1175
|
+
Returns:
|
1176
|
+
bool: True if the domain is valid and added to the filtered dictionaries, False otherwise.
|
1177
|
+
|
1178
|
+
Note:
|
1179
|
+
The `filtered_domain_centroids`, `filtered_domain_terms`, and `valid_indices` are modified in-place.
|
1180
|
+
"""
|
1181
|
+
# Process the domain terms
|
1182
|
+
domain_terms = self._process_terms(
|
1183
|
+
domain=domain,
|
1184
|
+
ids_to_replace=ids_to_replace,
|
1185
|
+
words_to_omit=words_to_omit,
|
1186
|
+
max_label_lines=max_label_lines,
|
1187
|
+
min_chars_per_line=min_chars_per_line,
|
1188
|
+
max_chars_per_line=max_chars_per_line,
|
1189
|
+
)
|
1190
|
+
# If domain_terms is empty, skip further processing
|
1191
|
+
if not domain_terms:
|
1192
|
+
return False
|
1193
|
+
|
1194
|
+
# Split the terms by TERM_DELIMITER and count the number of lines
|
1195
|
+
num_domain_lines = len(domain_terms.split(TERM_DELIMITER))
|
1196
|
+
# Check if the number of lines is greater than or equal to the minimum
|
1197
|
+
if num_domain_lines >= min_label_lines:
|
1198
|
+
filtered_domain_centroids[domain] = domain_centroid
|
1199
|
+
filtered_domain_terms[domain] = domain_terms
|
1200
|
+
# Add the index of the domain to the valid indices list
|
1201
|
+
valid_indices.append(list(domain_id_to_centroid_map.keys()).index(domain))
|
1202
|
+
return True
|
1203
|
+
|
1204
|
+
return False
|
1080
1205
|
|
1081
1206
|
def _process_terms(
|
1082
1207
|
self,
|
1083
1208
|
domain: str,
|
1084
1209
|
ids_to_replace: Union[Dict[str, str], None],
|
1085
1210
|
words_to_omit: Union[List[str], None],
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1211
|
+
max_label_lines: int,
|
1212
|
+
min_chars_per_line: int,
|
1213
|
+
max_chars_per_line: int,
|
1089
1214
|
) -> List[str]:
|
1090
1215
|
"""Process terms for a domain, applying word length constraints and combining words where appropriate.
|
1091
1216
|
|
@@ -1093,12 +1218,12 @@ class NetworkPlotter:
|
|
1093
1218
|
domain (str): The domain being processed.
|
1094
1219
|
ids_to_replace (dict, optional): Dictionary mapping domain IDs to custom labels.
|
1095
1220
|
words_to_omit (list, optional): List of words to omit from the labels.
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1221
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1222
|
+
min_chars_per_line (int): Minimum number of characters in a line to display.
|
1223
|
+
max_chars_per_line (int): Maximum number of characters in a line to display.
|
1099
1224
|
|
1100
1225
|
Returns:
|
1101
|
-
|
1226
|
+
str: Processed terms separated by TERM_DELIMITER, with words combined if necessary to fit within constraints.
|
1102
1227
|
"""
|
1103
1228
|
# Handle ids_to_replace logic
|
1104
1229
|
if ids_to_replace and domain in ids_to_replace:
|
@@ -1111,11 +1236,11 @@ class NetworkPlotter:
|
|
1111
1236
|
terms = [
|
1112
1237
|
term
|
1113
1238
|
for term in terms
|
1114
|
-
if term.lower() not in words_to_omit and len(term) >=
|
1239
|
+
if term.lower() not in words_to_omit and len(term) >= min_chars_per_line
|
1115
1240
|
]
|
1116
1241
|
|
1117
1242
|
# Use the combine_words function directly to handle word combinations and length constraints
|
1118
|
-
compressed_terms = _combine_words(tuple(terms),
|
1243
|
+
compressed_terms = _combine_words(tuple(terms), max_chars_per_line, max_label_lines)
|
1119
1244
|
|
1120
1245
|
return compressed_terms
|
1121
1246
|
|
@@ -1433,14 +1558,14 @@ def _calculate_bounding_box(
|
|
1433
1558
|
return center, radius
|
1434
1559
|
|
1435
1560
|
|
1436
|
-
def _combine_words(words: List[str],
|
1437
|
-
"""Combine words to fit within the
|
1438
|
-
and separate the final output by
|
1561
|
+
def _combine_words(words: List[str], max_chars_per_line: int, max_label_lines: int) -> str:
|
1562
|
+
"""Combine words to fit within the max_chars_per_line and max_label_lines constraints,
|
1563
|
+
and separate the final output by TERM_DELIMITER for plotting.
|
1439
1564
|
|
1440
1565
|
Args:
|
1441
1566
|
words (List[str]): List of words to combine.
|
1442
|
-
|
1443
|
-
|
1567
|
+
max_chars_per_line (int): Maximum number of characters in a line to display.
|
1568
|
+
max_label_lines (int): Maximum number of lines in a label.
|
1444
1569
|
|
1445
1570
|
Returns:
|
1446
1571
|
str: String of combined words separated by TERM_DELIMITER for line breaks.
|
@@ -1456,34 +1581,38 @@ def _combine_words(words: List[str], max_length: int, max_words: int) -> str:
|
|
1456
1581
|
# Try to combine more words if possible, and ensure the combination fits within max_chars_per_line
|
1457
1582
|
for j in range(i + 1, len(words_batch)):
|
1458
1583
|
next_word = words_batch[j]
|
1459
|
-
|
1584
|
+
# Ensure that the combined word fits within the max_chars_per_line limit
|
1585
|
+
if len(combined_word) + len(next_word) + 1 <= max_chars_per_line: # +1 for space
|
1460
1586
|
combined_word = f"{combined_word} {next_word}"
|
1461
1587
|
i += 1 # Move past the combined word
|
1462
1588
|
else:
|
1463
1589
|
break # Stop combining if the length is exceeded
|
1464
1590
|
|
1465
|
-
|
1466
|
-
|
1591
|
+
# Add the combined word only if it fits within the max_chars_per_line limit
|
1592
|
+
if len(combined_word) <= max_chars_per_line:
|
1593
|
+
combined_lines.append(combined_word) # Add the combined word
|
1594
|
+
# Move to the next word
|
1595
|
+
i += 1
|
1467
1596
|
|
1468
|
-
# Stop if we've reached the
|
1469
|
-
if len(combined_lines) >=
|
1597
|
+
# Stop if we've reached the max_label_lines limit
|
1598
|
+
if len(combined_lines) >= max_label_lines:
|
1470
1599
|
break
|
1471
1600
|
|
1472
1601
|
return combined_lines
|
1473
1602
|
|
1474
|
-
# Main logic: start with
|
1475
|
-
combined_lines = try_combinations(words[:
|
1476
|
-
remaining_words = words[
|
1603
|
+
# Main logic: start with max_label_lines number of words
|
1604
|
+
combined_lines = try_combinations(words[:max_label_lines])
|
1605
|
+
remaining_words = words[max_label_lines:] # Remaining words after the initial batch
|
1477
1606
|
|
1478
1607
|
# Continue pulling more words until we fill the lines
|
1479
|
-
while remaining_words and len(combined_lines) <
|
1480
|
-
available_slots =
|
1608
|
+
while remaining_words and len(combined_lines) < max_label_lines:
|
1609
|
+
available_slots = max_label_lines - len(combined_lines)
|
1481
1610
|
words_to_add = remaining_words[:available_slots]
|
1482
1611
|
remaining_words = remaining_words[available_slots:]
|
1483
1612
|
combined_lines += try_combinations(words_to_add)
|
1484
1613
|
|
1485
|
-
# Join the final combined lines with
|
1486
|
-
return
|
1614
|
+
# Join the final combined lines with TERM_DELIMITER, a special separator for line breaks
|
1615
|
+
return TERM_DELIMITER.join(combined_lines[:max_label_lines])
|
1487
1616
|
|
1488
1617
|
|
1489
1618
|
def _calculate_best_label_positions(
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|