risk_network-0.0.10-py3-none-any.whl → risk_network-0.0.12-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/__init__.py +10 -0
- risk/{annotations/annotations.py → annotation/annotation.py} +62 -102
- risk/{annotations → annotation}/io.py +93 -92
- risk/annotation/nltk_setup.py +86 -0
- risk/log/__init__.py +1 -1
- risk/log/parameters.py +26 -27
- risk/neighborhoods/__init__.py +0 -1
- risk/neighborhoods/api.py +38 -38
- risk/neighborhoods/community.py +33 -4
- risk/neighborhoods/domains.py +26 -28
- risk/neighborhoods/neighborhoods.py +8 -2
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
- risk/{stats → neighborhoods/stats}/permutation/test_functions.py +5 -4
- risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
- risk/network/__init__.py +0 -2
- risk/network/graph/__init__.py +0 -2
- risk/network/graph/api.py +19 -19
- risk/network/graph/graph.py +73 -68
- risk/{stats/significance.py → network/graph/stats.py} +2 -2
- risk/network/graph/summary.py +12 -13
- risk/network/io.py +163 -20
- risk/network/plotter/__init__.py +0 -2
- risk/network/plotter/api.py +1 -1
- risk/network/plotter/canvas.py +36 -36
- risk/network/plotter/contour.py +14 -15
- risk/network/plotter/labels.py +303 -294
- risk/network/plotter/network.py +6 -6
- risk/network/plotter/plotter.py +8 -10
- risk/network/plotter/utils/colors.py +15 -8
- risk/network/plotter/utils/layout.py +3 -3
- risk/risk.py +6 -7
- risk_network-0.0.12.dist-info/METADATA +122 -0
- risk_network-0.0.12.dist-info/RECORD +40 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/network/geometry.py +0 -150
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.10.dist-info/METADATA +0 -798
- risk_network-0.0.10.dist-info/RECORD +0 -40
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
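The file moves above imply that several import paths change between 0.0.10 and 0.0.12: `risk/annotations` becomes `risk/annotation`, and the statistics modules move under `risk/neighborhoods/stats` and `risk/network/graph`. A minimal sketch of the corresponding module imports, assuming these modules are imported directly rather than through the package's top-level API (an assumption; only the file paths come from the listing above):

```python
# Sketch only: module paths taken from the file list above. Whether your code imports
# these modules directly (rather than via the top-level risk API) is an assumption.

# risk-network 0.0.10 (old layout):
#   from risk.annotations import annotations, io
#   from risk.stats import stat_tests, significance
#   from risk.stats.permutation import permutation

# risk-network 0.0.12 (new layout):
from risk.annotation import annotation, io
from risk.neighborhoods.stats import tests
from risk.neighborhoods.stats.permutation import permutation
from risk.network.graph import stats as graph_stats
```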
risk/network/plotter/labels.py
CHANGED
@@ -134,7 +134,7 @@ class Labels:
         max_chars_per_line = int(1e6)
         # Normalize words_to_omit to lowercase
         if words_to_omit:
-            words_to_omit = set(word.lower() for word in words_to_omit)
+            words_to_omit = list(set(word.lower() for word in words_to_omit))

         # Calculate the center and radius of domains to position labels around the network
         domain_id_to_centroid_map = {}
@@ -188,7 +188,7 @@ class Labels:
         # Calculate the bounding box around the network
         center, radius = calculate_bounding_box(self.graph.node_coordinates, radius_margin=scale)
         # Calculate the best positions for labels
-        best_label_positions = _calculate_best_label_positions(
+        best_label_positions = self._calculate_best_label_positions(
             filtered_domain_centroids, center, radius, offset
         )
         # Convert all domain colors to RGBA using the to_rgba helper function
@@ -205,11 +205,11 @@ class Labels:
         for idx, (domain, pos) in zip(valid_indices, best_label_positions.items()):
             centroid = filtered_domain_centroids[domain]
             # Split by special key TERM_DELIMITER to split annotation into multiple lines
-
+            terms = filtered_domain_terms[domain].split(TERM_DELIMITER)
             if fontcase is not None:
-
+                terms = self._apply_str_transformation(words=terms, transformation=fontcase)
             self.ax.annotate(
-                "\n".join(
+                "\n".join(terms),
                 xy=centroid,
                 xytext=pos,
                 textcoords="data",
@@ -235,7 +235,7 @@ class Labels:
             self.ax.text(
                 centroid[0],
                 centroid[1],
-                domain,
+                str(domain),
                 ha="center",
                 va="center",
                 fontsize=fontsize,
@@ -281,6 +281,9 @@ class Labels:
                 found in arrow_alpha. Defaults to 1.0.
             arrow_base_shrink (float, optional): Distance between the text and the base of the arrow. Defaults to 0.0.
             arrow_tip_shrink (float, optional): Distance between the arrow tip and the centroid. Defaults to 0.0.
+
+        Raises:
+            ValueError: If no nodes are found in the network graph or if there are insufficient nodes to plot.
         """
         # Check if nodes is a list of lists or a flat list
         if any(isinstance(item, (list, tuple, np.ndarray)) for item in nodes):
@@ -368,7 +371,7 @@ class Labels:

     def _process_ids_to_keep(
         self,
-        domain_id_to_centroid_map: Dict[
+        domain_id_to_centroid_map: Dict[int, np.ndarray],
         ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
         ids_to_labels: Union[Dict[int, str], None],
         words_to_omit: Union[List[str], None],
@@ -377,14 +380,14 @@ class Labels:
         max_label_lines: int,
         min_chars_per_line: int,
         max_chars_per_line: int,
-        filtered_domain_centroids: Dict[
-        filtered_domain_terms: Dict[
+        filtered_domain_centroids: Dict[int, np.ndarray],
+        filtered_domain_terms: Dict[int, str],
         valid_indices: List[int],
     ) -> None:
         """Process the ids_to_keep, apply filtering, and store valid domain centroids and terms.

         Args:
-            domain_id_to_centroid_map (Dict[
+            domain_id_to_centroid_map (Dict[int, np.ndarray]): Mapping of domain IDs to their centroids.
             ids_to_keep (List, Tuple, or np.ndarray, optional): IDs of domains that must be labeled.
             ids_to_labels (Dict[int, str], None, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
             words_to_omit (List, optional): List of words to omit from the labels. Defaults to None.
@@ -393,7 +396,7 @@ class Labels:
             max_label_lines (int): Maximum number of lines in a label.
             min_chars_per_line (int): Minimum number of characters in a line to display.
             max_chars_per_line (int): Maximum number of characters in a line to display.
-            filtered_domain_centroids (Dict[
+            filtered_domain_centroids (Dict[int, np.ndarray]): Dictionary to store filtered domain centroids (output).
             filtered_domain_terms (Dict[str, str]): Dictionary to store filtered domain terms (output).
             valid_indices (List): List to store valid indices (output).

@@ -410,15 +413,15 @@ class Labels:
         )

         # Process each domain in ids_to_keep
-        for
+        for domain_id in ids_to_keep:
             if (
-
-                and
+                domain_id in self.graph.domain_id_to_domain_terms_map
+                and domain_id in domain_id_to_centroid_map
             ):
-                domain_centroid = domain_id_to_centroid_map[
+                domain_centroid = domain_id_to_centroid_map[domain_id]
                 # No need to filter the domain terms if it is in ids_to_keep
                 _ = self._validate_and_update_domain(
-
+                    domain_id=domain_id,
                     domain_centroid=domain_centroid,
                     domain_id_to_centroid_map=domain_id_to_centroid_map,
                     ids_to_labels=ids_to_labels,
@@ -434,7 +437,7 @@ class Labels:

     def _process_remaining_domains(
         self,
-        domain_id_to_centroid_map: Dict[
+        domain_id_to_centroid_map: Dict[int, np.ndarray],
         ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
         ids_to_labels: Union[Dict[int, str], None],
         words_to_omit: Union[List[str], None],
@@ -443,14 +446,14 @@ class Labels:
         max_label_lines: int,
         min_chars_per_line: int,
         max_chars_per_line: int,
-        filtered_domain_centroids: Dict[
-        filtered_domain_terms: Dict[
+        filtered_domain_centroids: Dict[int, np.ndarray],
+        filtered_domain_terms: Dict[int, str],
         valid_indices: List[int],
     ) -> None:
         """Process remaining domains to fill in additional labels, respecting the remaining_labels limit.

         Args:
-            domain_id_to_centroid_map (Dict[
+            domain_id_to_centroid_map (Dict[int, np.ndarray]): Mapping of domain IDs to their centroids.
             ids_to_keep (List, Tuple, or np.ndarray, optional): IDs of domains that must be labeled.
             ids_to_labels (Dict[int, str], None, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
             words_to_omit (List, optional): List of words to omit from the labels. Defaults to None.
@@ -459,7 +462,7 @@ class Labels:
             max_label_lines (int): Maximum number of lines in a label.
             min_chars_per_line (int): Minimum number of characters in a line to display.
             max_chars_per_line (int): Maximum number of characters in a line to display.
-            filtered_domain_centroids (Dict[
+            filtered_domain_centroids (Dict[int, np.ndarray]): Dictionary to store filtered domain centroids (output).
             filtered_domain_terms (Dict[str, str]): Dictionary to store filtered domain terms (output).
             valid_indices (List): List to store valid indices (output).

@@ -480,24 +483,24 @@ class Labels:
             return np.linalg.norm(centroid1 - centroid2)

         # Domains to plot on network
-
+        selected_domain_ids = []
         # Find the farthest apart domains using centroids
         if remaining_domains and remaining_labels:
             first_domain = next(iter(remaining_domains))  # Pick the first domain to start
-
+            selected_domain_ids.append(first_domain)

-            while len(
+            while len(selected_domain_ids) < remaining_labels:
                 farthest_domain = None
                 max_distance = -1
                 # Find the domain farthest from any already selected domain
                 for candidate_domain, candidate_centroid in remaining_domains.items():
-                    if candidate_domain in
+                    if candidate_domain in selected_domain_ids:
                         continue

                     # Calculate the minimum distance to any selected domain
                     min_distance = min(
                         calculate_distance(candidate_centroid, remaining_domains[dom])
-                        for dom in
+                        for dom in selected_domain_ids
                     )
                     # Update the farthest domain if the minimum distance is greater
                     if min_distance > max_distance:
@@ -506,15 +509,15 @@ class Labels:

                 # Add the farthest domain to the selected domains
                 if farthest_domain:
-
+                    selected_domain_ids.append(farthest_domain)
                 else:
                     break  # No more domains to select

         # Process the selected domains and add to filtered lists
-        for
-            domain_centroid = remaining_domains[
+        for domain_id in selected_domain_ids:
+            domain_centroid = remaining_domains[domain_id]
             is_domain_valid = self._validate_and_update_domain(
-
+                domain_id=domain_id,
                 domain_centroid=domain_centroid,
                 domain_id_to_centroid_map=domain_id_to_centroid_map,
                 ids_to_labels=ids_to_labels,
@@ -535,45 +538,45 @@ class Labels:

     def _validate_and_update_domain(
         self,
-
+        domain_id: int,
         domain_centroid: np.ndarray,
-        domain_id_to_centroid_map: Dict[
+        domain_id_to_centroid_map: Dict[int, np.ndarray],
         ids_to_labels: Union[Dict[int, str], None],
         words_to_omit: Union[List[str], None],
         min_label_lines: int,
         max_label_lines: int,
         min_chars_per_line: int,
         max_chars_per_line: int,
-        filtered_domain_centroids: Dict[
-        filtered_domain_terms: Dict[
+        filtered_domain_centroids: Dict[int, np.ndarray],
+        filtered_domain_terms: Dict[int, str],
         valid_indices: List[int],
     ) -> bool:
         """Validate and process the domain terms, updating relevant dictionaries if valid.

         Args:
-
+            domain_id (int): Domain ID to process.
             domain_centroid (np.ndarray): Centroid position of the domain.
-            domain_id_to_centroid_map (Dict[
+            domain_id_to_centroid_map (Dict[int, np.ndarray]): Mapping of domain IDs to their centroids.
             ids_to_labels (Dict[int, str], None, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
             words_to_omit (List[str], None, optional): List of words to omit from the labels. Defaults to None.
             min_label_lines (int): Minimum number of lines required in a label.
             max_label_lines (int): Maximum number of lines allowed in a label.
             min_chars_per_line (int): Minimum number of characters allowed per line.
             max_chars_per_line (int): Maximum number of characters allowed per line.
-            filtered_domain_centroids (Dict[
+            filtered_domain_centroids (Dict[int, np.ndarray]): Dictionary to store valid domain centroids.
             filtered_domain_terms (Dict[str, str]): Dictionary to store valid domain terms.
             valid_indices (List[int]): List of valid domain indices.

         Returns:
             bool: True if the domain is valid and added to the filtered dictionaries, False otherwise.
         """
-        if ids_to_labels and
+        if ids_to_labels and domain_id in ids_to_labels:
             # Directly use custom labels without filtering
-            domain_terms = ids_to_labels[
+            domain_terms = ids_to_labels[domain_id]
         else:
             # Process the domain terms automatically
             domain_terms = self._process_terms(
-
+                domain_id=domain_id,
                 words_to_omit=words_to_omit,
                 max_label_lines=max_label_lines,
                 min_chars_per_line=min_chars_per_line,
@@ -590,24 +593,24 @@ class Labels:
             return False

         # Store the valid terms and centroids
-        filtered_domain_centroids[
-        filtered_domain_terms[
-        valid_indices.append(list(domain_id_to_centroid_map.keys()).index(
+        filtered_domain_centroids[domain_id] = domain_centroid
+        filtered_domain_terms[domain_id] = domain_terms
+        valid_indices.append(list(domain_id_to_centroid_map.keys()).index(domain_id))

         return True

     def _process_terms(
         self,
-
+        domain_id: int,
         words_to_omit: Union[List[str], None],
         max_label_lines: int,
         min_chars_per_line: int,
         max_chars_per_line: int,
-    ) ->
+    ) -> str:
         """Process terms for a domain, applying word length constraints and combining words where appropriate.

         Args:
-
+            domain_id (int): Domain ID to process.
             words_to_omit (List[str], None): List of words to omit from the labels.
             max_label_lines (int): Maximum number of lines in a label.
             min_chars_per_line (int): Minimum number of characters in a line to display.
@@ -617,7 +620,7 @@ class Labels:
             str: Processed terms separated by TERM_DELIMITER, with words combined if necessary to fit within constraints.
         """
         # Set custom labels from significant terms
-        terms = self.graph.domain_id_to_domain_terms_map[
+        terms = self.graph.domain_id_to_domain_terms_map[domain_id].split(" ")
         # Apply words_to_omit and word length constraints
         if words_to_omit:
             terms = [
@@ -627,7 +630,7 @@ class Labels:
             ]

         # Use the combine_words function directly to handle word combinations and length constraints
-        compressed_terms = _combine_words(
+        compressed_terms = self._combine_words(list(terms), max_chars_per_line, max_label_lines)

         return compressed_terms

@@ -642,8 +645,8 @@ class Labels:
         scale_factor: float = 1.0,
         ids_to_colors: Union[Dict[int, Any], None] = None,
         random_seed: int = 888,
-    ) ->
-        """Get colors for the labels based on node
+    ) -> List:
+        """Get colors for the labels based on node annotation or a specified colormap.

         Args:
             cmap (str, optional): Name of the colormap to use for generating label colors. Defaults to "gist_rainbow".
@@ -661,7 +664,7 @@ class Labels:
             random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.

         Returns:
-
+            List: Array of RGBA colors for label annotation.
         """
         return get_annotated_domain_colors(
             graph=self.graph,
@@ -676,249 +679,255 @@ class Labels:
             random_seed=random_seed,
         )

+    def _combine_words(
+        self, words: List[str], max_chars_per_line: int, max_label_lines: int
+    ) -> str:
+        """Combine words to fit within the max_chars_per_line and max_label_lines constraints,
+        and separate the final output by TERM_DELIMITER for plotting.

  [... 53 removed lines (old 680-732) not shown in this view ...]
+        Args:
+            words (List[str]): List of words to combine.
+            max_chars_per_line (int): Maximum number of characters in a line to display.
+            max_label_lines (int): Maximum number of lines in a label.
+
+        Returns:
+            str: String of combined words separated by ':' for line breaks.
+        """
+
+        def try_combinations(words_batch: List[str]) -> List[str]:
+            """Try to combine words within a batch and return them with combined words separated by ':'."""
+            combined_lines = []
+            i = 0
+            while i < len(words_batch):
+                current_word = words_batch[i]
+                combined_word = current_word  # Start with the current word
+                # Try to combine more words if possible, and ensure the combination fits within max_length
+                for j in range(i + 1, len(words_batch)):
+                    next_word = words_batch[j]
+                    # Ensure that the combined word fits within the max_chars_per_line limit
+                    if (
+                        len(combined_word) + len(next_word) + 1 <= max_chars_per_line
+                    ):  # +1 for space
+                        combined_word = f"{combined_word} {next_word}"
+                        i += 1  # Move past the combined word
+                    else:
+                        break  # Stop combining if the length is exceeded
+
+                # Add the combined word only if it fits within the max_chars_per_line limit
+                if len(combined_word) <= max_chars_per_line:
+                    combined_lines.append(combined_word)  # Add the combined word
+                # Move to the next word
+                i += 1
+
+                # Stop if we've reached the max_label_lines limit
+                if len(combined_lines) >= max_label_lines:
+                    break
+
+            return combined_lines
+
+        # Main logic: start with max_label_lines number of words
+        combined_lines = try_combinations(words[:max_label_lines])
+        remaining_words = words[max_label_lines:]  # Remaining words after the initial batch
+        # Track words that have already been added
+        existing_words = set(" ".join(combined_lines).split())
+
+        # Continue pulling more words until we fill the lines
+        while remaining_words and len(combined_lines) < max_label_lines:
+            available_slots = max_label_lines - len(combined_lines)
+            words_to_add = [
+                word for word in remaining_words[:available_slots] if word not in existing_words
+            ]
+            remaining_words = remaining_words[available_slots:]
+            # Update the existing words set
+            existing_words.update(words_to_add)
+            # Add to combined_lines only unique words
+            combined_lines += try_combinations(words_to_add)
+
+        # Join the final combined lines with TERM_DELIMITER, a special separator for line breaks
+        return TERM_DELIMITER.join(combined_lines[:max_label_lines])
+
+    def _calculate_best_label_positions(
+        self,
+        filtered_domain_centroids: Dict[int, Any],
+        center: Tuple[float, float],
+        radius: float,
+        offset: float,
+    ) -> Dict[int, Any]:
+        """Calculate and optimize label positions for clarity.
+
+        Args:
+            filtered_domain_centroids (Dict[int, Any]): Centroids of the filtered domains.
+            center (Tuple[float, float]): The center point around which labels are positioned.
+            radius (float): The radius for positioning labels around the center.
+            offset (float): The offset distance from the radius for positioning labels.
+
+        Returns:
+            Dict[int, Any]: Optimized positions for labels.
+        """
+        num_domains = len(filtered_domain_centroids)
+        # Calculate equidistant positions around the center for initial label placement
+        equidistant_positions = self._calculate_equidistant_positions_around_center(
+            center, radius, offset, num_domains
+        )
+        # Create a mapping of domains to their initial label positions
+        label_positions = dict(zip(filtered_domain_centroids.keys(), equidistant_positions))
+        # Optimize the label positions to minimize distance to domain centroids
+        return self._optimize_label_positions(label_positions, filtered_domain_centroids)
+
+    def _calculate_equidistant_positions_around_center(
+        self, center: Tuple[float, float], radius: float, label_offset: float, num_domains: int
+    ) -> List[np.ndarray]:
+        """Calculate positions around a center at equidistant angles.
+
+        Args:
+            center (Tuple[float, float]): The center point around which positions are calculated.
+            radius (float): The radius at which positions are calculated.
+            label_offset (float): The offset added to the radius for label positioning.
+            num_domains (int): The number of positions (or domains) to calculate.
+
+        Returns:
+            List[np.ndarray]: List of positions (as 2D numpy arrays) around the center.
+        """
+        # Calculate equidistant angles in radians around the center
+        angles = np.linspace(0, 2 * np.pi, num_domains, endpoint=False)
+        # Compute the positions around the center using the angles
+        return [
+            center + (radius + label_offset) * np.array([np.cos(angle), np.sin(angle)])
+            for angle in angles
         ]
  [... 21 removed lines (old 734-754) not shown in this view ...]
-    Returns:
-        Dict[str, Any]: Optimized positions for labels.
-    """
-    num_domains = len(filtered_domain_centroids)
-    # Calculate equidistant positions around the center for initial label placement
-    equidistant_positions = _calculate_equidistant_positions_around_center(
-        center, radius, offset, num_domains
-    )
-    # Create a mapping of domains to their initial label positions
-    label_positions = dict(zip(filtered_domain_centroids.keys(), equidistant_positions))
-    # Optimize the label positions to minimize distance to domain centroids
-    return _optimize_label_positions(label_positions, filtered_domain_centroids)
-
-
-def _calculate_equidistant_positions_around_center(
-    center: np.ndarray, radius: float, label_offset: float, num_domains: int
-) -> List[np.ndarray]:
-    """Calculate positions around a center at equidistant angles.
-
-    Args:
-        center (np.ndarray): The central point around which positions are calculated.
-        radius (float): The radius at which positions are calculated.
-        label_offset (float): The offset added to the radius for label positioning.
-        num_domains (int): The number of positions (or domains) to calculate.
-
-    Returns:
-        List[np.ndarray]: List of positions (as 2D numpy arrays) around the center.
-    """
-    # Calculate equidistant angles in radians around the center
-    angles = np.linspace(0, 2 * np.pi, num_domains, endpoint=False)
-    # Compute the positions around the center using the angles
-    return [
-        center + (radius + label_offset) * np.array([np.cos(angle), np.sin(angle)])
-        for angle in angles
-    ]
-
-
-def _optimize_label_positions(
-    best_label_positions: Dict[str, Any], domain_centroids: Dict[str, Any]
-) -> Dict[str, Any]:
-    """Optimize label positions around the perimeter to minimize total distance to centroids.
-
-    Args:
-        best_label_positions (Dict[str, Any]): Initial positions of labels around the perimeter.
-        domain_centroids (Dict[str, Any]): Centroid positions of the domains.
-
-    Returns:
-        Dict[str, Any]: Optimized label positions.
-    """
-    while True:
-        improvement = False  # Start each iteration assuming no improvement
-        # Iterate through each pair of labels to check for potential improvements
-        for i in range(len(domain_centroids)):
-            for j in range(i + 1, len(domain_centroids)):
-                # Calculate the current total distance
-                current_distance = _calculate_total_distance(best_label_positions, domain_centroids)
-                # Evaluate the total distance after swapping two labels
-                swapped_distance = _swap_and_evaluate(best_label_positions, i, j, domain_centroids)
-                # If the swap improves the total distance, perform the swap
-                if swapped_distance < current_distance:
-                    labels = list(best_label_positions.keys())
-                    best_label_positions[labels[i]], best_label_positions[labels[j]] = (
-                        best_label_positions[labels[j]],
-                        best_label_positions[labels[i]],
+
+    def _optimize_label_positions(
+        self, best_label_positions: Dict[int, Any], domain_centroids: Dict[int, Any]
+    ) -> Dict[int, Any]:
+        """Optimize label positions around the perimeter to minimize total distance to centroids.
+
+        Args:
+            best_label_positions (Dict[int, Any]): Initial positions of labels around the perimeter.
+            domain_centroids (Dict[int, Any]): Centroid positions of the domains.
+
+        Returns:
+            Dict[int, Any]: Optimized label positions.
+        """
+        while True:
+            improvement = False  # Start each iteration assuming no improvement
+            # Iterate through each pair of labels to check for potential improvements
+            for i in range(len(domain_centroids)):
+                for j in range(i + 1, len(domain_centroids)):
+                    # Calculate the current total distance
+                    current_distance = self._calculate_total_distance(
+                        best_label_positions, domain_centroids
                     )
  [... 10 removed lines (old 820-829) not shown in this view ...]
-                    )
  [... 94 removed lines (old 831-924) not shown in this view ...]
+                    # Evaluate the total distance after swapping two labels
+                    swapped_distance = self._swap_and_evaluate(
+                        best_label_positions, i, j, domain_centroids
+                    )
+                    # If the swap improves the total distance, perform the swap
+                    if swapped_distance < current_distance:
+                        labels = list(best_label_positions.keys())
+                        best_label_positions[labels[i]], best_label_positions[labels[j]] = (
+                            best_label_positions[labels[j]],
+                            best_label_positions[labels[i]],
+                        )
+                        improvement = True  # Found an improvement, so continue optimizing
+
+            if not improvement:
+                break  # Exit the loop if no improvement was found in this iteration
+
+        return best_label_positions
+
+    def _calculate_total_distance(
+        self, label_positions: Dict[int, Any], domain_centroids: Dict[int, Any]
+    ) -> float:
+        """Calculate the total distance from label positions to their domain centroids.
+
+        Args:
+            label_positions (Dict[int, Any]): Positions of labels around the perimeter.
+            domain_centroids (Dict[int, Any]): Centroid positions of the domains.
+
+        Returns:
+            float: The total distance from labels to centroids.
+        """
+        total_distance = 0.0
+        # Iterate through each domain and calculate the distance to its centroid
+        for domain, pos in label_positions.items():
+            centroid = domain_centroids[domain]
+            total_distance += float(np.linalg.norm(centroid - pos))
+
+        return total_distance
+
+    def _swap_and_evaluate(
+        self,
+        label_positions: Dict[int, Any],
+        i: int,
+        j: int,
+        domain_centroids: Dict[int, Any],
+    ) -> float:
+        """Swap two labels and evaluate the total distance after the swap.
+
+        Args:
+            label_positions (Dict[int, Any]): Positions of labels around the perimeter.
+            i (int): Index of the first label to swap.
+            j (int): Index of the second label to swap.
+            domain_centroids (Dict[int, Any]): Centroid positions of the domains.
+
+        Returns:
+            float: The total distance after swapping the two labels.
+        """
+        # Get the list of labels from the dictionary keys
+        labels = list(label_positions.keys())
+        swapped_positions = copy.deepcopy(label_positions)
+        # Swap the positions of the two specified labels
+        swapped_positions[labels[i]], swapped_positions[labels[j]] = (
+            swapped_positions[labels[j]],
+            swapped_positions[labels[i]],
+        )
+        # Calculate and return the total distance after the swap
+        return self._calculate_total_distance(swapped_positions, domain_centroids)
+
+    def _apply_str_transformation(
+        self, words: List[str], transformation: Union[str, Dict[str, str]]
+    ) -> List[str]:
+        """Apply a user-specified case transformation to each word in the list without appending duplicates.
+
+        Args:
+            words (List[str]): A list of words to transform.
+            transformation (Union[str, Dict[str, str]]): A single transformation (e.g., 'lower', 'upper', 'title', 'capitalize')
+                or a dictionary mapping cases ('lower', 'upper', 'title') to transformations (e.g., 'lower'='upper').
+
+        Returns:
+            List[str]: A list of transformed words with no duplicates.
+        """
+        # Initialize a list to store transformed words
+        transformed_words = []
+        for word in words:
+            # Split word into subwords by space
+            subwords = word.split(" ")
+            transformed_subwords = []
+            # Apply transformation to each subword
+            for subword in subwords:
+                transformed_subword = subword  # Start with the original subword
+                # If transformation is a string, apply it to all subwords
+                if isinstance(transformation, str):
+                    if hasattr(subword, transformation):
+                        transformed_subword = getattr(subword, transformation)()
+
+                # If transformation is a dictionary, apply case-specific transformations
+                elif isinstance(transformation, dict):
+                    for case_type, transform in transformation.items():
+                        if case_type == "lower" and subword.islower() and transform:
+                            transformed_subword = getattr(subword, transform)()
+                        elif case_type == "upper" and subword.isupper() and transform:
+                            transformed_subword = getattr(subword, transform)()
+                        elif case_type == "title" and subword.istitle() and transform:
+                            transformed_subword = getattr(subword, transform)()
+
+                # Append the transformed subword to the list
+                transformed_subwords.append(transformed_subword)
+
+            # Rejoin the transformed subwords into a single string to preserve structure
+            transformed_word = " ".join(transformed_subwords)
+            # Only append if the transformed word is not already in the list
+            if transformed_word not in transformed_words:
+                transformed_words.append(transformed_word)
+
+        return transformed_words