risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. risk/__init__.py +1 -1
  2. risk/annotation/__init__.py +10 -0
  3. risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
  4. risk/{annotations → annotation}/io.py +93 -92
  5. risk/{annotations → annotation}/nltk_setup.py +6 -5
  6. risk/log/__init__.py +1 -1
  7. risk/log/parameters.py +26 -27
  8. risk/neighborhoods/__init__.py +0 -1
  9. risk/neighborhoods/api.py +38 -38
  10. risk/neighborhoods/community.py +33 -4
  11. risk/neighborhoods/domains.py +26 -28
  12. risk/neighborhoods/neighborhoods.py +8 -2
  13. risk/neighborhoods/stats/__init__.py +13 -0
  14. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  15. risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
  16. risk/{stats → neighborhoods/stats}/permutation/test_functions.py +4 -4
  17. risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
  18. risk/network/__init__.py +0 -2
  19. risk/network/graph/__init__.py +0 -2
  20. risk/network/graph/api.py +19 -19
  21. risk/network/graph/graph.py +73 -68
  22. risk/{stats/significance.py → network/graph/stats.py} +2 -2
  23. risk/network/graph/summary.py +12 -13
  24. risk/network/io.py +163 -20
  25. risk/network/plotter/__init__.py +0 -2
  26. risk/network/plotter/api.py +1 -1
  27. risk/network/plotter/canvas.py +36 -36
  28. risk/network/plotter/contour.py +14 -15
  29. risk/network/plotter/labels.py +303 -294
  30. risk/network/plotter/network.py +6 -6
  31. risk/network/plotter/plotter.py +8 -10
  32. risk/network/plotter/utils/colors.py +15 -8
  33. risk/network/plotter/utils/layout.py +3 -3
  34. risk/risk.py +6 -6
  35. risk_network-0.0.12.dist-info/METADATA +122 -0
  36. risk_network-0.0.12.dist-info/RECORD +40 -0
  37. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
  38. risk/annotations/__init__.py +0 -7
  39. risk/network/geometry.py +0 -150
  40. risk/stats/__init__.py +0 -15
  41. risk/stats/permutation/__init__.py +0 -6
  42. risk_network-0.0.11.dist-info/METADATA +0 -798
  43. risk_network-0.0.11.dist-info/RECORD +0 -41
  44. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
  45. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
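The renames above fold risk/annotations/ into risk/annotation/, move risk/stats/ under risk/neighborhoods/stats/, and relocate risk/stats/significance.py to risk/network/graph/stats.py. A minimal sketch of how dotted import paths would shift for code that imports these internal modules directly, assuming the module paths mirror the file layout listed above (the top-level risk entry points may be unaffected):

    # 0.0.11 internal module paths
    import risk.annotations.io
    import risk.stats.permutation.permutation

    # 0.0.12 equivalents, inferred from the renames in the file list above
    import risk.annotation.io
    import risk.neighborhoods.stats.permutation.permutation
    import risk.network.graph.stats  # formerly risk/stats/significance.py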
risk/network/plotter/labels.py
@@ -134,7 +134,7 @@ class Labels:
  max_chars_per_line = int(1e6)
  # Normalize words_to_omit to lowercase
  if words_to_omit:
- words_to_omit = set(word.lower() for word in words_to_omit)
+ words_to_omit = list(set(word.lower() for word in words_to_omit))

  # Calculate the center and radius of domains to position labels around the network
  domain_id_to_centroid_map = {}
@@ -188,7 +188,7 @@ class Labels:
  # Calculate the bounding box around the network
  center, radius = calculate_bounding_box(self.graph.node_coordinates, radius_margin=scale)
  # Calculate the best positions for labels
- best_label_positions = _calculate_best_label_positions(
+ best_label_positions = self._calculate_best_label_positions(
  filtered_domain_centroids, center, radius, offset
  )
  # Convert all domain colors to RGBA using the to_rgba helper function
@@ -205,11 +205,11 @@ class Labels:
  for idx, (domain, pos) in zip(valid_indices, best_label_positions.items()):
  centroid = filtered_domain_centroids[domain]
  # Split by special key TERM_DELIMITER to split annotation into multiple lines
- annotations = filtered_domain_terms[domain].split(TERM_DELIMITER)
+ terms = filtered_domain_terms[domain].split(TERM_DELIMITER)
  if fontcase is not None:
- annotations = _apply_str_transformation(words=annotations, transformation=fontcase)
+ terms = self._apply_str_transformation(words=terms, transformation=fontcase)
  self.ax.annotate(
- "\n".join(annotations),
+ "\n".join(terms),
  xy=centroid,
  xytext=pos,
  textcoords="data",
@@ -235,7 +235,7 @@ class Labels:
  self.ax.text(
  centroid[0],
  centroid[1],
- domain,
+ str(domain),
  ha="center",
  va="center",
  fontsize=fontsize,
@@ -281,6 +281,9 @@ class Labels:
  found in arrow_alpha. Defaults to 1.0.
  arrow_base_shrink (float, optional): Distance between the text and the base of the arrow. Defaults to 0.0.
  arrow_tip_shrink (float, optional): Distance between the arrow tip and the centroid. Defaults to 0.0.
+
+ Raises:
+ ValueError: If no nodes are found in the network graph or if there are insufficient nodes to plot.
  """
  # Check if nodes is a list of lists or a flat list
  if any(isinstance(item, (list, tuple, np.ndarray)) for item in nodes):
@@ -368,7 +371,7 @@ class Labels:

  def _process_ids_to_keep(
  self,
- domain_id_to_centroid_map: Dict[str, np.ndarray],
+ domain_id_to_centroid_map: Dict[int, np.ndarray],
  ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
  ids_to_labels: Union[Dict[int, str], None],
  words_to_omit: Union[List[str], None],
@@ -377,14 +380,14 @@ class Labels:
  max_label_lines: int,
  min_chars_per_line: int,
  max_chars_per_line: int,
- filtered_domain_centroids: Dict[str, np.ndarray],
- filtered_domain_terms: Dict[str, str],
+ filtered_domain_centroids: Dict[int, np.ndarray],
+ filtered_domain_terms: Dict[int, str],
  valid_indices: List[int],
  ) -> None:
  """Process the ids_to_keep, apply filtering, and store valid domain centroids and terms.

  Args:
- domain_id_to_centroid_map (Dict[str, np.ndarray]): Mapping of domain IDs to their centroids.
+ domain_id_to_centroid_map (Dict[int, np.ndarray]): Mapping of domain IDs to their centroids.
  ids_to_keep (List, Tuple, or np.ndarray, optional): IDs of domains that must be labeled.
  ids_to_labels (Dict[int, str], None, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
  words_to_omit (List, optional): List of words to omit from the labels. Defaults to None.
@@ -393,7 +396,7 @@ class Labels:
  max_label_lines (int): Maximum number of lines in a label.
  min_chars_per_line (int): Minimum number of characters in a line to display.
  max_chars_per_line (int): Maximum number of characters in a line to display.
- filtered_domain_centroids (Dict[str, np.ndarray]): Dictionary to store filtered domain centroids (output).
+ filtered_domain_centroids (Dict[int, np.ndarray]): Dictionary to store filtered domain centroids (output).
  filtered_domain_terms (Dict[str, str]): Dictionary to store filtered domain terms (output).
  valid_indices (List): List to store valid indices (output).

@@ -410,15 +413,15 @@ class Labels:
  )

  # Process each domain in ids_to_keep
- for domain in ids_to_keep:
+ for domain_id in ids_to_keep:
  if (
- domain in self.graph.domain_id_to_domain_terms_map
- and domain in domain_id_to_centroid_map
+ domain_id in self.graph.domain_id_to_domain_terms_map
+ and domain_id in domain_id_to_centroid_map
  ):
- domain_centroid = domain_id_to_centroid_map[domain]
+ domain_centroid = domain_id_to_centroid_map[domain_id]
  # No need to filter the domain terms if it is in ids_to_keep
  _ = self._validate_and_update_domain(
- domain=domain,
+ domain_id=domain_id,
  domain_centroid=domain_centroid,
  domain_id_to_centroid_map=domain_id_to_centroid_map,
  ids_to_labels=ids_to_labels,
@@ -434,7 +437,7 @@ class Labels:

  def _process_remaining_domains(
  self,
- domain_id_to_centroid_map: Dict[str, np.ndarray],
+ domain_id_to_centroid_map: Dict[int, np.ndarray],
  ids_to_keep: Union[List[str], Tuple[str], np.ndarray],
  ids_to_labels: Union[Dict[int, str], None],
  words_to_omit: Union[List[str], None],
@@ -443,14 +446,14 @@ class Labels:
  max_label_lines: int,
  min_chars_per_line: int,
  max_chars_per_line: int,
- filtered_domain_centroids: Dict[str, np.ndarray],
- filtered_domain_terms: Dict[str, str],
+ filtered_domain_centroids: Dict[int, np.ndarray],
+ filtered_domain_terms: Dict[int, str],
  valid_indices: List[int],
  ) -> None:
  """Process remaining domains to fill in additional labels, respecting the remaining_labels limit.

  Args:
- domain_id_to_centroid_map (Dict[str, np.ndarray]): Mapping of domain IDs to their centroids.
+ domain_id_to_centroid_map (Dict[int, np.ndarray]): Mapping of domain IDs to their centroids.
  ids_to_keep (List, Tuple, or np.ndarray, optional): IDs of domains that must be labeled.
  ids_to_labels (Dict[int, str], None, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
  words_to_omit (List, optional): List of words to omit from the labels. Defaults to None.
@@ -459,7 +462,7 @@ class Labels:
  max_label_lines (int): Maximum number of lines in a label.
  min_chars_per_line (int): Minimum number of characters in a line to display.
  max_chars_per_line (int): Maximum number of characters in a line to display.
- filtered_domain_centroids (Dict[str, np.ndarray]): Dictionary to store filtered domain centroids (output).
+ filtered_domain_centroids (Dict[int, np.ndarray]): Dictionary to store filtered domain centroids (output).
  filtered_domain_terms (Dict[str, str]): Dictionary to store filtered domain terms (output).
  valid_indices (List): List to store valid indices (output).

@@ -480,24 +483,24 @@ class Labels:
  return np.linalg.norm(centroid1 - centroid2)

  # Domains to plot on network
- selected_domains = []
+ selected_domain_ids = []
  # Find the farthest apart domains using centroids
  if remaining_domains and remaining_labels:
  first_domain = next(iter(remaining_domains)) # Pick the first domain to start
- selected_domains.append(first_domain)
+ selected_domain_ids.append(first_domain)

- while len(selected_domains) < remaining_labels:
+ while len(selected_domain_ids) < remaining_labels:
  farthest_domain = None
  max_distance = -1
  # Find the domain farthest from any already selected domain
  for candidate_domain, candidate_centroid in remaining_domains.items():
- if candidate_domain in selected_domains:
+ if candidate_domain in selected_domain_ids:
  continue

  # Calculate the minimum distance to any selected domain
  min_distance = min(
  calculate_distance(candidate_centroid, remaining_domains[dom])
- for dom in selected_domains
+ for dom in selected_domain_ids
  )
  # Update the farthest domain if the minimum distance is greater
  if min_distance > max_distance:
@@ -506,15 +509,15 @@ class Labels:

  # Add the farthest domain to the selected domains
  if farthest_domain:
- selected_domains.append(farthest_domain)
+ selected_domain_ids.append(farthest_domain)
  else:
  break # No more domains to select

  # Process the selected domains and add to filtered lists
- for domain in selected_domains:
- domain_centroid = remaining_domains[domain]
+ for domain_id in selected_domain_ids:
+ domain_centroid = remaining_domains[domain_id]
  is_domain_valid = self._validate_and_update_domain(
- domain=domain,
+ domain_id=domain_id,
  domain_centroid=domain_centroid,
  domain_id_to_centroid_map=domain_id_to_centroid_map,
  ids_to_labels=ids_to_labels,
@@ -535,45 +538,45 @@ class Labels:

  def _validate_and_update_domain(
  self,
- domain: str,
+ domain_id: int,
  domain_centroid: np.ndarray,
- domain_id_to_centroid_map: Dict[str, np.ndarray],
+ domain_id_to_centroid_map: Dict[int, np.ndarray],
  ids_to_labels: Union[Dict[int, str], None],
  words_to_omit: Union[List[str], None],
  min_label_lines: int,
  max_label_lines: int,
  min_chars_per_line: int,
  max_chars_per_line: int,
- filtered_domain_centroids: Dict[str, np.ndarray],
- filtered_domain_terms: Dict[str, str],
+ filtered_domain_centroids: Dict[int, np.ndarray],
+ filtered_domain_terms: Dict[int, str],
  valid_indices: List[int],
  ) -> bool:
  """Validate and process the domain terms, updating relevant dictionaries if valid.

  Args:
- domain (str): Domain ID to process.
+ domain_id (int): Domain ID to process.
  domain_centroid (np.ndarray): Centroid position of the domain.
- domain_id_to_centroid_map (Dict[str, np.ndarray]): Mapping of domain IDs to their centroids.
+ domain_id_to_centroid_map (Dict[int, np.ndarray]): Mapping of domain IDs to their centroids.
  ids_to_labels (Dict[int, str], None, optional): A dictionary mapping domain IDs to custom labels. Defaults to None.
  words_to_omit (List[str], None, optional): List of words to omit from the labels. Defaults to None.
  min_label_lines (int): Minimum number of lines required in a label.
  max_label_lines (int): Maximum number of lines allowed in a label.
  min_chars_per_line (int): Minimum number of characters allowed per line.
  max_chars_per_line (int): Maximum number of characters allowed per line.
- filtered_domain_centroids (Dict[str, np.ndarray]): Dictionary to store valid domain centroids.
+ filtered_domain_centroids (Dict[int, np.ndarray]): Dictionary to store valid domain centroids.
  filtered_domain_terms (Dict[str, str]): Dictionary to store valid domain terms.
  valid_indices (List[int]): List of valid domain indices.

  Returns:
  bool: True if the domain is valid and added to the filtered dictionaries, False otherwise.
  """
- if ids_to_labels and domain in ids_to_labels:
+ if ids_to_labels and domain_id in ids_to_labels:
  # Directly use custom labels without filtering
- domain_terms = ids_to_labels[domain]
+ domain_terms = ids_to_labels[domain_id]
  else:
  # Process the domain terms automatically
  domain_terms = self._process_terms(
- domain=domain,
+ domain_id=domain_id,
  words_to_omit=words_to_omit,
  max_label_lines=max_label_lines,
  min_chars_per_line=min_chars_per_line,
@@ -590,24 +593,24 @@ class Labels:
  return False

  # Store the valid terms and centroids
- filtered_domain_centroids[domain] = domain_centroid
- filtered_domain_terms[domain] = domain_terms
- valid_indices.append(list(domain_id_to_centroid_map.keys()).index(domain))
+ filtered_domain_centroids[domain_id] = domain_centroid
+ filtered_domain_terms[domain_id] = domain_terms
+ valid_indices.append(list(domain_id_to_centroid_map.keys()).index(domain_id))

  return True

  def _process_terms(
  self,
- domain: str,
+ domain_id: int,
  words_to_omit: Union[List[str], None],
  max_label_lines: int,
  min_chars_per_line: int,
  max_chars_per_line: int,
- ) -> List[str]:
+ ) -> str:
  """Process terms for a domain, applying word length constraints and combining words where appropriate.

  Args:
- domain (str): The domain being processed.
+ domain_id (int): Domain ID to process.
  words_to_omit (List[str], None): List of words to omit from the labels.
  max_label_lines (int): Maximum number of lines in a label.
  min_chars_per_line (int): Minimum number of characters in a line to display.
@@ -617,7 +620,7 @@ class Labels:
  str: Processed terms separated by TERM_DELIMITER, with words combined if necessary to fit within constraints.
  """
  # Set custom labels from significant terms
- terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
+ terms = self.graph.domain_id_to_domain_terms_map[domain_id].split(" ")
  # Apply words_to_omit and word length constraints
  if words_to_omit:
  terms = [
@@ -627,7 +630,7 @@ class Labels:
  ]

  # Use the combine_words function directly to handle word combinations and length constraints
- compressed_terms = _combine_words(tuple(terms), max_chars_per_line, max_label_lines)
+ compressed_terms = self._combine_words(list(terms), max_chars_per_line, max_label_lines)

  return compressed_terms

@@ -642,8 +645,8 @@ class Labels:
  scale_factor: float = 1.0,
  ids_to_colors: Union[Dict[int, Any], None] = None,
  random_seed: int = 888,
- ) -> np.ndarray:
- """Get colors for the labels based on node annotations or a specified colormap.
+ ) -> List:
+ """Get colors for the labels based on node annotation or a specified colormap.

  Args:
  cmap (str, optional): Name of the colormap to use for generating label colors. Defaults to "gist_rainbow".
@@ -661,7 +664,7 @@ class Labels:
  random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.

  Returns:
- np.ndarray: Array of RGBA colors for label annotations.
+ List: Array of RGBA colors for label annotation.
  """
  return get_annotated_domain_colors(
  graph=self.graph,
@@ -676,249 +679,255 @@ class Labels:
  random_seed=random_seed,
  )

+ def _combine_words(
+ self, words: List[str], max_chars_per_line: int, max_label_lines: int
+ ) -> str:
+ """Combine words to fit within the max_chars_per_line and max_label_lines constraints,
+ and separate the final output by TERM_DELIMITER for plotting.

- def _combine_words(words: List[str], max_chars_per_line: int, max_label_lines: int) -> str:
- """Combine words to fit within the max_chars_per_line and max_label_lines constraints,
- and separate the final output by TERM_DELIMITER for plotting.
-
- Args:
- words (List[str]): List of words to combine.
- max_chars_per_line (int): Maximum number of characters in a line to display.
- max_label_lines (int): Maximum number of lines in a label.
-
- Returns:
- str: String of combined words separated by ':' for line breaks.
- """
-
- def try_combinations(words_batch: List[str]) -> List[str]:
- """Try to combine words within a batch and return them with combined words separated by ':'."""
- combined_lines = []
- i = 0
- while i < len(words_batch):
- current_word = words_batch[i]
- combined_word = current_word # Start with the current word
- # Try to combine more words if possible, and ensure the combination fits within max_length
- for j in range(i + 1, len(words_batch)):
- next_word = words_batch[j]
- # Ensure that the combined word fits within the max_chars_per_line limit
- if len(combined_word) + len(next_word) + 1 <= max_chars_per_line: # +1 for space
- combined_word = f"{combined_word} {next_word}"
- i += 1 # Move past the combined word
- else:
- break # Stop combining if the length is exceeded
-
- # Add the combined word only if it fits within the max_chars_per_line limit
- if len(combined_word) <= max_chars_per_line:
- combined_lines.append(combined_word) # Add the combined word
- # Move to the next word
- i += 1
-
- # Stop if we've reached the max_label_lines limit
- if len(combined_lines) >= max_label_lines:
- break
-
- return combined_lines
-
- # Main logic: start with max_label_lines number of words
- combined_lines = try_combinations(words[:max_label_lines])
- remaining_words = words[max_label_lines:] # Remaining words after the initial batch
- # Track words that have already been added
- existing_words = set(" ".join(combined_lines).split())
-
- # Continue pulling more words until we fill the lines
- while remaining_words and len(combined_lines) < max_label_lines:
- available_slots = max_label_lines - len(combined_lines)
- words_to_add = [
- word for word in remaining_words[:available_slots] if word not in existing_words
+ Args:
+ words (List[str]): List of words to combine.
+ max_chars_per_line (int): Maximum number of characters in a line to display.
+ max_label_lines (int): Maximum number of lines in a label.
+
+ Returns:
+ str: String of combined words separated by ':' for line breaks.
+ """
+
+ def try_combinations(words_batch: List[str]) -> List[str]:
+ """Try to combine words within a batch and return them with combined words separated by ':'."""
+ combined_lines = []
+ i = 0
+ while i < len(words_batch):
+ current_word = words_batch[i]
+ combined_word = current_word # Start with the current word
+ # Try to combine more words if possible, and ensure the combination fits within max_length
+ for j in range(i + 1, len(words_batch)):
+ next_word = words_batch[j]
+ # Ensure that the combined word fits within the max_chars_per_line limit
+ if (
+ len(combined_word) + len(next_word) + 1 <= max_chars_per_line
+ ): # +1 for space
+ combined_word = f"{combined_word} {next_word}"
+ i += 1 # Move past the combined word
+ else:
+ break # Stop combining if the length is exceeded
+
+ # Add the combined word only if it fits within the max_chars_per_line limit
+ if len(combined_word) <= max_chars_per_line:
+ combined_lines.append(combined_word) # Add the combined word
+ # Move to the next word
+ i += 1
+
+ # Stop if we've reached the max_label_lines limit
+ if len(combined_lines) >= max_label_lines:
+ break
+
+ return combined_lines
+
+ # Main logic: start with max_label_lines number of words
+ combined_lines = try_combinations(words[:max_label_lines])
+ remaining_words = words[max_label_lines:] # Remaining words after the initial batch
+ # Track words that have already been added
+ existing_words = set(" ".join(combined_lines).split())
+
+ # Continue pulling more words until we fill the lines
+ while remaining_words and len(combined_lines) < max_label_lines:
+ available_slots = max_label_lines - len(combined_lines)
+ words_to_add = [
+ word for word in remaining_words[:available_slots] if word not in existing_words
+ ]
+ remaining_words = remaining_words[available_slots:]
+ # Update the existing words set
+ existing_words.update(words_to_add)
+ # Add to combined_lines only unique words
+ combined_lines += try_combinations(words_to_add)
+
+ # Join the final combined lines with TERM_DELIMITER, a special separator for line breaks
+ return TERM_DELIMITER.join(combined_lines[:max_label_lines])
+
+ def _calculate_best_label_positions(
+ self,
+ filtered_domain_centroids: Dict[int, Any],
+ center: Tuple[float, float],
+ radius: float,
+ offset: float,
+ ) -> Dict[int, Any]:
+ """Calculate and optimize label positions for clarity.
+
+ Args:
+ filtered_domain_centroids (Dict[int, Any]): Centroids of the filtered domains.
+ center (Tuple[float, float]): The center point around which labels are positioned.
+ radius (float): The radius for positioning labels around the center.
+ offset (float): The offset distance from the radius for positioning labels.
+
+ Returns:
+ Dict[int, Any]: Optimized positions for labels.
+ """
+ num_domains = len(filtered_domain_centroids)
+ # Calculate equidistant positions around the center for initial label placement
+ equidistant_positions = self._calculate_equidistant_positions_around_center(
+ center, radius, offset, num_domains
+ )
+ # Create a mapping of domains to their initial label positions
+ label_positions = dict(zip(filtered_domain_centroids.keys(), equidistant_positions))
+ # Optimize the label positions to minimize distance to domain centroids
+ return self._optimize_label_positions(label_positions, filtered_domain_centroids)
+
+ def _calculate_equidistant_positions_around_center(
+ self, center: Tuple[float, float], radius: float, label_offset: float, num_domains: int
+ ) -> List[np.ndarray]:
+ """Calculate positions around a center at equidistant angles.
+
+ Args:
+ center (Tuple[float, float]): The center point around which positions are calculated.
+ radius (float): The radius at which positions are calculated.
+ label_offset (float): The offset added to the radius for label positioning.
+ num_domains (int): The number of positions (or domains) to calculate.
+
+ Returns:
+ List[np.ndarray]: List of positions (as 2D numpy arrays) around the center.
+ """
+ # Calculate equidistant angles in radians around the center
+ angles = np.linspace(0, 2 * np.pi, num_domains, endpoint=False)
+ # Compute the positions around the center using the angles
+ return [
+ center + (radius + label_offset) * np.array([np.cos(angle), np.sin(angle)])
+ for angle in angles
  ]
- remaining_words = remaining_words[available_slots:]
- # Update the existing words set
- existing_words.update(words_to_add)
- # Add to combined_lines only unique words
- combined_lines += try_combinations(words_to_add)
-
- # Join the final combined lines with TERM_DELIMITER, a special separator for line breaks
- return TERM_DELIMITER.join(combined_lines[:max_label_lines])
-
-
- def _calculate_best_label_positions(
- filtered_domain_centroids: Dict[str, Any], center: np.ndarray, radius: float, offset: float
- ) -> Dict[str, Any]:
- """Calculate and optimize label positions for clarity.
-
- Args:
- filtered_domain_centroids (Dict[str, Any]): Centroids of the filtered domains.
- center (np.ndarray): The center coordinates for label positioning.
- radius (float): The radius for positioning labels around the center.
- offset (float): The offset distance from the radius for positioning labels.
-
- Returns:
- Dict[str, Any]: Optimized positions for labels.
- """
- num_domains = len(filtered_domain_centroids)
- # Calculate equidistant positions around the center for initial label placement
- equidistant_positions = _calculate_equidistant_positions_around_center(
- center, radius, offset, num_domains
- )
- # Create a mapping of domains to their initial label positions
- label_positions = dict(zip(filtered_domain_centroids.keys(), equidistant_positions))
- # Optimize the label positions to minimize distance to domain centroids
- return _optimize_label_positions(label_positions, filtered_domain_centroids)
-
-
- def _calculate_equidistant_positions_around_center(
- center: np.ndarray, radius: float, label_offset: float, num_domains: int
- ) -> List[np.ndarray]:
- """Calculate positions around a center at equidistant angles.
-
- Args:
- center (np.ndarray): The central point around which positions are calculated.
- radius (float): The radius at which positions are calculated.
- label_offset (float): The offset added to the radius for label positioning.
- num_domains (int): The number of positions (or domains) to calculate.
-
- Returns:
- List[np.ndarray]: List of positions (as 2D numpy arrays) around the center.
- """
- # Calculate equidistant angles in radians around the center
- angles = np.linspace(0, 2 * np.pi, num_domains, endpoint=False)
- # Compute the positions around the center using the angles
- return [
- center + (radius + label_offset) * np.array([np.cos(angle), np.sin(angle)])
- for angle in angles
- ]
-
-
- def _optimize_label_positions(
- best_label_positions: Dict[str, Any], domain_centroids: Dict[str, Any]
- ) -> Dict[str, Any]:
- """Optimize label positions around the perimeter to minimize total distance to centroids.
-
- Args:
- best_label_positions (Dict[str, Any]): Initial positions of labels around the perimeter.
- domain_centroids (Dict[str, Any]): Centroid positions of the domains.
-
- Returns:
- Dict[str, Any]: Optimized label positions.
- """
- while True:
- improvement = False # Start each iteration assuming no improvement
- # Iterate through each pair of labels to check for potential improvements
- for i in range(len(domain_centroids)):
- for j in range(i + 1, len(domain_centroids)):
- # Calculate the current total distance
- current_distance = _calculate_total_distance(best_label_positions, domain_centroids)
- # Evaluate the total distance after swapping two labels
- swapped_distance = _swap_and_evaluate(best_label_positions, i, j, domain_centroids)
- # If the swap improves the total distance, perform the swap
- if swapped_distance < current_distance:
- labels = list(best_label_positions.keys())
- best_label_positions[labels[i]], best_label_positions[labels[j]] = (
- best_label_positions[labels[j]],
- best_label_positions[labels[i]],
+
+ def _optimize_label_positions(
+ self, best_label_positions: Dict[int, Any], domain_centroids: Dict[int, Any]
+ ) -> Dict[int, Any]:
+ """Optimize label positions around the perimeter to minimize total distance to centroids.
+
+ Args:
+ best_label_positions (Dict[int, Any]): Initial positions of labels around the perimeter.
+ domain_centroids (Dict[int, Any]): Centroid positions of the domains.
+
+ Returns:
+ Dict[int, Any]: Optimized label positions.
+ """
+ while True:
+ improvement = False # Start each iteration assuming no improvement
+ # Iterate through each pair of labels to check for potential improvements
+ for i in range(len(domain_centroids)):
+ for j in range(i + 1, len(domain_centroids)):
+ # Calculate the current total distance
+ current_distance = self._calculate_total_distance(
+ best_label_positions, domain_centroids
  )
- improvement = True # Found an improvement, so continue optimizing
-
- if not improvement:
- break # Exit the loop if no improvement was found in this iteration
-
- return best_label_positions
-
-
- def _calculate_total_distance(
- label_positions: Dict[str, Any], domain_centroids: Dict[str, Any]
- ) -> float:
- """Calculate the total distance from label positions to their domain centroids.
-
- Args:
- label_positions (Dict[str, Any]): Positions of labels around the perimeter.
- domain_centroids (Dict[str, Any]): Centroid positions of the domains.
-
- Returns:
- float: The total distance from labels to centroids.
- """
- total_distance = 0
- # Iterate through each domain and calculate the distance to its centroid
- for domain, pos in label_positions.items():
- centroid = domain_centroids[domain]
- total_distance += np.linalg.norm(centroid - pos)
-
- return total_distance
-
-
- def _swap_and_evaluate(
- label_positions: Dict[str, Any],
- i: int,
- j: int,
- domain_centroids: Dict[str, Any],
- ) -> float:
- """Swap two labels and evaluate the total distance after the swap.
-
- Args:
- label_positions (Dict[str, Any]): Positions of labels around the perimeter.
- i (int): Index of the first label to swap.
- j (int): Index of the second label to swap.
- domain_centroids (Dict[str, Any]): Centroid positions of the domains.
-
- Returns:
- float: The total distance after swapping the two labels.
- """
- # Get the list of labels from the dictionary keys
- labels = list(label_positions.keys())
- swapped_positions = copy.deepcopy(label_positions)
- # Swap the positions of the two specified labels
- swapped_positions[labels[i]], swapped_positions[labels[j]] = (
- swapped_positions[labels[j]],
- swapped_positions[labels[i]],
- )
- # Calculate and return the total distance after the swap
- return _calculate_total_distance(swapped_positions, domain_centroids)
-
-
- def _apply_str_transformation(
- words: List[str], transformation: Union[str, Dict[str, str]]
- ) -> List[str]:
- """Apply a user-specified case transformation to each word in the list without appending duplicates.
-
- Args:
- words (List[str]): A list of words to transform.
- transformation (Union[str, Dict[str, str]]): A single transformation (e.g., 'lower', 'upper', 'title', 'capitalize')
- or a dictionary mapping cases ('lower', 'upper', 'title') to transformations (e.g., 'lower'='upper').
-
- Returns:
- List[str]: A list of transformed words with no duplicates.
- """
- # Initialize a list to store transformed words
- transformed_words = []
- for word in words:
- # Split word into subwords by space
- subwords = word.split(" ")
- transformed_subwords = []
- # Apply transformation to each subword
- for subword in subwords:
- transformed_subword = subword # Start with the original subword
- # If transformation is a string, apply it to all subwords
- if isinstance(transformation, str):
- if hasattr(subword, transformation):
- transformed_subword = getattr(subword, transformation)()
-
- # If transformation is a dictionary, apply case-specific transformations
- elif isinstance(transformation, dict):
- for case_type, transform in transformation.items():
- if case_type == "lower" and subword.islower() and transform:
- transformed_subword = getattr(subword, transform)()
- elif case_type == "upper" and subword.isupper() and transform:
- transformed_subword = getattr(subword, transform)()
- elif case_type == "title" and subword.istitle() and transform:
- transformed_subword = getattr(subword, transform)()
-
- # Append the transformed subword to the list
- transformed_subwords.append(transformed_subword)
-
- # Rejoin the transformed subwords into a single string to preserve structure
- transformed_word = " ".join(transformed_subwords)
- # Only append if the transformed word is not already in the list
- if transformed_word not in transformed_words:
- transformed_words.append(transformed_word)
-
- return transformed_words
+ # Evaluate the total distance after swapping two labels
+ swapped_distance = self._swap_and_evaluate(
+ best_label_positions, i, j, domain_centroids
+ )
+ # If the swap improves the total distance, perform the swap
+ if swapped_distance < current_distance:
+ labels = list(best_label_positions.keys())
+ best_label_positions[labels[i]], best_label_positions[labels[j]] = (
+ best_label_positions[labels[j]],
+ best_label_positions[labels[i]],
+ )
+ improvement = True # Found an improvement, so continue optimizing
+
+ if not improvement:
+ break # Exit the loop if no improvement was found in this iteration
+
+ return best_label_positions
+
+ def _calculate_total_distance(
+ self, label_positions: Dict[int, Any], domain_centroids: Dict[int, Any]
+ ) -> float:
+ """Calculate the total distance from label positions to their domain centroids.
+
+ Args:
+ label_positions (Dict[int, Any]): Positions of labels around the perimeter.
+ domain_centroids (Dict[int, Any]): Centroid positions of the domains.
+
+ Returns:
+ float: The total distance from labels to centroids.
+ """
+ total_distance = 0.0
+ # Iterate through each domain and calculate the distance to its centroid
+ for domain, pos in label_positions.items():
+ centroid = domain_centroids[domain]
+ total_distance += float(np.linalg.norm(centroid - pos))
+
+ return total_distance
+
+ def _swap_and_evaluate(
+ self,
+ label_positions: Dict[int, Any],
+ i: int,
+ j: int,
+ domain_centroids: Dict[int, Any],
+ ) -> float:
+ """Swap two labels and evaluate the total distance after the swap.
+
+ Args:
+ label_positions (Dict[int, Any]): Positions of labels around the perimeter.
+ i (int): Index of the first label to swap.
+ j (int): Index of the second label to swap.
+ domain_centroids (Dict[int, Any]): Centroid positions of the domains.
+
+ Returns:
+ float: The total distance after swapping the two labels.
+ """
+ # Get the list of labels from the dictionary keys
+ labels = list(label_positions.keys())
+ swapped_positions = copy.deepcopy(label_positions)
+ # Swap the positions of the two specified labels
+ swapped_positions[labels[i]], swapped_positions[labels[j]] = (
+ swapped_positions[labels[j]],
+ swapped_positions[labels[i]],
+ )
+ # Calculate and return the total distance after the swap
+ return self._calculate_total_distance(swapped_positions, domain_centroids)
+
+ def _apply_str_transformation(
+ self, words: List[str], transformation: Union[str, Dict[str, str]]
+ ) -> List[str]:
+ """Apply a user-specified case transformation to each word in the list without appending duplicates.
+
+ Args:
+ words (List[str]): A list of words to transform.
+ transformation (Union[str, Dict[str, str]]): A single transformation (e.g., 'lower', 'upper', 'title', 'capitalize')
+ or a dictionary mapping cases ('lower', 'upper', 'title') to transformations (e.g., 'lower'='upper').
+
+ Returns:
+ List[str]: A list of transformed words with no duplicates.
+ """
+ # Initialize a list to store transformed words
+ transformed_words = []
+ for word in words:
+ # Split word into subwords by space
+ subwords = word.split(" ")
+ transformed_subwords = []
+ # Apply transformation to each subword
+ for subword in subwords:
+ transformed_subword = subword # Start with the original subword
+ # If transformation is a string, apply it to all subwords
+ if isinstance(transformation, str):
+ if hasattr(subword, transformation):
+ transformed_subword = getattr(subword, transformation)()
+
+ # If transformation is a dictionary, apply case-specific transformations
+ elif isinstance(transformation, dict):
+ for case_type, transform in transformation.items():
+ if case_type == "lower" and subword.islower() and transform:
+ transformed_subword = getattr(subword, transform)()
+ elif case_type == "upper" and subword.isupper() and transform:
+ transformed_subword = getattr(subword, transform)()
+ elif case_type == "title" and subword.istitle() and transform:
+ transformed_subword = getattr(subword, transform)()
+
+ # Append the transformed subword to the list
+ transformed_subwords.append(transformed_subword)
+
+ # Rejoin the transformed subwords into a single string to preserve structure
+ transformed_word = " ".join(transformed_subwords)
+ # Only append if the transformed word is not already in the list
+ if transformed_word not in transformed_words:
+ transformed_words.append(transformed_word)
+
+ return transformed_words