pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pg-sui might be problematic. Click here for more details.

Files changed (112) hide show
  1. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
  2. pg_sui-1.6.8.dist-info/RECORD +78 -0
  3. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
  5. pg_sui-1.6.8.dist-info/top_level.txt +1 -0
  6. pgsui/__init__.py +35 -54
  7. pgsui/_version.py +34 -0
  8. pgsui/cli.py +635 -0
  9. pgsui/data_processing/config.py +576 -0
  10. pgsui/data_processing/containers.py +1782 -0
  11. pgsui/data_processing/transformers.py +121 -1103
  12. pgsui/electron/app/__main__.py +5 -0
  13. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  14. pgsui/electron/app/icons/icons/128x128.png +0 -0
  15. pgsui/electron/app/icons/icons/16x16.png +0 -0
  16. pgsui/electron/app/icons/icons/24x24.png +0 -0
  17. pgsui/electron/app/icons/icons/256x256.png +0 -0
  18. pgsui/electron/app/icons/icons/32x32.png +0 -0
  19. pgsui/electron/app/icons/icons/48x48.png +0 -0
  20. pgsui/electron/app/icons/icons/512x512.png +0 -0
  21. pgsui/electron/app/icons/icons/64x64.png +0 -0
  22. pgsui/electron/app/icons/icons/icon.icns +0 -0
  23. pgsui/electron/app/icons/icons/icon.ico +0 -0
  24. pgsui/electron/app/main.js +189 -0
  25. pgsui/electron/app/package-lock.json +6893 -0
  26. pgsui/electron/app/package.json +50 -0
  27. pgsui/electron/app/preload.js +15 -0
  28. pgsui/electron/app/server.py +146 -0
  29. pgsui/electron/app/ui/logo.png +0 -0
  30. pgsui/electron/app/ui/renderer.js +130 -0
  31. pgsui/electron/app/ui/styles.css +59 -0
  32. pgsui/electron/app/ui/ui_shim.js +72 -0
  33. pgsui/electron/bootstrap.py +43 -0
  34. pgsui/electron/launch.py +59 -0
  35. pgsui/electron/package.json +14 -0
  36. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  37. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  38. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  39. pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
  40. pgsui/impute/deterministic/imputers/mode.py +679 -0
  41. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  42. pgsui/impute/deterministic/imputers/phylo.py +971 -0
  43. pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
  44. pgsui/impute/supervised/base.py +339 -0
  45. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
  46. pgsui/impute/supervised/imputers/random_forest.py +287 -0
  47. pgsui/impute/unsupervised/base.py +924 -0
  48. pgsui/impute/unsupervised/callbacks.py +89 -263
  49. pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
  50. pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
  51. pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
  52. pgsui/impute/unsupervised/imputers/vae.py +957 -0
  53. pgsui/impute/unsupervised/loss_functions.py +158 -0
  54. pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
  55. pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
  56. pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
  57. pgsui/impute/unsupervised/models/vae_model.py +259 -618
  58. pgsui/impute/unsupervised/nn_scorers.py +215 -0
  59. pgsui/utils/classification_viz.py +591 -0
  60. pgsui/utils/misc.py +35 -480
  61. pgsui/utils/plotting.py +514 -824
  62. pgsui/utils/scorers.py +212 -438
  63. pg_sui-1.0.2.1.dist-info/RECORD +0 -75
  64. pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
  65. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  66. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  67. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  68. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  69. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  70. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  71. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  72. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  73. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  74. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  75. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  76. pgsui/example_data/trees/test.iqtree +0 -376
  77. pgsui/example_data/trees/test.qmat +0 -5
  78. pgsui/example_data/trees/test.rate +0 -2033
  79. pgsui/example_data/trees/test.tre +0 -1
  80. pgsui/example_data/trees/test_n10.rate +0 -19
  81. pgsui/example_data/trees/test_n100.rate +0 -109
  82. pgsui/example_data/trees/test_n500.rate +0 -509
  83. pgsui/example_data/trees/test_siterates.txt +0 -2024
  84. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  85. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  86. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  87. pgsui/example_data/vcf_files/test.vcf +0 -244
  88. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  89. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  90. pgsui/impute/estimators.py +0 -735
  91. pgsui/impute/impute.py +0 -1486
  92. pgsui/impute/simple_imputers.py +0 -1439
  93. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
  94. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
  95. pgsui/impute/unsupervised/keras_classifiers.py +0 -702
  96. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  97. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
  98. pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
  99. pgsui/pg_sui.py +0 -261
  100. pgsui/utils/sequence_tools.py +0 -407
  101. simulation/sim_benchmarks.py +0 -333
  102. simulation/sim_treeparams.py +0 -475
  103. test/__init__.py +0 -0
  104. test/pg_sui_simtest.py +0 -215
  105. test/pg_sui_testing.py +0 -523
  106. test/test.py +0 -297
  107. test/test_pgsui.py +0 -374
  108. test/test_tkc.py +0 -214
  109. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
  110. /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  111. /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  112. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
@@ -0,0 +1,158 @@
1
+ from typing import List, Literal
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ class WeightedMaskedCCELoss(nn.Module):
9
+ def __init__(
10
+ self,
11
+ alpha: float | List[float] | torch.Tensor | None = None,
12
+ reduction: Literal["mean", "sum"] = "mean",
13
+ ):
14
+ """A weighted, masked Categorical Cross-Entropy loss function.
15
+
16
+ This method computes the categorical cross-entropy loss while allowing for class weights and masking of invalid (missing) entries. It is particularly useful for sequence data where some positions may be missing or should not contribute to the loss calculation.
17
+
18
+ Args:
19
+ alpha (float | List | Tensor | None): A manual rescaling weight given to each class. If given, has to be a Tensor of size C (number of classes). Defaults to None.
20
+ reduction (str, optional): Specifies the reduction to apply to the output: 'mean' or 'sum'. Defaults to "mean".
21
+ """
22
+ super(WeightedMaskedCCELoss, self).__init__()
23
+ self.reduction = reduction
24
+ self.alpha = alpha
25
+
26
+ def forward(
27
+ self,
28
+ logits: torch.Tensor,
29
+ targets: torch.Tensor,
30
+ valid_mask: torch.Tensor | None = None,
31
+ ) -> torch.Tensor:
32
+ """Compute the masked categorical cross-entropy loss.
33
+
34
+ Args:
35
+ logits (torch.Tensor): Logits from the model of shape
36
+ (batch_size, seq_len, num_classes).
37
+ targets (torch.Tensor): Ground truth labels of shape (batch_size, seq_len).
38
+ valid_mask (torch.Tensor, optional): Boolean mask of shape (batch_size, seq_len) where True indicates a valid (observed) value to include in the loss.
39
+ Defaults to None, in which case all values are considered valid.
40
+
41
+ Returns:
42
+ torch.Tensor: The computed scalar loss value.
43
+ """
44
+ # Automatically detect the device from the input tensor
45
+ device = logits.device
46
+ num_classes = logits.shape[-1]
47
+
48
+ # Ensure targets are on the correct device and are Long type
49
+ targets = targets.to(device).long()
50
+
51
+ # Prepare weights and pass them directly to the loss function
52
+ class_weights = None
53
+ if self.alpha is not None:
54
+ if not isinstance(self.alpha, torch.Tensor):
55
+ class_weights = torch.tensor(
56
+ self.alpha, dtype=torch.float, device=device
57
+ )
58
+ else:
59
+ class_weights = self.alpha.to(device)
60
+
61
+ loss = F.cross_entropy(
62
+ logits.reshape(-1, num_classes),
63
+ targets.reshape(-1),
64
+ weight=class_weights,
65
+ reduction="none",
66
+ ignore_index=-1, # Ignore all targets with the value -1
67
+ )
68
+
69
+ # If a mask is provided, filter the losses for the training set
70
+ if valid_mask is not None:
71
+ loss = loss[valid_mask.reshape(-1)]
72
+
73
+ # If after masking no valid losses remain, return 0
74
+ if loss.numel() == 0:
75
+ return torch.tensor(0.0, device=device)
76
+
77
+ # Apply the final reduction
78
+ if self.reduction == "mean":
79
+ return loss.mean()
80
+ elif self.reduction == "sum":
81
+ return loss.sum()
82
+ else:
83
+ msg = f"Reduction mode '{self.reduction}' not supported."
84
+ raise ValueError(msg)
85
+
86
+
87
+ class MaskedFocalLoss(nn.Module):
88
+ """Focal loss (gamma > 0) with optional class weights and a boolean valid mask.
89
+
90
+ This method implements the focal loss function, which is designed to address class imbalance by down-weighting easy examples and focusing training on hard negatives. It also supports masking of invalid (missing) entries, making it suitable for sequence data with missing values.
91
+ """
92
+
93
+ def __init__(
94
+ self,
95
+ gamma: float = 2.0,
96
+ alpha: torch.Tensor | None = None,
97
+ reduction: Literal["mean", "sum"] = "mean",
98
+ ):
99
+ """Initialize the MaskedFocalLoss.
100
+
101
+ This class sets up the focal loss with specified focusing parameter, class weights, and reduction method. It is designed to handle missing data through a valid mask, ensuring that only relevant entries contribute to the loss calculation.
102
+
103
+ Args:
104
+ gamma (float): Focusing parameter.
105
+ alpha (torch.Tensor | None): Class weights.
106
+ reduction (Literal["mean", "sum"]): Reduction mode ('mean' or 'sum').
107
+ """
108
+ super().__init__()
109
+ self.gamma = gamma
110
+ self.alpha = alpha
111
+ self.reduction = reduction
112
+
113
+ def forward(
114
+ self,
115
+ logits: torch.Tensor, # Expects (N, C) where N = batch*features
116
+ targets: torch.Tensor, # Expects (N,)
117
+ valid_mask: torch.Tensor, # Expects (N,)
118
+ ) -> torch.Tensor:
119
+ """Calculates the focal loss on pre-flattened tensors.
120
+
121
+ Args:
122
+ logits (torch.Tensor): Logits from the model of shape (N, C) where N is the number of samples (batch_size * seq_len) and C is the number of classes.
123
+ targets (torch.Tensor): Ground truth labels of shape (N,).
124
+ valid_mask (torch.Tensor): Boolean mask of shape (N,) where True indicates a valid (observed) value to include in the loss.
125
+
126
+ Returns:
127
+ torch.Tensor: The computed scalar loss value.
128
+ """
129
+ device = logits.device
130
+
131
+ # Calculate standard cross-entropy loss per-token (no reduction)
132
+ ce = F.cross_entropy(
133
+ logits,
134
+ targets,
135
+ weight=(self.alpha.to(device) if self.alpha is not None else None),
136
+ reduction="none",
137
+ ignore_index=-1,
138
+ )
139
+
140
+ # Calculate p_t from the cross-entropy loss
141
+ pt = torch.exp(-ce)
142
+ focal = ((1 - pt) ** self.gamma) * ce
143
+
144
+ # Apply the valid mask. We select only the elements that should contribute to the loss.
145
+ focal = focal[valid_mask]
146
+
147
+ # Return early if no valid elements exist to avoid NaN results
148
+ if focal.numel() == 0:
149
+ return torch.tensor(0.0, device=device)
150
+
151
+ # Apply reduction
152
+ if self.reduction == "mean":
153
+ return focal.mean()
154
+ elif self.reduction == "sum":
155
+ return focal.sum()
156
+ else:
157
+ msg = f"Reduction mode '{self.reduction}' not supported."
158
+ raise ValueError(msg)