pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of pg-sui might be problematic.
- {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
- pg_sui-1.6.8.dist-info/RECORD +78 -0
- {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
- pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
- pg_sui-1.6.8.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +635 -0
- pgsui/data_processing/config.py +576 -0
- pgsui/data_processing/containers.py +1782 -0
- pgsui/data_processing/transformers.py +121 -1103
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +189 -0
- pgsui/electron/app/package-lock.json +6893 -0
- pgsui/electron/app/package.json +50 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +146 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +130 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +59 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
- pgsui/impute/deterministic/imputers/mode.py +679 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +971 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
- pgsui/impute/supervised/base.py +339 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
- pgsui/impute/supervised/imputers/random_forest.py +287 -0
- pgsui/impute/unsupervised/base.py +924 -0
- pgsui/impute/unsupervised/callbacks.py +89 -263
- pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
- pgsui/impute/unsupervised/imputers/vae.py +957 -0
- pgsui/impute/unsupervised/loss_functions.py +158 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
- pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
- pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
- pgsui/impute/unsupervised/models/vae_model.py +259 -618
- pgsui/impute/unsupervised/nn_scorers.py +215 -0
- pgsui/utils/classification_viz.py +591 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +514 -824
- pgsui/utils/scorers.py +212 -438
- pg_sui-1.0.2.1.dist-info/RECORD +0 -75
- pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -735
- pgsui/impute/impute.py +0 -1486
- pgsui/impute/simple_imputers.py +0 -1439
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
- pgsui/impute/unsupervised/keras_classifiers.py +0 -702
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -297
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -214
- {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
- /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
pgsui/impute/unsupervised/loss_functions.py (new file)

@@ -0,0 +1,158 @@
from typing import List, Literal

import torch
import torch.nn as nn
import torch.nn.functional as F


class WeightedMaskedCCELoss(nn.Module):
    def __init__(
        self,
        alpha: float | List[float] | torch.Tensor | None = None,
        reduction: Literal["mean", "sum"] = "mean",
    ):
        """A weighted, masked Categorical Cross-Entropy loss function.

        This method computes the categorical cross-entropy loss while allowing for class weights and masking of invalid (missing) entries. It is particularly useful for sequence data where some positions may be missing or should not contribute to the loss calculation.

        Args:
            alpha (float | List | Tensor | None): A manual rescaling weight given to each class. If given, has to be a Tensor of size C (number of classes). Defaults to None.
            reduction (str, optional): Specifies the reduction to apply to the output: 'mean' or 'sum'. Defaults to "mean".
        """
        super(WeightedMaskedCCELoss, self).__init__()
        self.reduction = reduction
        self.alpha = alpha

    def forward(
        self,
        logits: torch.Tensor,
        targets: torch.Tensor,
        valid_mask: torch.Tensor | None = None,
    ) -> torch.Tensor:
        """Compute the masked categorical cross-entropy loss.

        Args:
            logits (torch.Tensor): Logits from the model of shape
                (batch_size, seq_len, num_classes).
            targets (torch.Tensor): Ground truth labels of shape (batch_size, seq_len).
            valid_mask (torch.Tensor, optional): Boolean mask of shape (batch_size, seq_len) where True indicates a valid (observed) value to include in the loss.
                Defaults to None, in which case all values are considered valid.

        Returns:
            torch.Tensor: The computed scalar loss value.
        """
        # Automatically detect the device from the input tensor
        device = logits.device
        num_classes = logits.shape[-1]

        # Ensure targets are on the correct device and are Long type
        targets = targets.to(device).long()

        # Prepare weights and pass them directly to the loss function
        class_weights = None
        if self.alpha is not None:
            if not isinstance(self.alpha, torch.Tensor):
                class_weights = torch.tensor(
                    self.alpha, dtype=torch.float, device=device
                )
            else:
                class_weights = self.alpha.to(device)

        loss = F.cross_entropy(
            logits.reshape(-1, num_classes),
            targets.reshape(-1),
            weight=class_weights,
            reduction="none",
            ignore_index=-1,  # Ignore all targets with the value -1
        )

        # If a mask is provided, filter the losses for the training set
        if valid_mask is not None:
            loss = loss[valid_mask.reshape(-1)]

        # If after masking no valid losses remain, return 0
        if loss.numel() == 0:
            return torch.tensor(0.0, device=device)

        # Apply the final reduction
        if self.reduction == "mean":
            return loss.mean()
        elif self.reduction == "sum":
            return loss.sum()
        else:
            msg = f"Reduction mode '{self.reduction}' not supported."
            raise ValueError(msg)


class MaskedFocalLoss(nn.Module):
    """Focal loss (gamma > 0) with optional class weights and a boolean valid mask.

    This method implements the focal loss function, which is designed to address class imbalance by down-weighting easy examples and focusing training on hard negatives. It also supports masking of invalid (missing) entries, making it suitable for sequence data with missing values.
    """

    def __init__(
        self,
        gamma: float = 2.0,
        alpha: torch.Tensor | None = None,
        reduction: Literal["mean", "sum"] = "mean",
    ):
        """Initialize the MaskedFocalLoss.

        This class sets up the focal loss with the specified focusing parameter, class weights, and reduction method. It is designed to handle missing data through a valid mask, ensuring that only relevant entries contribute to the loss calculation.

        Args:
            gamma (float): Focusing parameter.
            alpha (torch.Tensor | None): Class weights.
            reduction (Literal["mean", "sum"]): Reduction mode ('mean' or 'sum').
        """
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(
        self,
        logits: torch.Tensor,  # Expects (N, C) where N = batch*features
        targets: torch.Tensor,  # Expects (N,)
        valid_mask: torch.Tensor,  # Expects (N,)
    ) -> torch.Tensor:
        """Calculates the focal loss on pre-flattened tensors.

        Args:
            logits (torch.Tensor): Logits from the model of shape (N, C) where N is the number of samples (batch_size * seq_len) and C is the number of classes.
            targets (torch.Tensor): Ground truth labels of shape (N,).
            valid_mask (torch.Tensor): Boolean mask of shape (N,) where True indicates a valid (observed) value to include in the loss.

        Returns:
            torch.Tensor: The computed scalar loss value.
        """
        device = logits.device

        # Calculate standard cross-entropy loss per-token (no reduction)
        ce = F.cross_entropy(
            logits,
            targets,
            weight=(self.alpha.to(device) if self.alpha is not None else None),
            reduction="none",
            ignore_index=-1,
        )

        # Calculate p_t from the cross-entropy loss
        pt = torch.exp(-ce)
        focal = ((1 - pt) ** self.gamma) * ce

        # Apply the valid mask. We select only the elements that should contribute to the loss.
        focal = focal[valid_mask]

        # Return early if no valid elements exist to avoid NaN results
        if focal.numel() == 0:
            return torch.tensor(0.0, device=device)

        # Apply reduction
        if self.reduction == "mean":
            return focal.mean()
        elif self.reduction == "sum":
            return focal.sum()
        else:
            msg = f"Reduction mode '{self.reduction}' not supported."
            raise ValueError(msg)
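For context, here is a minimal usage sketch of the two losses above. It is illustrative only and not part of the diff: the import path is assumed from the new module location pgsui/impute/unsupervised/loss_functions.py, and the toy shapes and variable names follow the docstrings. Note the asymmetry between the two classes: WeightedMaskedCCELoss takes (batch_size, seq_len, num_classes) logits and flattens internally, while MaskedFocalLoss expects pre-flattened (N, C) inputs. The focal term scales each element's cross-entropy by (1 - p_t)**gamma, where p_t = exp(-CE) is the model's probability for the true class, so confidently classified entries contribute less.

# Illustrative sketch only; the import path and toy data are assumptions.
import torch

from pgsui.impute.unsupervised.loss_functions import (
    MaskedFocalLoss,
    WeightedMaskedCCELoss,
)

batch_size, seq_len, num_classes = 4, 10, 3

logits = torch.randn(batch_size, seq_len, num_classes)
targets = torch.randint(0, num_classes, (batch_size, seq_len))
valid_mask = torch.rand(batch_size, seq_len) > 0.2  # True = observed entry

# Weighted, masked CCE over (batch_size, seq_len, num_classes) logits;
# alpha is a per-class weight of length C.
cce = WeightedMaskedCCELoss(alpha=[1.0, 2.0, 2.0], reduction="mean")
loss_cce = cce(logits, targets, valid_mask=valid_mask)

# Focal loss over pre-flattened (N, C) logits and (N,) targets/mask.
focal = MaskedFocalLoss(gamma=2.0)
loss_focal = focal(
    logits.reshape(-1, num_classes),
    targets.reshape(-1),
    valid_mask.reshape(-1),
)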