pg-sui 1.6.14.dev9__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. pg_sui-1.7.0.dist-info/METADATA +288 -0
  2. {pg_sui-1.6.14.dev9.dist-info → pg_sui-1.7.0.dist-info}/RECORD +29 -33
  3. pgsui/__init__.py +0 -8
  4. pgsui/_version.py +2 -2
  5. pgsui/cli.py +591 -126
  6. pgsui/data_processing/config.py +1 -2
  7. pgsui/data_processing/containers.py +218 -533
  8. pgsui/data_processing/transformers.py +44 -20
  9. pgsui/impute/deterministic/imputers/mode.py +475 -182
  10. pgsui/impute/deterministic/imputers/ref_allele.py +454 -147
  11. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +4 -3
  12. pgsui/impute/supervised/imputers/random_forest.py +3 -2
  13. pgsui/impute/unsupervised/base.py +1268 -530
  14. pgsui/impute/unsupervised/callbacks.py +28 -33
  15. pgsui/impute/unsupervised/imputers/autoencoder.py +869 -764
  16. pgsui/impute/unsupervised/imputers/vae.py +928 -696
  17. pgsui/impute/unsupervised/loss_functions.py +156 -202
  18. pgsui/impute/unsupervised/models/autoencoder_model.py +7 -49
  19. pgsui/impute/unsupervised/models/vae_model.py +40 -221
  20. pgsui/impute/unsupervised/nn_scorers.py +53 -13
  21. pgsui/utils/classification_viz.py +240 -97
  22. pgsui/utils/misc.py +201 -3
  23. pgsui/utils/plotting.py +73 -58
  24. pgsui/utils/pretty_metrics.py +2 -6
  25. pgsui/utils/scorers.py +39 -0
  26. pg_sui-1.6.14.dev9.dist-info/METADATA +0 -344
  27. pgsui/impute/unsupervised/imputers/nlpca.py +0 -1554
  28. pgsui/impute/unsupervised/imputers/ubp.py +0 -1575
  29. pgsui/impute/unsupervised/models/nlpca_model.py +0 -206
  30. pgsui/impute/unsupervised/models/ubp_model.py +0 -200
  31. {pg_sui-1.6.14.dev9.dist-info → pg_sui-1.7.0.dist-info}/WHEEL +0 -0
  32. {pg_sui-1.6.14.dev9.dist-info → pg_sui-1.7.0.dist-info}/entry_points.txt +0 -0
  33. {pg_sui-1.6.14.dev9.dist-info → pg_sui-1.7.0.dist-info}/licenses/LICENSE +0 -0
  34. {pg_sui-1.6.14.dev9.dist-info → pg_sui-1.7.0.dist-info}/top_level.txt +0 -0
@@ -209,8 +209,9 @@ class ImputeHistGradientBoosting(BaseImputer):
209
209
  X_int = self.pgenc.genotypes_012
210
210
  self.X012_ = X_int.astype(float)
211
211
  self.X012_[self.X012_ < 0] = np.nan # Ensure missing are NaN
212
- self.is_haploid_ = np.count_nonzero(self.X012_ == 1) == 0
213
- self.num_classes_ = 2 if self.is_haploid_ else 3
212
+ self.ploidy = self.cfg.io.ploidy
213
+ self.is_haploid = self.ploidy == 1
214
+ self.num_classes_ = 2 if self.is_haploid else 3
214
215
  self.n_samples_, self.n_features_ = X_int.shape
215
216
 
216
217
  # Split
@@ -296,7 +297,7 @@ class ImputeHistGradientBoosting(BaseImputer):
296
297
  This method applies the trained imputer to the entire dataset, filling in missing genotype values. It ensures that any remaining missing values after imputation are set to -9, and decodes the imputed 0/1/2 genotypes back to their original format.
297
298
 
298
299
  Returns:
299
- np.ndarray: (n_samples, n_loci) integers with no -9/-1/NaN.
300
+ np.ndarray: (n_samples, n_loci) IUPAC strings (single-character codes).
300
301
 
301
302
  Raises:
302
303
  NotFittedError: If fit() has not been called prior to transform().
@@ -185,7 +185,8 @@ class ImputeRandomForest(BaseImputer):
185
185
  X_int = self.pgenc.genotypes_012
186
186
  self.X012_ = X_int.astype(float)
187
187
  self.X012_[self.X012_ < 0] = np.nan # Ensure missing are NaN
188
- self.is_haploid_ = np.count_nonzero(self.X012_ == 1) == 0
188
+ self.ploidy = self.cfg.io.ploidy
189
+ self.is_haploid = self.ploidy == 1
189
190
  self.num_classes_ = 2 if self.is_haploid_ else 3
190
191
  self.n_samples_, self.n_features_ = X_int.shape
191
192
 
@@ -273,7 +274,7 @@ class ImputeRandomForest(BaseImputer):
273
274
  This method applies the trained imputer to the entire dataset, filling in missing genotype values. It ensures that any remaining missing values after imputation are set to -9, and decodes the imputed 0/1/2 genotypes back to their original format.
274
275
 
275
276
  Returns:
276
- np.ndarray: (n_samples, n_loci) integers with no -9/-1/NaN.
277
+ np.ndarray: (n_samples, n_loci) IUPAC strings (single-character codes).
277
278
  """
278
279
  if not self.is_fit_:
279
280
  msg = "Imputer has not been fit; call fit() before transform()."